diff options
author | Sylvain Henry <sylvain@haskus.fr> | 2019-08-13 17:26:32 +0200 |
---|---|---|
committer | Sylvain Henry <sylvain@haskus.fr> | 2019-09-10 00:04:50 +0200 |
commit | 447864a94a1679b5b079e08bb7208a0005381cef (patch) | |
tree | baa469c52620ce7ae02def3e5e6a6f109cc89f40 /compiler/codeGen | |
parent | 270fbe8512f04b6107755fa22bdec62205c0a567 (diff) | |
download | haskell-447864a94a1679b5b079e08bb7208a0005381cef.tar.gz |
Module hierarchy: StgToCmm (#13009)
Add StgToCmm module hierarchy. Platform modules that are used in several
other places (NCG, LLVM codegen, Cmm transformations) are put into
GHC.Platform.
Diffstat (limited to 'compiler/codeGen')
27 files changed, 0 insertions, 11330 deletions
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs deleted file mode 100644 index 0ff9bd8b56..0000000000 --- a/compiler/codeGen/CgUtils.hs +++ /dev/null @@ -1,186 +0,0 @@ -{-# LANGUAGE GADTs #-} - ------------------------------------------------------------------------------ --- --- Code generator utilities; mostly monadic --- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ - -module CgUtils ( - fixStgRegisters, - baseRegOffset, - get_Regtable_addr_from_offset, - regTableOffset, - get_GlobalReg_addr, - ) where - -import GhcPrelude - -import CodeGen.Platform -import Cmm -import Hoopl.Block -import Hoopl.Graph -import CmmUtils -import CLabel -import DynFlags -import Outputable - --- ----------------------------------------------------------------------------- --- Information about global registers - -baseRegOffset :: DynFlags -> GlobalReg -> Int - -baseRegOffset dflags (VanillaReg 1 _) = oFFSET_StgRegTable_rR1 dflags -baseRegOffset dflags (VanillaReg 2 _) = oFFSET_StgRegTable_rR2 dflags -baseRegOffset dflags (VanillaReg 3 _) = oFFSET_StgRegTable_rR3 dflags -baseRegOffset dflags (VanillaReg 4 _) = oFFSET_StgRegTable_rR4 dflags -baseRegOffset dflags (VanillaReg 5 _) = oFFSET_StgRegTable_rR5 dflags -baseRegOffset dflags (VanillaReg 6 _) = oFFSET_StgRegTable_rR6 dflags -baseRegOffset dflags (VanillaReg 7 _) = oFFSET_StgRegTable_rR7 dflags -baseRegOffset dflags (VanillaReg 8 _) = oFFSET_StgRegTable_rR8 dflags -baseRegOffset dflags (VanillaReg 9 _) = oFFSET_StgRegTable_rR9 dflags -baseRegOffset dflags (VanillaReg 10 _) = oFFSET_StgRegTable_rR10 dflags -baseRegOffset _ (VanillaReg n _) = panic ("Registers above R10 are not supported (tried to use R" ++ show n ++ ")") -baseRegOffset dflags (FloatReg 1) = oFFSET_StgRegTable_rF1 dflags -baseRegOffset dflags (FloatReg 2) = oFFSET_StgRegTable_rF2 dflags -baseRegOffset dflags (FloatReg 3) = oFFSET_StgRegTable_rF3 dflags -baseRegOffset dflags (FloatReg 4) = oFFSET_StgRegTable_rF4 dflags -baseRegOffset dflags (FloatReg 5) = oFFSET_StgRegTable_rF5 dflags -baseRegOffset dflags (FloatReg 6) = oFFSET_StgRegTable_rF6 dflags -baseRegOffset _ (FloatReg n) = panic ("Registers above F6 are not supported (tried to use F" ++ show n ++ ")") -baseRegOffset dflags (DoubleReg 1) = oFFSET_StgRegTable_rD1 dflags -baseRegOffset dflags (DoubleReg 2) = oFFSET_StgRegTable_rD2 dflags -baseRegOffset dflags (DoubleReg 3) = oFFSET_StgRegTable_rD3 dflags -baseRegOffset dflags (DoubleReg 4) = oFFSET_StgRegTable_rD4 dflags -baseRegOffset dflags (DoubleReg 5) = oFFSET_StgRegTable_rD5 dflags -baseRegOffset dflags (DoubleReg 6) = oFFSET_StgRegTable_rD6 dflags -baseRegOffset _ (DoubleReg n) = panic ("Registers above D6 are not supported (tried to use D" ++ show n ++ ")") -baseRegOffset dflags (XmmReg 1) = oFFSET_StgRegTable_rXMM1 dflags -baseRegOffset dflags (XmmReg 2) = oFFSET_StgRegTable_rXMM2 dflags -baseRegOffset dflags (XmmReg 3) = oFFSET_StgRegTable_rXMM3 dflags -baseRegOffset dflags (XmmReg 4) = oFFSET_StgRegTable_rXMM4 dflags -baseRegOffset dflags (XmmReg 5) = oFFSET_StgRegTable_rXMM5 dflags -baseRegOffset dflags (XmmReg 6) = oFFSET_StgRegTable_rXMM6 dflags -baseRegOffset _ (XmmReg n) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")") -baseRegOffset dflags (YmmReg 1) = oFFSET_StgRegTable_rYMM1 dflags -baseRegOffset dflags (YmmReg 2) = oFFSET_StgRegTable_rYMM2 dflags -baseRegOffset dflags (YmmReg 3) = oFFSET_StgRegTable_rYMM3 dflags -baseRegOffset dflags (YmmReg 4) = oFFSET_StgRegTable_rYMM4 dflags -baseRegOffset dflags (YmmReg 5) = oFFSET_StgRegTable_rYMM5 dflags -baseRegOffset dflags (YmmReg 6) = oFFSET_StgRegTable_rYMM6 dflags -baseRegOffset _ (YmmReg n) = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")") -baseRegOffset dflags (ZmmReg 1) = oFFSET_StgRegTable_rZMM1 dflags -baseRegOffset dflags (ZmmReg 2) = oFFSET_StgRegTable_rZMM2 dflags -baseRegOffset dflags (ZmmReg 3) = oFFSET_StgRegTable_rZMM3 dflags -baseRegOffset dflags (ZmmReg 4) = oFFSET_StgRegTable_rZMM4 dflags -baseRegOffset dflags (ZmmReg 5) = oFFSET_StgRegTable_rZMM5 dflags -baseRegOffset dflags (ZmmReg 6) = oFFSET_StgRegTable_rZMM6 dflags -baseRegOffset _ (ZmmReg n) = panic ("Registers above ZMM6 are not supported (tried to use ZMM" ++ show n ++ ")") -baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags -baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags -baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags -baseRegOffset _ (LongReg n) = panic ("Registers above L1 are not supported (tried to use L" ++ show n ++ ")") -baseRegOffset dflags Hp = oFFSET_StgRegTable_rHp dflags -baseRegOffset dflags HpLim = oFFSET_StgRegTable_rHpLim dflags -baseRegOffset dflags CCCS = oFFSET_StgRegTable_rCCCS dflags -baseRegOffset dflags CurrentTSO = oFFSET_StgRegTable_rCurrentTSO dflags -baseRegOffset dflags CurrentNursery = oFFSET_StgRegTable_rCurrentNursery dflags -baseRegOffset dflags HpAlloc = oFFSET_StgRegTable_rHpAlloc dflags -baseRegOffset dflags EagerBlackholeInfo = oFFSET_stgEagerBlackholeInfo dflags -baseRegOffset dflags GCEnter1 = oFFSET_stgGCEnter1 dflags -baseRegOffset dflags GCFun = oFFSET_stgGCFun dflags -baseRegOffset _ BaseReg = panic "CgUtils.baseRegOffset:BaseReg" -baseRegOffset _ PicBaseReg = panic "CgUtils.baseRegOffset:PicBaseReg" -baseRegOffset _ MachSp = panic "CgUtils.baseRegOffset:MachSp" -baseRegOffset _ UnwindReturnReg = panic "CgUtils.baseRegOffset:UnwindReturnReg" - - --- ----------------------------------------------------------------------------- --- --- STG/Cmm GlobalReg --- --- ----------------------------------------------------------------------------- - --- | We map STG registers onto appropriate CmmExprs. Either they map --- to real machine registers or stored as offsets from BaseReg. Given --- a GlobalReg, get_GlobalReg_addr always produces the --- register table address for it. -get_GlobalReg_addr :: DynFlags -> GlobalReg -> CmmExpr -get_GlobalReg_addr dflags BaseReg = regTableOffset dflags 0 -get_GlobalReg_addr dflags mid - = get_Regtable_addr_from_offset dflags (baseRegOffset dflags mid) - --- Calculate a literal representing an offset into the register table. --- Used when we don't have an actual BaseReg to offset from. -regTableOffset :: DynFlags -> Int -> CmmExpr -regTableOffset dflags n = - CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r dflags + n)) - -get_Regtable_addr_from_offset :: DynFlags -> Int -> CmmExpr -get_Regtable_addr_from_offset dflags offset = - if haveRegBase (targetPlatform dflags) - then CmmRegOff baseReg offset - else regTableOffset dflags offset - --- | Fixup global registers so that they assign to locations within the --- RegTable if they aren't pinned for the current target. -fixStgRegisters :: DynFlags -> RawCmmDecl -> RawCmmDecl -fixStgRegisters _ top@(CmmData _ _) = top - -fixStgRegisters dflags (CmmProc info lbl live graph) = - let graph' = modifyGraph (mapGraphBlocks (fixStgRegBlock dflags)) graph - in CmmProc info lbl live graph' - -fixStgRegBlock :: DynFlags -> Block CmmNode e x -> Block CmmNode e x -fixStgRegBlock dflags block = mapBlock (fixStgRegStmt dflags) block - -fixStgRegStmt :: DynFlags -> CmmNode e x -> CmmNode e x -fixStgRegStmt dflags stmt = fixAssign $ mapExpDeep fixExpr stmt - where - platform = targetPlatform dflags - - fixAssign stmt = - case stmt of - CmmAssign (CmmGlobal reg) src - -- MachSp isn't an STG register; it's merely here for tracking unwind - -- information - | reg == MachSp -> stmt - | otherwise -> - let baseAddr = get_GlobalReg_addr dflags reg - in case reg `elem` activeStgRegs (targetPlatform dflags) of - True -> CmmAssign (CmmGlobal reg) src - False -> CmmStore baseAddr src - other_stmt -> other_stmt - - fixExpr expr = case expr of - -- MachSp isn't an STG; it's merely here for tracking unwind information - CmmReg (CmmGlobal MachSp) -> expr - CmmReg (CmmGlobal reg) -> - -- Replace register leaves with appropriate StixTrees for - -- the given target. MagicIds which map to a reg on this - -- arch are left unchanged. For the rest, BaseReg is taken - -- to mean the address of the reg table in MainCapability, - -- and for all others we generate an indirection to its - -- location in the register table. - case reg `elem` activeStgRegs platform of - True -> expr - False -> - let baseAddr = get_GlobalReg_addr dflags reg - in case reg of - BaseReg -> baseAddr - _other -> CmmLoad baseAddr (globalRegType dflags reg) - - CmmRegOff (CmmGlobal reg) offset -> - -- RegOf leaves are just a shorthand form. If the reg maps - -- to a real reg, we keep the shorthand, otherwise, we just - -- expand it and defer to the above code. - case reg `elem` activeStgRegs platform of - True -> expr - False -> CmmMachOp (MO_Add (wordWidth dflags)) [ - fixExpr (CmmReg (CmmGlobal reg)), - CmmLit (CmmInt (fromIntegral offset) - (wordWidth dflags))] - - other_expr -> other_expr diff --git a/compiler/codeGen/CodeGen/Platform.hs b/compiler/codeGen/CodeGen/Platform.hs deleted file mode 100644 index bc216758a0..0000000000 --- a/compiler/codeGen/CodeGen/Platform.hs +++ /dev/null @@ -1,107 +0,0 @@ - -module CodeGen.Platform - (callerSaves, activeStgRegs, haveRegBase, globalRegMaybe, freeReg) - where - -import GhcPrelude - -import CmmExpr -import GHC.Platform -import Reg - -import qualified CodeGen.Platform.ARM as ARM -import qualified CodeGen.Platform.ARM64 as ARM64 -import qualified CodeGen.Platform.PPC as PPC -import qualified CodeGen.Platform.SPARC as SPARC -import qualified CodeGen.Platform.X86 as X86 -import qualified CodeGen.Platform.X86_64 as X86_64 -import qualified CodeGen.Platform.NoRegs as NoRegs - --- | Returns 'True' if this global register is stored in a caller-saves --- machine register. - -callerSaves :: Platform -> GlobalReg -> Bool -callerSaves platform - | platformUnregisterised platform = NoRegs.callerSaves - | otherwise - = case platformArch platform of - ArchX86 -> X86.callerSaves - ArchX86_64 -> X86_64.callerSaves - ArchSPARC -> SPARC.callerSaves - ArchARM {} -> ARM.callerSaves - ArchARM64 -> ARM64.callerSaves - arch - | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] -> - PPC.callerSaves - - | otherwise -> NoRegs.callerSaves - --- | Here is where the STG register map is defined for each target arch. --- The order matters (for the llvm backend anyway)! We must make sure to --- maintain the order here with the order used in the LLVM calling conventions. --- Note that also, this isn't all registers, just the ones that are currently --- possbily mapped to real registers. -activeStgRegs :: Platform -> [GlobalReg] -activeStgRegs platform - | platformUnregisterised platform = NoRegs.activeStgRegs - | otherwise - = case platformArch platform of - ArchX86 -> X86.activeStgRegs - ArchX86_64 -> X86_64.activeStgRegs - ArchSPARC -> SPARC.activeStgRegs - ArchARM {} -> ARM.activeStgRegs - ArchARM64 -> ARM64.activeStgRegs - arch - | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] -> - PPC.activeStgRegs - - | otherwise -> NoRegs.activeStgRegs - -haveRegBase :: Platform -> Bool -haveRegBase platform - | platformUnregisterised platform = NoRegs.haveRegBase - | otherwise - = case platformArch platform of - ArchX86 -> X86.haveRegBase - ArchX86_64 -> X86_64.haveRegBase - ArchSPARC -> SPARC.haveRegBase - ArchARM {} -> ARM.haveRegBase - ArchARM64 -> ARM64.haveRegBase - arch - | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] -> - PPC.haveRegBase - - | otherwise -> NoRegs.haveRegBase - -globalRegMaybe :: Platform -> GlobalReg -> Maybe RealReg -globalRegMaybe platform - | platformUnregisterised platform = NoRegs.globalRegMaybe - | otherwise - = case platformArch platform of - ArchX86 -> X86.globalRegMaybe - ArchX86_64 -> X86_64.globalRegMaybe - ArchSPARC -> SPARC.globalRegMaybe - ArchARM {} -> ARM.globalRegMaybe - ArchARM64 -> ARM64.globalRegMaybe - arch - | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] -> - PPC.globalRegMaybe - - | otherwise -> NoRegs.globalRegMaybe - -freeReg :: Platform -> RegNo -> Bool -freeReg platform - | platformUnregisterised platform = NoRegs.freeReg - | otherwise - = case platformArch platform of - ArchX86 -> X86.freeReg - ArchX86_64 -> X86_64.freeReg - ArchSPARC -> SPARC.freeReg - ArchARM {} -> ARM.freeReg - ArchARM64 -> ARM64.freeReg - arch - | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] -> - PPC.freeReg - - | otherwise -> NoRegs.freeReg - diff --git a/compiler/codeGen/CodeGen/Platform/ARM.hs b/compiler/codeGen/CodeGen/Platform/ARM.hs deleted file mode 100644 index a2cb476e04..0000000000 --- a/compiler/codeGen/CodeGen/Platform/ARM.hs +++ /dev/null @@ -1,10 +0,0 @@ -{-# LANGUAGE CPP #-} - -module CodeGen.Platform.ARM where - -import GhcPrelude - -#define MACHREGS_NO_REGS 0 -#define MACHREGS_arm 1 -#include "../../../../includes/CodeGen.Platform.hs" - diff --git a/compiler/codeGen/CodeGen/Platform/ARM64.hs b/compiler/codeGen/CodeGen/Platform/ARM64.hs deleted file mode 100644 index 6ace181356..0000000000 --- a/compiler/codeGen/CodeGen/Platform/ARM64.hs +++ /dev/null @@ -1,10 +0,0 @@ -{-# LANGUAGE CPP #-} - -module CodeGen.Platform.ARM64 where - -import GhcPrelude - -#define MACHREGS_NO_REGS 0 -#define MACHREGS_aarch64 1 -#include "../../../../includes/CodeGen.Platform.hs" - diff --git a/compiler/codeGen/CodeGen/Platform/NoRegs.hs b/compiler/codeGen/CodeGen/Platform/NoRegs.hs deleted file mode 100644 index 4c074ee313..0000000000 --- a/compiler/codeGen/CodeGen/Platform/NoRegs.hs +++ /dev/null @@ -1,9 +0,0 @@ -{-# LANGUAGE CPP #-} - -module CodeGen.Platform.NoRegs where - -import GhcPrelude - -#define MACHREGS_NO_REGS 1 -#include "../../../../includes/CodeGen.Platform.hs" - diff --git a/compiler/codeGen/CodeGen/Platform/PPC.hs b/compiler/codeGen/CodeGen/Platform/PPC.hs deleted file mode 100644 index f7eae6b4ca..0000000000 --- a/compiler/codeGen/CodeGen/Platform/PPC.hs +++ /dev/null @@ -1,10 +0,0 @@ -{-# LANGUAGE CPP #-} - -module CodeGen.Platform.PPC where - -import GhcPrelude - -#define MACHREGS_NO_REGS 0 -#define MACHREGS_powerpc 1 -#include "../../../../includes/CodeGen.Platform.hs" - diff --git a/compiler/codeGen/CodeGen/Platform/SPARC.hs b/compiler/codeGen/CodeGen/Platform/SPARC.hs deleted file mode 100644 index 5d8dbb1da9..0000000000 --- a/compiler/codeGen/CodeGen/Platform/SPARC.hs +++ /dev/null @@ -1,10 +0,0 @@ -{-# LANGUAGE CPP #-} - -module CodeGen.Platform.SPARC where - -import GhcPrelude - -#define MACHREGS_NO_REGS 0 -#define MACHREGS_sparc 1 -#include "../../../../includes/CodeGen.Platform.hs" - diff --git a/compiler/codeGen/CodeGen/Platform/X86.hs b/compiler/codeGen/CodeGen/Platform/X86.hs deleted file mode 100644 index 84d52c1585..0000000000 --- a/compiler/codeGen/CodeGen/Platform/X86.hs +++ /dev/null @@ -1,10 +0,0 @@ -{-# LANGUAGE CPP #-} - -module CodeGen.Platform.X86 where - -import GhcPrelude - -#define MACHREGS_NO_REGS 0 -#define MACHREGS_i386 1 -#include "../../../../includes/CodeGen.Platform.hs" - diff --git a/compiler/codeGen/CodeGen/Platform/X86_64.hs b/compiler/codeGen/CodeGen/Platform/X86_64.hs deleted file mode 100644 index 1b2b5549ac..0000000000 --- a/compiler/codeGen/CodeGen/Platform/X86_64.hs +++ /dev/null @@ -1,10 +0,0 @@ -{-# LANGUAGE CPP #-} - -module CodeGen.Platform.X86_64 where - -import GhcPrelude - -#define MACHREGS_NO_REGS 0 -#define MACHREGS_x86_64 1 -#include "../../../../includes/CodeGen.Platform.hs" - diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs deleted file mode 100644 index 83409b6b24..0000000000 --- a/compiler/codeGen/StgCmm.hs +++ /dev/null @@ -1,223 +0,0 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE DataKinds #-} - ------------------------------------------------------------------------------ --- --- Stg to C-- code generation --- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ - -module StgCmm ( codeGen ) where - -#include "HsVersions.h" - -import GhcPrelude as Prelude - -import StgCmmProf (initCostCentres, ldvEnter) -import StgCmmMonad -import StgCmmEnv -import StgCmmBind -import StgCmmCon -import StgCmmLayout -import StgCmmUtils -import StgCmmClosure -import StgCmmHpc -import StgCmmTicky - -import Cmm -import CmmUtils -import CLabel - -import StgSyn -import DynFlags -import ErrUtils - -import HscTypes -import CostCentre -import Id -import IdInfo -import RepType -import DataCon -import TyCon -import Module -import Outputable -import Stream -import BasicTypes -import VarSet ( isEmptyDVarSet ) - -import OrdList -import MkGraph - -import Data.IORef -import Control.Monad (when,void) -import Util - -codeGen :: DynFlags - -> Module - -> [TyCon] - -> CollectedCCs -- (Local/global) cost-centres needing declaring/registering. - -> [CgStgTopBinding] -- Bindings to convert - -> HpcInfo - -> Stream IO CmmGroup () -- Output as a stream, so codegen can - -- be interleaved with output - -codeGen dflags this_mod data_tycons - cost_centre_info stg_binds hpc_info - = do { -- cg: run the code generator, and yield the resulting CmmGroup - -- Using an IORef to store the state is a bit crude, but otherwise - -- we would need to add a state monad layer. - ; cgref <- liftIO $ newIORef =<< initC - ; let cg :: FCode () -> Stream IO CmmGroup () - cg fcode = do - cmm <- liftIO . withTiming (return dflags) (text "STG -> Cmm") (`seq` ()) $ do - st <- readIORef cgref - let (a,st') = runC dflags this_mod st (getCmm fcode) - - -- NB. stub-out cgs_tops and cgs_stmts. This fixes - -- a big space leak. DO NOT REMOVE! - writeIORef cgref $! st'{ cgs_tops = nilOL, - cgs_stmts = mkNop } - return a - yield cmm - - -- Note [codegen-split-init] the cmm_init block must come - -- FIRST. This is because when -split-objs is on we need to - -- combine this block with its initialisation routines; see - -- Note [pipeline-split-init]. - ; cg (mkModuleInit cost_centre_info this_mod hpc_info) - - ; mapM_ (cg . cgTopBinding dflags) stg_binds - - -- Put datatype_stuff after code_stuff, because the - -- datatype closure table (for enumeration types) to - -- (say) PrelBase_True_closure, which is defined in - -- code_stuff - ; let do_tycon tycon = do - -- Generate a table of static closures for an - -- enumeration type Note that the closure pointers are - -- tagged. - when (isEnumerationTyCon tycon) $ cg (cgEnumerationTyCon tycon) - mapM_ (cg . cgDataCon) (tyConDataCons tycon) - - ; mapM_ do_tycon data_tycons - } - ---------------------------------------------------------------- --- Top-level bindings ---------------------------------------------------------------- - -{- 'cgTopBinding' is only used for top-level bindings, since they need -to be allocated statically (not in the heap) and need to be labelled. -No unboxed bindings can happen at top level. - -In the code below, the static bindings are accumulated in the -@MkCgState@, and transferred into the ``statics'' slot by @forkStatics@. -This is so that we can write the top level processing in a compositional -style, with the increasing static environment being plumbed as a state -variable. -} - -cgTopBinding :: DynFlags -> CgStgTopBinding -> FCode () -cgTopBinding dflags (StgTopLifted (StgNonRec id rhs)) - = do { let (info, fcode) = cgTopRhs dflags NonRecursive id rhs - ; fcode - ; addBindC info - } - -cgTopBinding dflags (StgTopLifted (StgRec pairs)) - = do { let (bndrs, rhss) = unzip pairs - ; let pairs' = zip bndrs rhss - r = unzipWith (cgTopRhs dflags Recursive) pairs' - (infos, fcodes) = unzip r - ; addBindsC infos - ; sequence_ fcodes - } - -cgTopBinding dflags (StgTopStringLit id str) - = do { let label = mkBytesLabel (idName id) - ; let (lit, decl) = mkByteStringCLit label str - ; emitDecl decl - ; addBindC (litIdInfo dflags id mkLFStringLit lit) - } - -cgTopRhs :: DynFlags -> RecFlag -> Id -> CgStgRhs -> (CgIdInfo, FCode ()) - -- The Id is passed along for setting up a binding... - -cgTopRhs dflags _rec bndr (StgRhsCon _cc con args) - = cgTopRhsCon dflags bndr con (assertNonVoidStgArgs args) - -- con args are always non-void, - -- see Note [Post-unarisation invariants] in UnariseStg - -cgTopRhs dflags rec bndr (StgRhsClosure fvs cc upd_flag args body) - = ASSERT(isEmptyDVarSet fvs) -- There should be no free variables - cgTopRhsClosure dflags rec bndr cc upd_flag args body - - ---------------------------------------------------------------- --- Module initialisation code ---------------------------------------------------------------- - -mkModuleInit - :: CollectedCCs -- cost centre info - -> Module - -> HpcInfo - -> FCode () - -mkModuleInit cost_centre_info this_mod hpc_info - = do { initHpc this_mod hpc_info - ; initCostCentres cost_centre_info - } - - ---------------------------------------------------------------- --- Generating static stuff for algebraic data types ---------------------------------------------------------------- - - -cgEnumerationTyCon :: TyCon -> FCode () -cgEnumerationTyCon tycon - = do dflags <- getDynFlags - emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs) - [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs) - (tagForCon dflags con) - | con <- tyConDataCons tycon] - - -cgDataCon :: DataCon -> FCode () --- Generate the entry code, info tables, and (for niladic constructor) --- the static closure, for a constructor. -cgDataCon data_con - = do { dflags <- getDynFlags - ; let - (tot_wds, -- #ptr_wds + #nonptr_wds - ptr_wds) -- #ptr_wds - = mkVirtConstrSizes dflags arg_reps - - nonptr_wds = tot_wds - ptr_wds - - dyn_info_tbl = - mkDataConInfoTable dflags data_con False ptr_wds nonptr_wds - - -- We're generating info tables, so we don't know and care about - -- what the actual arguments are. Using () here as the place holder. - arg_reps :: [NonVoid PrimRep] - arg_reps = [ NonVoid rep_ty - | ty <- dataConRepArgTys data_con - , rep_ty <- typePrimRep ty - , not (isVoidRep rep_ty) ] - - ; emitClosureAndInfoTable dyn_info_tbl NativeDirectCall [] $ - -- NB: the closure pointer is assumed *untagged* on - -- entry to a constructor. If the pointer is tagged, - -- then we should not be entering it. This assumption - -- is used in ldvEnter and when tagging the pointer to - -- return it. - -- NB 2: We don't set CC when entering data (WDP 94/06) - do { tickyEnterDynCon - ; ldvEnter (CmmReg nodeReg) - ; tickyReturnOldCon (length arg_reps) - ; void $ emitReturn [cmmOffsetB dflags (CmmReg nodeReg) (tagForCon dflags data_con)] - } - -- The case continuation code expects a tagged pointer - } diff --git a/compiler/codeGen/StgCmmArgRep.hs b/compiler/codeGen/StgCmmArgRep.hs deleted file mode 100644 index ef40fce464..0000000000 --- a/compiler/codeGen/StgCmmArgRep.hs +++ /dev/null @@ -1,160 +0,0 @@ ------------------------------------------------------------------------------ --- --- Argument representations used in StgCmmLayout. --- --- (c) The University of Glasgow 2013 --- ------------------------------------------------------------------------------ - -module StgCmmArgRep ( - ArgRep(..), toArgRep, argRepSizeW, - - argRepString, isNonV, idArgRep, - - slowCallPattern, - - ) where - -import GhcPrelude - -import StgCmmClosure ( idPrimRep ) - -import SMRep ( WordOff ) -import Id ( Id ) -import TyCon ( PrimRep(..), primElemRepSizeB ) -import BasicTypes ( RepArity ) -import Constants ( wORD64_SIZE ) -import DynFlags - -import Outputable -import FastString - --- I extricated this code as this new module in order to avoid a --- cyclic dependency between StgCmmLayout and StgCmmTicky. --- --- NSF 18 Feb 2013 - -------------------------------------------------------------------------- --- Classifying arguments: ArgRep -------------------------------------------------------------------------- - --- ArgRep is re-exported by StgCmmLayout, but only for use in the --- byte-code generator which also needs to know about the --- classification of arguments. - -data ArgRep = P -- GC Ptr - | N -- Word-sized non-ptr - | L -- 64-bit non-ptr (long) - | V -- Void - | F -- Float - | D -- Double - | V16 -- 16-byte (128-bit) vectors of Float/Double/Int8/Word32/etc. - | V32 -- 32-byte (256-bit) vectors of Float/Double/Int8/Word32/etc. - | V64 -- 64-byte (512-bit) vectors of Float/Double/Int8/Word32/etc. -instance Outputable ArgRep where ppr = text . argRepString - -argRepString :: ArgRep -> String -argRepString P = "P" -argRepString N = "N" -argRepString L = "L" -argRepString V = "V" -argRepString F = "F" -argRepString D = "D" -argRepString V16 = "V16" -argRepString V32 = "V32" -argRepString V64 = "V64" - -toArgRep :: PrimRep -> ArgRep -toArgRep VoidRep = V -toArgRep LiftedRep = P -toArgRep UnliftedRep = P -toArgRep IntRep = N -toArgRep WordRep = N -toArgRep Int8Rep = N -- Gets widened to native word width for calls -toArgRep Word8Rep = N -- Gets widened to native word width for calls -toArgRep Int16Rep = N -- Gets widened to native word width for calls -toArgRep Word16Rep = N -- Gets widened to native word width for calls -toArgRep Int32Rep = N -- Gets widened to native word width for calls -toArgRep Word32Rep = N -- Gets widened to native word width for calls -toArgRep AddrRep = N -toArgRep Int64Rep = L -toArgRep Word64Rep = L -toArgRep FloatRep = F -toArgRep DoubleRep = D -toArgRep (VecRep len elem) = case len*primElemRepSizeB elem of - 16 -> V16 - 32 -> V32 - 64 -> V64 - _ -> error "toArgRep: bad vector primrep" - -isNonV :: ArgRep -> Bool -isNonV V = False -isNonV _ = True - -argRepSizeW :: DynFlags -> ArgRep -> WordOff -- Size in words -argRepSizeW _ N = 1 -argRepSizeW _ P = 1 -argRepSizeW _ F = 1 -argRepSizeW dflags L = wORD64_SIZE `quot` wORD_SIZE dflags -argRepSizeW dflags D = dOUBLE_SIZE dflags `quot` wORD_SIZE dflags -argRepSizeW _ V = 0 -argRepSizeW dflags V16 = 16 `quot` wORD_SIZE dflags -argRepSizeW dflags V32 = 32 `quot` wORD_SIZE dflags -argRepSizeW dflags V64 = 64 `quot` wORD_SIZE dflags - -idArgRep :: Id -> ArgRep -idArgRep = toArgRep . idPrimRep - --- This list of argument patterns should be kept in sync with at least --- the following: --- --- * StgCmmLayout.stdPattern maybe to some degree? --- --- * the RTS_RET(stg_ap_*) and RTS_FUN_DECL(stg_ap_*_fast) --- declarations in includes/stg/MiscClosures.h --- --- * the SLOW_CALL_*_ctr declarations in includes/stg/Ticky.h, --- --- * the TICK_SLOW_CALL_*() #defines in includes/Cmm.h, --- --- * the PR_CTR(SLOW_CALL_*_ctr) calls in rts/Ticky.c, --- --- * and the SymI_HasProto(stg_ap_*_{ret,info,fast}) calls and --- SymI_HasProto(SLOW_CALL_*_ctr) calls in rts/Linker.c --- --- There may be more places that I haven't found; I merely igrep'd for --- pppppp and excluded things that seemed ghci-specific. --- --- Also, it seems at the moment that ticky counters with void --- arguments will never be bumped, but I'm still declaring those --- counters, defensively. --- --- NSF 6 Mar 2013 - -slowCallPattern :: [ArgRep] -> (FastString, RepArity) --- Returns the generic apply function and arity --- --- The first batch of cases match (some) specialised entries --- The last group deals exhaustively with the cases for the first argument --- (and the zero-argument case) --- --- In 99% of cases this function will match *all* the arguments in one batch - -slowCallPattern (P: P: P: P: P: P: _) = (fsLit "stg_ap_pppppp", 6) -slowCallPattern (P: P: P: P: P: _) = (fsLit "stg_ap_ppppp", 5) -slowCallPattern (P: P: P: P: _) = (fsLit "stg_ap_pppp", 4) -slowCallPattern (P: P: P: V: _) = (fsLit "stg_ap_pppv", 4) -slowCallPattern (P: P: P: _) = (fsLit "stg_ap_ppp", 3) -slowCallPattern (P: P: V: _) = (fsLit "stg_ap_ppv", 3) -slowCallPattern (P: P: _) = (fsLit "stg_ap_pp", 2) -slowCallPattern (P: V: _) = (fsLit "stg_ap_pv", 2) -slowCallPattern (P: _) = (fsLit "stg_ap_p", 1) -slowCallPattern (V: _) = (fsLit "stg_ap_v", 1) -slowCallPattern (N: _) = (fsLit "stg_ap_n", 1) -slowCallPattern (F: _) = (fsLit "stg_ap_f", 1) -slowCallPattern (D: _) = (fsLit "stg_ap_d", 1) -slowCallPattern (L: _) = (fsLit "stg_ap_l", 1) -slowCallPattern (V16: _) = (fsLit "stg_ap_v16", 1) -slowCallPattern (V32: _) = (fsLit "stg_ap_v32", 1) -slowCallPattern (V64: _) = (fsLit "stg_ap_v64", 1) -slowCallPattern [] = (fsLit "stg_ap_0", 0) diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs deleted file mode 100644 index 7189800f6e..0000000000 --- a/compiler/codeGen/StgCmmBind.hs +++ /dev/null @@ -1,753 +0,0 @@ ------------------------------------------------------------------------------ --- --- Stg to C-- code generation: bindings --- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ - -module StgCmmBind ( - cgTopRhsClosure, - cgBind, - emitBlackHoleCode, - pushUpdateFrame, emitUpdateFrame - ) where - -import GhcPrelude hiding ((<*>)) - -import StgCmmExpr -import StgCmmMonad -import StgCmmEnv -import StgCmmCon -import StgCmmHeap -import StgCmmProf (ldvEnterClosure, enterCostCentreFun, enterCostCentreThunk, - initUpdFrameProf) -import StgCmmTicky -import StgCmmLayout -import StgCmmUtils -import StgCmmClosure -import StgCmmForeign (emitPrimCall) - -import MkGraph -import CoreSyn ( AltCon(..), tickishIsCode ) -import BlockId -import SMRep -import Cmm -import CmmInfo -import CmmUtils -import CLabel -import StgSyn -import CostCentre -import Id -import IdInfo -import Name -import Module -import ListSetOps -import Util -import VarSet -import BasicTypes -import Outputable -import FastString -import DynFlags - -import Control.Monad - ------------------------------------------------------------------------- --- Top-level bindings ------------------------------------------------------------------------- - --- For closures bound at top level, allocate in static space. --- They should have no free variables. - -cgTopRhsClosure :: DynFlags - -> RecFlag -- member of a recursive group? - -> Id - -> CostCentreStack -- Optional cost centre annotation - -> UpdateFlag - -> [Id] -- Args - -> CgStgExpr - -> (CgIdInfo, FCode ()) - -cgTopRhsClosure dflags rec id ccs upd_flag args body = - let closure_label = mkLocalClosureLabel (idName id) (idCafInfo id) - cg_id_info = litIdInfo dflags id lf_info (CmmLabel closure_label) - lf_info = mkClosureLFInfo dflags id TopLevel [] upd_flag args - in (cg_id_info, gen_code dflags lf_info closure_label) - where - -- special case for a indirection (f = g). We create an IND_STATIC - -- closure pointing directly to the indirectee. This is exactly - -- what the CAF will eventually evaluate to anyway, we're just - -- shortcutting the whole process, and generating a lot less code - -- (#7308). Eventually the IND_STATIC closure will be eliminated - -- by assembly '.equiv' directives, where possible (#15155). - -- See note [emit-time elimination of static indirections] in CLabel. - -- - -- Note: we omit the optimisation when this binding is part of a - -- recursive group, because the optimisation would inhibit the black - -- hole detection from working in that case. Test - -- concurrent/should_run/4030 fails, for instance. - -- - gen_code dflags _ closure_label - | StgApp f [] <- body, null args, isNonRec rec - = do - cg_info <- getCgIdInfo f - let closure_rep = mkStaticClosureFields dflags - indStaticInfoTable ccs MayHaveCafRefs - [unLit (idInfoToAmode cg_info)] - emitDataLits closure_label closure_rep - return () - - gen_code dflags lf_info _closure_label - = do { let name = idName id - ; mod_name <- getModuleName - ; let descr = closureDescription dflags mod_name name - closure_info = mkClosureInfo dflags True id lf_info 0 0 descr - - -- We don't generate the static closure here, because we might - -- want to add references to static closures to it later. The - -- static closure is generated by CmmBuildInfoTables.updInfoSRTs, - -- See Note [SRTs], specifically the [FUN] optimisation. - - ; let fv_details :: [(NonVoid Id, ByteOff)] - header = if isLFThunk lf_info then ThunkHeader else StdHeader - (_, _, fv_details) = mkVirtHeapOffsets dflags header [] - -- Don't drop the non-void args until the closure info has been made - ; forkClosureBody (closureCodeBody True id closure_info ccs - (nonVoidIds args) (length args) body fv_details) - - ; return () } - - unLit (CmmLit l) = l - unLit _ = panic "unLit" - ------------------------------------------------------------------------- --- Non-top-level bindings ------------------------------------------------------------------------- - -cgBind :: CgStgBinding -> FCode () -cgBind (StgNonRec name rhs) - = do { (info, fcode) <- cgRhs name rhs - ; addBindC info - ; init <- fcode - ; emit init } - -- init cannot be used in body, so slightly better to sink it eagerly - -cgBind (StgRec pairs) - = do { r <- sequence $ unzipWith cgRhs pairs - ; let (id_infos, fcodes) = unzip r - ; addBindsC id_infos - ; (inits, body) <- getCodeR $ sequence fcodes - ; emit (catAGraphs inits <*> body) } - -{- Note [cgBind rec] - - Recursive let-bindings are tricky. - Consider the following pseudocode: - - let x = \_ -> ... y ... - y = \_ -> ... z ... - z = \_ -> ... x ... - in ... - - For each binding, we need to allocate a closure, and each closure must - capture the address of the other closures. - We want to generate the following C-- code: - // Initialization Code - x = hp - 24; // heap address of x's closure - y = hp - 40; // heap address of x's closure - z = hp - 64; // heap address of x's closure - // allocate and initialize x - m[hp-8] = ... - m[hp-16] = y // the closure for x captures y - m[hp-24] = x_info; - // allocate and initialize y - m[hp-32] = z; // the closure for y captures z - m[hp-40] = y_info; - // allocate and initialize z - ... - - For each closure, we must generate not only the code to allocate and - initialize the closure itself, but also some initialization Code that - sets a variable holding the closure pointer. - - We could generate a pair of the (init code, body code), but since - the bindings are recursive we also have to initialise the - environment with the CgIdInfo for all the bindings before compiling - anything. So we do this in 3 stages: - - 1. collect all the CgIdInfos and initialise the environment - 2. compile each binding into (init, body) code - 3. emit all the inits, and then all the bodies - - We'd rather not have separate functions to do steps 1 and 2 for - each binding, since in pratice they share a lot of code. So we - have just one function, cgRhs, that returns a pair of the CgIdInfo - for step 1, and a monadic computation to generate the code in step - 2. - - The alternative to separating things in this way is to use a - fixpoint. That's what we used to do, but it introduces a - maintenance nightmare because there is a subtle dependency on not - being too strict everywhere. Doing things this way means that the - FCode monad can be strict, for example. - -} - -cgRhs :: Id - -> CgStgRhs - -> FCode ( - CgIdInfo -- The info for this binding - , FCode CmmAGraph -- A computation which will generate the - -- code for the binding, and return an - -- assignent of the form "x = Hp - n" - -- (see above) - ) - -cgRhs id (StgRhsCon cc con args) - = withNewTickyCounterCon (idName id) $ - buildDynCon id True cc con (assertNonVoidStgArgs args) - -- con args are always non-void, - -- see Note [Post-unarisation invariants] in UnariseStg - -{- See Note [GC recovery] in compiler/codeGen/StgCmmClosure.hs -} -cgRhs id (StgRhsClosure fvs cc upd_flag args body) - = do dflags <- getDynFlags - mkRhsClosure dflags id cc (nonVoidIds (dVarSetElems fvs)) upd_flag args body - ------------------------------------------------------------------------- --- Non-constructor right hand sides ------------------------------------------------------------------------- - -mkRhsClosure :: DynFlags -> Id -> CostCentreStack - -> [NonVoid Id] -- Free vars - -> UpdateFlag - -> [Id] -- Args - -> CgStgExpr - -> FCode (CgIdInfo, FCode CmmAGraph) - -{- mkRhsClosure looks for two special forms of the right-hand side: - a) selector thunks - b) AP thunks - -If neither happens, it just calls mkClosureLFInfo. You might think -that mkClosureLFInfo should do all this, but it seems wrong for the -latter to look at the structure of an expression - -Note [Selectors] -~~~~~~~~~~~~~~~~ -We look at the body of the closure to see if it's a selector---turgid, -but nothing deep. We are looking for a closure of {\em exactly} the -form: - -... = [the_fv] \ u [] -> - case the_fv of - con a_1 ... a_n -> a_i - -Note [Ap thunks] -~~~~~~~~~~~~~~~~ -A more generic AP thunk of the form - - x = [ x_1...x_n ] \.. [] -> x_1 ... x_n - -A set of these is compiled statically into the RTS, so we just use -those. We could extend the idea to thunks where some of the x_i are -global ids (and hence not free variables), but this would entail -generating a larger thunk. It might be an option for non-optimising -compilation, though. - -We only generate an Ap thunk if all the free variables are pointers, -for semi-obvious reasons. - --} - ----------- Note [Selectors] ------------------ -mkRhsClosure dflags bndr _cc - [NonVoid the_fv] -- Just one free var - upd_flag -- Updatable thunk - [] -- A thunk - expr - | let strip = stripStgTicksTopE (not . tickishIsCode) - , StgCase (StgApp scrutinee [{-no args-}]) - _ -- ignore bndr - (AlgAlt _) - [(DataAlt _, params, sel_expr)] <- strip expr - , StgApp selectee [{-no args-}] <- strip sel_expr - , the_fv == scrutinee -- Scrutinee is the only free variable - - , let (_, _, params_w_offsets) = mkVirtConstrOffsets dflags (addIdReps (assertNonVoidIds params)) - -- pattern binders are always non-void, - -- see Note [Post-unarisation invariants] in UnariseStg - , Just the_offset <- assocMaybe params_w_offsets (NonVoid selectee) - - , let offset_into_int = bytesToWordsRoundUp dflags the_offset - - fixedHdrSizeW dflags - , offset_into_int <= mAX_SPEC_SELECTEE_SIZE dflags -- Offset is small enough - = -- NOT TRUE: ASSERT(is_single_constructor) - -- The simplifier may have statically determined that the single alternative - -- is the only possible case and eliminated the others, even if there are - -- other constructors in the datatype. It's still ok to make a selector - -- thunk in this case, because we *know* which constructor the scrutinee - -- will evaluate to. - -- - -- srt is discarded; it must be empty - let lf_info = mkSelectorLFInfo bndr offset_into_int (isUpdatable upd_flag) - in cgRhsStdThunk bndr lf_info [StgVarArg the_fv] - ----------- Note [Ap thunks] ------------------ -mkRhsClosure dflags bndr _cc - fvs - upd_flag - [] -- No args; a thunk - (StgApp fun_id args) - - -- We are looking for an "ApThunk"; see data con ApThunk in StgCmmClosure - -- of form (x1 x2 .... xn), where all the xi are locals (not top-level) - -- So the xi will all be free variables - | args `lengthIs` (n_fvs-1) -- This happens only if the fun_id and - -- args are all distinct local variables - -- The "-1" is for fun_id - -- Missed opportunity: (f x x) is not detected - , all (isGcPtrRep . idPrimRep . fromNonVoid) fvs - , isUpdatable upd_flag - , n_fvs <= mAX_SPEC_AP_SIZE dflags - , not (gopt Opt_SccProfilingOn dflags) - -- not when profiling: we don't want to - -- lose information about this particular - -- thunk (e.g. its type) (#949) - , idArity fun_id == unknownArity -- don't spoil a known call - - -- Ha! an Ap thunk - = cgRhsStdThunk bndr lf_info payload - - where - n_fvs = length fvs - lf_info = mkApLFInfo bndr upd_flag n_fvs - -- the payload has to be in the correct order, hence we can't - -- just use the fvs. - payload = StgVarArg fun_id : args - ----------- Default case ------------------ -mkRhsClosure dflags bndr cc fvs upd_flag args body - = do { let lf_info = mkClosureLFInfo dflags bndr NotTopLevel fvs upd_flag args - ; (id_info, reg) <- rhsIdInfo bndr lf_info - ; return (id_info, gen_code lf_info reg) } - where - gen_code lf_info reg - = do { -- LAY OUT THE OBJECT - -- If the binder is itself a free variable, then don't store - -- it in the closure. Instead, just bind it to Node on entry. - -- NB we can be sure that Node will point to it, because we - -- haven't told mkClosureLFInfo about this; so if the binder - -- _was_ a free var of its RHS, mkClosureLFInfo thinks it *is* - -- stored in the closure itself, so it will make sure that - -- Node points to it... - ; let reduced_fvs = filter (NonVoid bndr /=) fvs - - -- MAKE CLOSURE INFO FOR THIS CLOSURE - ; mod_name <- getModuleName - ; dflags <- getDynFlags - ; let name = idName bndr - descr = closureDescription dflags mod_name name - fv_details :: [(NonVoid Id, ByteOff)] - header = if isLFThunk lf_info then ThunkHeader else StdHeader - (tot_wds, ptr_wds, fv_details) - = mkVirtHeapOffsets dflags header (addIdReps reduced_fvs) - closure_info = mkClosureInfo dflags False -- Not static - bndr lf_info tot_wds ptr_wds - descr - - -- BUILD ITS INFO TABLE AND CODE - ; forkClosureBody $ - -- forkClosureBody: (a) ensure that bindings in here are not seen elsewhere - -- (b) ignore Sequel from context; use empty Sequel - -- And compile the body - closureCodeBody False bndr closure_info cc (nonVoidIds args) - (length args) body fv_details - - -- BUILD THE OBJECT --- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body - ; let use_cc = cccsExpr; blame_cc = cccsExpr - ; emit (mkComment $ mkFastString "calling allocDynClosure") - ; let toVarArg (NonVoid a, off) = (NonVoid (StgVarArg a), off) - ; let info_tbl = mkCmmInfo closure_info bndr currentCCS - ; hp_plus_n <- allocDynClosure (Just bndr) info_tbl lf_info use_cc blame_cc - (map toVarArg fv_details) - - -- RETURN - ; return (mkRhsInit dflags reg lf_info hp_plus_n) } - -------------------------- -cgRhsStdThunk - :: Id - -> LambdaFormInfo - -> [StgArg] -- payload - -> FCode (CgIdInfo, FCode CmmAGraph) - -cgRhsStdThunk bndr lf_info payload - = do { (id_info, reg) <- rhsIdInfo bndr lf_info - ; return (id_info, gen_code reg) - } - where - gen_code reg -- AHA! A STANDARD-FORM THUNK - = withNewTickyCounterStdThunk (lfUpdatable lf_info) (idName bndr) $ - do - { -- LAY OUT THE OBJECT - mod_name <- getModuleName - ; dflags <- getDynFlags - ; let header = if isLFThunk lf_info then ThunkHeader else StdHeader - (tot_wds, ptr_wds, payload_w_offsets) - = mkVirtHeapOffsets dflags header - (addArgReps (nonVoidStgArgs payload)) - - descr = closureDescription dflags mod_name (idName bndr) - closure_info = mkClosureInfo dflags False -- Not static - bndr lf_info tot_wds ptr_wds - descr - --- ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body - ; let use_cc = cccsExpr; blame_cc = cccsExpr - - - -- BUILD THE OBJECT - ; let info_tbl = mkCmmInfo closure_info bndr currentCCS - ; hp_plus_n <- allocDynClosure (Just bndr) info_tbl lf_info - use_cc blame_cc payload_w_offsets - - -- RETURN - ; return (mkRhsInit dflags reg lf_info hp_plus_n) } - - -mkClosureLFInfo :: DynFlags - -> Id -- The binder - -> TopLevelFlag -- True of top level - -> [NonVoid Id] -- Free vars - -> UpdateFlag -- Update flag - -> [Id] -- Args - -> LambdaFormInfo -mkClosureLFInfo dflags bndr top fvs upd_flag args - | null args = - mkLFThunk (idType bndr) top (map fromNonVoid fvs) upd_flag - | otherwise = - mkLFReEntrant top (map fromNonVoid fvs) args (mkArgDescr dflags args) - - ------------------------------------------------------------------------- --- The code for closures ------------------------------------------------------------------------- - -closureCodeBody :: Bool -- whether this is a top-level binding - -> Id -- the closure's name - -> ClosureInfo -- Lots of information about this closure - -> CostCentreStack -- Optional cost centre attached to closure - -> [NonVoid Id] -- incoming args to the closure - -> Int -- arity, including void args - -> CgStgExpr - -> [(NonVoid Id, ByteOff)] -- the closure's free vars - -> FCode () - -{- There are two main cases for the code for closures. - -* If there are *no arguments*, then the closure is a thunk, and not in - normal form. So it should set up an update frame (if it is - shared). NB: Thunks cannot have a primitive type! - -* If there is *at least one* argument, then this closure is in - normal form, so there is no need to set up an update frame. --} - -closureCodeBody top_lvl bndr cl_info cc _args arity body fv_details - | arity == 0 -- No args i.e. thunk - = withNewTickyCounterThunk - (isStaticClosure cl_info) - (closureUpdReqd cl_info) - (closureName cl_info) $ - emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl [] $ - \(_, node, _) -> thunkCode cl_info fv_details cc node arity body - where - lf_info = closureLFInfo cl_info - info_tbl = mkCmmInfo cl_info bndr cc - -closureCodeBody top_lvl bndr cl_info cc args arity body fv_details - = -- Note: args may be [], if all args are Void - withNewTickyCounterFun - (closureSingleEntry cl_info) - (closureName cl_info) - args $ do { - - ; let - lf_info = closureLFInfo cl_info - info_tbl = mkCmmInfo cl_info bndr cc - - -- Emit the main entry code - ; emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args $ - \(_offset, node, arg_regs) -> do - -- Emit slow-entry code (for entering a closure through a PAP) - { mkSlowEntryCode bndr cl_info arg_regs - ; dflags <- getDynFlags - ; let node_points = nodeMustPointToIt dflags lf_info - node' = if node_points then Just node else Nothing - ; loop_header_id <- newBlockId - -- Extend reader monad with information that - -- self-recursive tail calls can be optimized into local - -- jumps. See Note [Self-recursive tail calls] in StgCmmExpr. - ; withSelfLoop (bndr, loop_header_id, arg_regs) $ do - { - -- Main payload - ; entryHeapCheck cl_info node' arity arg_regs $ do - { -- emit LDV code when profiling - when node_points (ldvEnterClosure cl_info (CmmLocal node)) - -- ticky after heap check to avoid double counting - ; tickyEnterFun cl_info - ; enterCostCentreFun cc - (CmmMachOp (mo_wordSub dflags) - [ CmmReg (CmmLocal node) -- See [NodeReg clobbered with loopification] - , mkIntExpr dflags (funTag dflags cl_info) ]) - ; fv_bindings <- mapM bind_fv fv_details - -- Load free vars out of closure *after* - -- heap check, to reduce live vars over check - ; when node_points $ load_fvs node lf_info fv_bindings - ; void $ cgExpr body - }}} - - } - --- Note [NodeReg clobbered with loopification] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- --- Previously we used to pass nodeReg (aka R1) here. With profiling, upon --- entering a closure, enterFunCCS was called with R1 passed to it. But since R1 --- may get clobbered inside the body of a closure, and since a self-recursive --- tail call does not restore R1, a subsequent call to enterFunCCS received a --- possibly bogus value from R1. The solution is to not pass nodeReg (aka R1) to --- enterFunCCS. Instead, we pass node, the callee-saved temporary that stores --- the original value of R1. This way R1 may get modified but loopification will --- not care. - --- A function closure pointer may be tagged, so we --- must take it into account when accessing the free variables. -bind_fv :: (NonVoid Id, ByteOff) -> FCode (LocalReg, ByteOff) -bind_fv (id, off) = do { reg <- rebindToReg id; return (reg, off) } - -load_fvs :: LocalReg -> LambdaFormInfo -> [(LocalReg, ByteOff)] -> FCode () -load_fvs node lf_info = mapM_ (\ (reg, off) -> - do dflags <- getDynFlags - let tag = lfDynTag dflags lf_info - emit $ mkTaggedObjectLoad dflags reg node off tag) - ------------------------------------------ --- The "slow entry" code for a function. This entry point takes its --- arguments on the stack. It loads the arguments into registers --- according to the calling convention, and jumps to the function's --- normal entry point. The function's closure is assumed to be in --- R1/node. --- --- The slow entry point is used for unknown calls: eg. stg_PAP_entry - -mkSlowEntryCode :: Id -> ClosureInfo -> [LocalReg] -> FCode () --- If this function doesn't have a specialised ArgDescr, we need --- to generate the function's arg bitmap and slow-entry code. --- Here, we emit the slow-entry code. -mkSlowEntryCode bndr cl_info arg_regs -- function closure is already in `Node' - | Just (_, ArgGen _) <- closureFunInfo cl_info - = do dflags <- getDynFlags - let node = idToReg dflags (NonVoid bndr) - slow_lbl = closureSlowEntryLabel cl_info - fast_lbl = closureLocalEntryLabel dflags cl_info - -- mkDirectJump does not clobber `Node' containing function closure - jump = mkJump dflags NativeNodeCall - (mkLblExpr fast_lbl) - (map (CmmReg . CmmLocal) (node : arg_regs)) - (initUpdFrameOff dflags) - tscope <- getTickScope - emitProcWithConvention Slow Nothing slow_lbl - (node : arg_regs) (jump, tscope) - | otherwise = return () - ------------------------------------------ -thunkCode :: ClosureInfo -> [(NonVoid Id, ByteOff)] -> CostCentreStack - -> LocalReg -> Int -> CgStgExpr -> FCode () -thunkCode cl_info fv_details _cc node arity body - = do { dflags <- getDynFlags - ; let node_points = nodeMustPointToIt dflags (closureLFInfo cl_info) - node' = if node_points then Just node else Nothing - ; ldvEnterClosure cl_info (CmmLocal node) -- NB: Node always points when profiling - - -- Heap overflow check - ; entryHeapCheck cl_info node' arity [] $ do - { -- Overwrite with black hole if necessary - -- but *after* the heap-overflow check - ; tickyEnterThunk cl_info - ; when (blackHoleOnEntry cl_info && node_points) - (blackHoleIt node) - - -- Push update frame - ; setupUpdate cl_info node $ - -- We only enter cc after setting up update so - -- that cc of enclosing scope will be recorded - -- in update frame CAF/DICT functions will be - -- subsumed by this enclosing cc - do { enterCostCentreThunk (CmmReg nodeReg) - ; let lf_info = closureLFInfo cl_info - ; fv_bindings <- mapM bind_fv fv_details - ; load_fvs node lf_info fv_bindings - ; void $ cgExpr body }}} - - ------------------------------------------------------------------------- --- Update and black-hole wrappers ------------------------------------------------------------------------- - -blackHoleIt :: LocalReg -> FCode () --- Only called for closures with no args --- Node points to the closure -blackHoleIt node_reg - = emitBlackHoleCode (CmmReg (CmmLocal node_reg)) - -emitBlackHoleCode :: CmmExpr -> FCode () -emitBlackHoleCode node = do - dflags <- getDynFlags - - -- Eager blackholing is normally disabled, but can be turned on with - -- -feager-blackholing. When it is on, we replace the info pointer - -- of the thunk with stg_EAGER_BLACKHOLE_info on entry. - - -- If we wanted to do eager blackholing with slop filling, we'd need - -- to do it at the *end* of a basic block, otherwise we overwrite - -- the free variables in the thunk that we still need. We have a - -- patch for this from Andy Cheadle, but not incorporated yet. --SDM - -- [6/2004] - -- - -- Previously, eager blackholing was enabled when ticky-ticky was - -- on. But it didn't work, and it wasn't strictly necessary to bring - -- back minimal ticky-ticky, so now EAGER_BLACKHOLING is - -- unconditionally disabled. -- krc 1/2007 - - -- Note the eager-blackholing check is here rather than in blackHoleOnEntry, - -- because emitBlackHoleCode is called from CmmParse. - - let eager_blackholing = not (gopt Opt_SccProfilingOn dflags) - && gopt Opt_EagerBlackHoling dflags - -- Profiling needs slop filling (to support LDV - -- profiling), so currently eager blackholing doesn't - -- work with profiling. - - when eager_blackholing $ do - emitStore (cmmOffsetW dflags node (fixedHdrSizeW dflags)) currentTSOExpr - -- See Note [Heap memory barriers] in SMP.h. - emitPrimCall [] MO_WriteBarrier [] - emitStore node (CmmReg (CmmGlobal EagerBlackholeInfo)) - -setupUpdate :: ClosureInfo -> LocalReg -> FCode () -> FCode () - -- Nota Bene: this function does not change Node (even if it's a CAF), - -- so that the cost centre in the original closure can still be - -- extracted by a subsequent enterCostCentre -setupUpdate closure_info node body - | not (lfUpdatable (closureLFInfo closure_info)) - = body - - | not (isStaticClosure closure_info) - = if not (closureUpdReqd closure_info) - then do tickyUpdateFrameOmitted; body - else do - tickyPushUpdateFrame - dflags <- getDynFlags - let - bh = blackHoleOnEntry closure_info && - not (gopt Opt_SccProfilingOn dflags) && - gopt Opt_EagerBlackHoling dflags - - lbl | bh = mkBHUpdInfoLabel - | otherwise = mkUpdInfoLabel - - pushUpdateFrame lbl (CmmReg (CmmLocal node)) body - - | otherwise -- A static closure - = do { tickyUpdateBhCaf closure_info - - ; if closureUpdReqd closure_info - then do -- Blackhole the (updatable) CAF: - { upd_closure <- link_caf node - ; pushUpdateFrame mkBHUpdInfoLabel upd_closure body } - else do {tickyUpdateFrameOmitted; body} - } - ------------------------------------------------------------------------------ --- Setting up update frames - --- Push the update frame on the stack in the Entry area, --- leaving room for the return address that is already --- at the old end of the area. --- -pushUpdateFrame :: CLabel -> CmmExpr -> FCode () -> FCode () -pushUpdateFrame lbl updatee body - = do - updfr <- getUpdFrameOff - dflags <- getDynFlags - let - hdr = fixedHdrSize dflags - frame = updfr + hdr + sIZEOF_StgUpdateFrame_NoHdr dflags - -- - emitUpdateFrame dflags (CmmStackSlot Old frame) lbl updatee - withUpdFrameOff frame body - -emitUpdateFrame :: DynFlags -> CmmExpr -> CLabel -> CmmExpr -> FCode () -emitUpdateFrame dflags frame lbl updatee = do - let - hdr = fixedHdrSize dflags - off_updatee = hdr + oFFSET_StgUpdateFrame_updatee dflags - -- - emitStore frame (mkLblExpr lbl) - emitStore (cmmOffset dflags frame off_updatee) updatee - initUpdFrameProf frame - ------------------------------------------------------------------------------ --- Entering a CAF --- --- See Note [CAF management] in rts/sm/Storage.c - -link_caf :: LocalReg -- pointer to the closure - -> FCode CmmExpr -- Returns amode for closure to be updated --- This function returns the address of the black hole, so it can be --- updated with the new value when available. -link_caf node = do - { dflags <- getDynFlags - -- Call the RTS function newCAF, returning the newly-allocated - -- blackhole indirection closure - ; let newCAF_lbl = mkForeignLabel (fsLit "newCAF") Nothing - ForeignLabelInExternalPackage IsFunction - ; bh <- newTemp (bWord dflags) - ; emitRtsCallGen [(bh,AddrHint)] newCAF_lbl - [ (baseExpr, AddrHint), - (CmmReg (CmmLocal node), AddrHint) ] - False - - -- see Note [atomic CAF entry] in rts/sm/Storage.c - ; updfr <- getUpdFrameOff - ; let target = entryCode dflags (closureInfoPtr dflags (CmmReg (CmmLocal node))) - ; emit =<< mkCmmIfThen - (cmmEqWord dflags (CmmReg (CmmLocal bh)) (zeroExpr dflags)) - -- re-enter the CAF - (mkJump dflags NativeNodeCall target [] updfr) - - ; return (CmmReg (CmmLocal bh)) } - ------------------------------------------------------------------------- --- Profiling ------------------------------------------------------------------------- - --- For "global" data constructors the description is simply occurrence --- name of the data constructor itself. Otherwise it is determined by --- @closureDescription@ from the let binding information. - -closureDescription :: DynFlags - -> Module -- Module - -> Name -- Id of closure binding - -> String - -- Not called for StgRhsCon which have global info tables built in - -- CgConTbls.hs with a description generated from the data constructor -closureDescription dflags mod_name name - = showSDocDump dflags (char '<' <> - (if isExternalName name - then ppr name -- ppr will include the module name prefix - else pprModule mod_name <> char '.' <> ppr name) <> - char '>') - -- showSDocDump, because we want to see the unique on the Name. diff --git a/compiler/codeGen/StgCmmBind.hs-boot b/compiler/codeGen/StgCmmBind.hs-boot deleted file mode 100644 index 8e3dd38ad8..0000000000 --- a/compiler/codeGen/StgCmmBind.hs-boot +++ /dev/null @@ -1,6 +0,0 @@ -module StgCmmBind where - -import StgCmmMonad( FCode ) -import StgSyn( CgStgBinding ) - -cgBind :: CgStgBinding -> FCode () diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs deleted file mode 100644 index ac8db1268f..0000000000 --- a/compiler/codeGen/StgCmmClosure.hs +++ /dev/null @@ -1,1008 +0,0 @@ -{-# LANGUAGE CPP, RecordWildCards #-} - ------------------------------------------------------------------------------ --- --- Stg to C-- code generation: --- --- The types LambdaFormInfo --- ClosureInfo --- --- Nothing monadic in here! --- ------------------------------------------------------------------------------ - -module StgCmmClosure ( - DynTag, tagForCon, isSmallFamily, - - idPrimRep, isVoidRep, isGcPtrRep, addIdReps, addArgReps, - argPrimRep, - - NonVoid(..), fromNonVoid, nonVoidIds, nonVoidStgArgs, - assertNonVoidIds, assertNonVoidStgArgs, - - -- * LambdaFormInfo - LambdaFormInfo, -- Abstract - StandardFormInfo, -- ...ditto... - mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo, - mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape, - mkLFStringLit, - lfDynTag, - isLFThunk, isLFReEntrant, lfUpdatable, - - -- * Used by other modules - CgLoc(..), SelfLoopInfo, CallMethod(..), - nodeMustPointToIt, isKnownFun, funTag, tagForArity, getCallMethod, - - -- * ClosureInfo - ClosureInfo, - mkClosureInfo, - mkCmmInfo, - - -- ** Inspection - closureLFInfo, closureName, - - -- ** Labels - -- These just need the info table label - closureInfoLabel, staticClosureLabel, - closureSlowEntryLabel, closureLocalEntryLabel, - - -- ** Predicates - -- These are really just functions on LambdaFormInfo - closureUpdReqd, closureSingleEntry, - closureReEntrant, closureFunInfo, - isToplevClosure, - - blackHoleOnEntry, -- Needs LambdaFormInfo and SMRep - isStaticClosure, -- Needs SMPre - - -- * InfoTables - mkDataConInfoTable, - cafBlackHoleInfoTable, - indStaticInfoTable, - staticClosureNeedsLink, - ) where - -#include "../includes/MachDeps.h" - -#include "HsVersions.h" - -import GhcPrelude - -import StgSyn -import SMRep -import Cmm -import PprCmmExpr() -- For Outputable instances - -import CostCentre -import BlockId -import CLabel -import Id -import IdInfo -import DataCon -import Name -import Type -import TyCoRep -import TcType -import TyCon -import RepType -import BasicTypes -import Outputable -import DynFlags -import Util - -import Data.Coerce (coerce) -import qualified Data.ByteString.Char8 as BS8 - ------------------------------------------------------------------------------ --- Data types and synonyms ------------------------------------------------------------------------------ - --- These data types are mostly used by other modules, especially StgCmmMonad, --- but we define them here because some functions in this module need to --- have access to them as well - -data CgLoc - = CmmLoc CmmExpr -- A stable CmmExpr; that is, one not mentioning - -- Hp, so that it remains valid across calls - - | LneLoc BlockId [LocalReg] -- A join point - -- A join point (= let-no-escape) should only - -- be tail-called, and in a saturated way. - -- To tail-call it, assign to these locals, - -- and branch to the block id - -instance Outputable CgLoc where - ppr (CmmLoc e) = text "cmm" <+> ppr e - ppr (LneLoc b rs) = text "lne" <+> ppr b <+> ppr rs - -type SelfLoopInfo = (Id, BlockId, [LocalReg]) - --- used by ticky profiling -isKnownFun :: LambdaFormInfo -> Bool -isKnownFun LFReEntrant{} = True -isKnownFun LFLetNoEscape = True -isKnownFun _ = False - - -------------------------------------- --- Non-void types -------------------------------------- --- We frequently need the invariant that an Id or a an argument --- is of a non-void type. This type is a witness to the invariant. - -newtype NonVoid a = NonVoid a - deriving (Eq, Show) - -fromNonVoid :: NonVoid a -> a -fromNonVoid (NonVoid a) = a - -instance (Outputable a) => Outputable (NonVoid a) where - ppr (NonVoid a) = ppr a - -nonVoidIds :: [Id] -> [NonVoid Id] -nonVoidIds ids = [NonVoid id | id <- ids, not (isVoidTy (idType id))] - --- | Used in places where some invariant ensures that all these Ids are --- non-void; e.g. constructor field binders in case expressions. --- See Note [Post-unarisation invariants] in UnariseStg. -assertNonVoidIds :: [Id] -> [NonVoid Id] -assertNonVoidIds ids = ASSERT(not (any (isVoidTy . idType) ids)) - coerce ids - -nonVoidStgArgs :: [StgArg] -> [NonVoid StgArg] -nonVoidStgArgs args = [NonVoid arg | arg <- args, not (isVoidTy (stgArgType arg))] - --- | Used in places where some invariant ensures that all these arguments are --- non-void; e.g. constructor arguments. --- See Note [Post-unarisation invariants] in UnariseStg. -assertNonVoidStgArgs :: [StgArg] -> [NonVoid StgArg] -assertNonVoidStgArgs args = ASSERT(not (any (isVoidTy . stgArgType) args)) - coerce args - - ------------------------------------------------------------------------------ --- Representations ------------------------------------------------------------------------------ - --- Why are these here? - --- | Assumes that there is precisely one 'PrimRep' of the type. This assumption --- holds after unarise. --- See Note [Post-unarisation invariants] -idPrimRep :: Id -> PrimRep -idPrimRep id = typePrimRep1 (idType id) - -- See also Note [VoidRep] in RepType - --- | Assumes that Ids have one PrimRep, which holds after unarisation. --- See Note [Post-unarisation invariants] -addIdReps :: [NonVoid Id] -> [NonVoid (PrimRep, Id)] -addIdReps = map (\id -> let id' = fromNonVoid id - in NonVoid (idPrimRep id', id')) - --- | Assumes that arguments have one PrimRep, which holds after unarisation. --- See Note [Post-unarisation invariants] -addArgReps :: [NonVoid StgArg] -> [NonVoid (PrimRep, StgArg)] -addArgReps = map (\arg -> let arg' = fromNonVoid arg - in NonVoid (argPrimRep arg', arg')) - --- | Assumes that the argument has one PrimRep, which holds after unarisation. --- See Note [Post-unarisation invariants] -argPrimRep :: StgArg -> PrimRep -argPrimRep arg = typePrimRep1 (stgArgType arg) - - ------------------------------------------------------------------------------ --- LambdaFormInfo ------------------------------------------------------------------------------ - --- Information about an identifier, from the code generator's point of --- view. Every identifier is bound to a LambdaFormInfo in the --- environment, which gives the code generator enough info to be able to --- tail call or return that identifier. - -data LambdaFormInfo - = LFReEntrant -- Reentrant closure (a function) - TopLevelFlag -- True if top level - OneShotInfo - !RepArity -- Arity. Invariant: always > 0 - !Bool -- True <=> no fvs - ArgDescr -- Argument descriptor (should really be in ClosureInfo) - - | LFThunk -- Thunk (zero arity) - TopLevelFlag - !Bool -- True <=> no free vars - !Bool -- True <=> updatable (i.e., *not* single-entry) - StandardFormInfo - !Bool -- True <=> *might* be a function type - - | LFCon -- A saturated constructor application - DataCon -- The constructor - - | LFUnknown -- Used for function arguments and imported things. - -- We know nothing about this closure. - -- Treat like updatable "LFThunk"... - -- Imported things which we *do* know something about use - -- one of the other LF constructors (eg LFReEntrant for - -- known functions) - !Bool -- True <=> *might* be a function type - -- The False case is good when we want to enter it, - -- because then we know the entry code will do - -- For a function, the entry code is the fast entry point - - | LFUnlifted -- A value of unboxed type; - -- always a value, needs evaluation - - | LFLetNoEscape -- See LetNoEscape module for precise description - - -------------------------- --- StandardFormInfo tells whether this thunk has one of --- a small number of standard forms - -data StandardFormInfo - = NonStandardThunk - -- The usual case: not of the standard forms - - | SelectorThunk - -- A SelectorThunk is of form - -- case x of - -- con a1,..,an -> ak - -- and the constructor is from a single-constr type. - WordOff -- 0-origin offset of ak within the "goods" of - -- constructor (Recall that the a1,...,an may be laid - -- out in the heap in a non-obvious order.) - - | ApThunk - -- An ApThunk is of form - -- x1 ... xn - -- The code for the thunk just pushes x2..xn on the stack and enters x1. - -- There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled - -- in the RTS to save space. - RepArity -- Arity, n - - ------------------------------------------------------- --- Building LambdaFormInfo ------------------------------------------------------- - -mkLFArgument :: Id -> LambdaFormInfo -mkLFArgument id - | isUnliftedType ty = LFUnlifted - | might_be_a_function ty = LFUnknown True - | otherwise = LFUnknown False - where - ty = idType id - -------------- -mkLFLetNoEscape :: LambdaFormInfo -mkLFLetNoEscape = LFLetNoEscape - -------------- -mkLFReEntrant :: TopLevelFlag -- True of top level - -> [Id] -- Free vars - -> [Id] -- Args - -> ArgDescr -- Argument descriptor - -> LambdaFormInfo - -mkLFReEntrant _ _ [] _ - = pprPanic "mkLFReEntrant" empty -mkLFReEntrant top fvs args arg_descr - = LFReEntrant top os_info (length args) (null fvs) arg_descr - where os_info = idOneShotInfo (head args) - -------------- -mkLFThunk :: Type -> TopLevelFlag -> [Id] -> UpdateFlag -> LambdaFormInfo -mkLFThunk thunk_ty top fvs upd_flag - = ASSERT( not (isUpdatable upd_flag) || not (isUnliftedType thunk_ty) ) - LFThunk top (null fvs) - (isUpdatable upd_flag) - NonStandardThunk - (might_be_a_function thunk_ty) - --------------- -might_be_a_function :: Type -> Bool --- Return False only if we are *sure* it's a data type --- Look through newtypes etc as much as poss -might_be_a_function ty - | [LiftedRep] <- typePrimRep ty - , Just tc <- tyConAppTyCon_maybe (unwrapType ty) - , isDataTyCon tc - = False - | otherwise - = True - -------------- -mkConLFInfo :: DataCon -> LambdaFormInfo -mkConLFInfo con = LFCon con - -------------- -mkSelectorLFInfo :: Id -> Int -> Bool -> LambdaFormInfo -mkSelectorLFInfo id offset updatable - = LFThunk NotTopLevel False updatable (SelectorThunk offset) - (might_be_a_function (idType id)) - -------------- -mkApLFInfo :: Id -> UpdateFlag -> Arity -> LambdaFormInfo -mkApLFInfo id upd_flag arity - = LFThunk NotTopLevel (arity == 0) (isUpdatable upd_flag) (ApThunk arity) - (might_be_a_function (idType id)) - -------------- -mkLFImported :: Id -> LambdaFormInfo -mkLFImported id - | Just con <- isDataConWorkId_maybe id - , isNullaryRepDataCon con - = LFCon con -- An imported nullary constructor - -- We assume that the constructor is evaluated so that - -- the id really does point directly to the constructor - - | arity > 0 - = LFReEntrant TopLevel noOneShotInfo arity True (panic "arg_descr") - - | otherwise - = mkLFArgument id -- Not sure of exact arity - where - arity = idFunRepArity id - -------------- -mkLFStringLit :: LambdaFormInfo -mkLFStringLit = LFUnlifted - ------------------------------------------------------ --- Dynamic pointer tagging ------------------------------------------------------ - -type DynTag = Int -- The tag on a *pointer* - -- (from the dynamic-tagging paper) - --- Note [Data constructor dynamic tags] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- --- The family size of a data type (the number of constructors --- or the arity of a function) can be either: --- * small, if the family size < 2**tag_bits --- * big, otherwise. --- --- Small families can have the constructor tag in the tag bits. --- Big families only use the tag value 1 to represent evaluatedness. --- We don't have very many tag bits: for example, we have 2 bits on --- x86-32 and 3 bits on x86-64. - -isSmallFamily :: DynFlags -> Int -> Bool -isSmallFamily dflags fam_size = fam_size <= mAX_PTR_TAG dflags - -tagForCon :: DynFlags -> DataCon -> DynTag -tagForCon dflags con - | isSmallFamily dflags fam_size = con_tag - | otherwise = 1 - where - con_tag = dataConTag con -- NB: 1-indexed - fam_size = tyConFamilySize (dataConTyCon con) - -tagForArity :: DynFlags -> RepArity -> DynTag -tagForArity dflags arity - | isSmallFamily dflags arity = arity - | otherwise = 0 - -lfDynTag :: DynFlags -> LambdaFormInfo -> DynTag --- Return the tag in the low order bits of a variable bound --- to this LambdaForm -lfDynTag dflags (LFCon con) = tagForCon dflags con -lfDynTag dflags (LFReEntrant _ _ arity _ _) = tagForArity dflags arity -lfDynTag _ _other = 0 - - ------------------------------------------------------------------------------ --- Observing LambdaFormInfo ------------------------------------------------------------------------------ - ------------- -isLFThunk :: LambdaFormInfo -> Bool -isLFThunk (LFThunk {}) = True -isLFThunk _ = False - -isLFReEntrant :: LambdaFormInfo -> Bool -isLFReEntrant (LFReEntrant {}) = True -isLFReEntrant _ = False - ------------------------------------------------------------------------------ --- Choosing SM reps ------------------------------------------------------------------------------ - -lfClosureType :: LambdaFormInfo -> ClosureTypeInfo -lfClosureType (LFReEntrant _ _ arity _ argd) = Fun arity argd -lfClosureType (LFCon con) = Constr (dataConTagZ con) - (dataConIdentity con) -lfClosureType (LFThunk _ _ _ is_sel _) = thunkClosureType is_sel -lfClosureType _ = panic "lfClosureType" - -thunkClosureType :: StandardFormInfo -> ClosureTypeInfo -thunkClosureType (SelectorThunk off) = ThunkSelector off -thunkClosureType _ = Thunk - --- We *do* get non-updatable top-level thunks sometimes. eg. f = g --- gets compiled to a jump to g (if g has non-zero arity), instead of --- messing around with update frames and PAPs. We set the closure type --- to FUN_STATIC in this case. - ------------------------------------------------------------------------------ --- nodeMustPointToIt ------------------------------------------------------------------------------ - -nodeMustPointToIt :: DynFlags -> LambdaFormInfo -> Bool --- If nodeMustPointToIt is true, then the entry convention for --- this closure has R1 (the "Node" register) pointing to the --- closure itself --- the "self" argument - -nodeMustPointToIt _ (LFReEntrant top _ _ no_fvs _) - = not no_fvs -- Certainly if it has fvs we need to point to it - || isNotTopLevel top -- See Note [GC recovery] - -- For lex_profiling we also access the cost centre for a - -- non-inherited (i.e. non-top-level) function. - -- The isNotTopLevel test above ensures this is ok. - -nodeMustPointToIt dflags (LFThunk top no_fvs updatable NonStandardThunk _) - = not no_fvs -- Self parameter - || isNotTopLevel top -- Note [GC recovery] - || updatable -- Need to push update frame - || gopt Opt_SccProfilingOn dflags - -- For the non-updatable (single-entry case): - -- - -- True if has fvs (in which case we need access to them, and we - -- should black-hole it) - -- or profiling (in which case we need to recover the cost centre - -- from inside it) ToDo: do we need this even for - -- top-level thunks? If not, - -- isNotTopLevel subsumes this - -nodeMustPointToIt _ (LFThunk {}) -- Node must point to a standard-form thunk - = True - -nodeMustPointToIt _ (LFCon _) = True - - -- Strictly speaking, the above two don't need Node to point - -- to it if the arity = 0. But this is a *really* unlikely - -- situation. If we know it's nil (say) and we are entering - -- it. Eg: let x = [] in x then we will certainly have inlined - -- x, since nil is a simple atom. So we gain little by not - -- having Node point to known zero-arity things. On the other - -- hand, we do lose something; Patrick's code for figuring out - -- when something has been updated but not entered relies on - -- having Node point to the result of an update. SLPJ - -- 27/11/92. - -nodeMustPointToIt _ (LFUnknown _) = True -nodeMustPointToIt _ LFUnlifted = False -nodeMustPointToIt _ LFLetNoEscape = False - -{- Note [GC recovery] -~~~~~~~~~~~~~~~~~~~~~ -If we a have a local let-binding (function or thunk) - let f = <body> in ... -AND <body> allocates, then the heap-overflow check needs to know how -to re-start the evaluation. It uses the "self" pointer to do this. -So even if there are no free variables in <body>, we still make -nodeMustPointToIt be True for non-top-level bindings. - -Why do any such bindings exist? After all, let-floating should have -floated them out. Well, a clever optimiser might leave one there to -avoid a space leak, deliberately recomputing a thunk. Also (and this -really does happen occasionally) let-floating may make a function f smaller -so it can be inlined, so now (f True) may generate a local no-fv closure. -This actually happened during bootstrapping GHC itself, with f=mkRdrFunBind -in TcGenDeriv.) -} - ------------------------------------------------------------------------------ --- getCallMethod ------------------------------------------------------------------------------ - -{- The entry conventions depend on the type of closure being entered, -whether or not it has free variables, and whether we're running -sequentially or in parallel. - -Closure Node Argument Enter -Characteristics Par Req'd Passing Via ---------------------------------------------------------------------------- -Unknown & no & yes & stack & node -Known fun (>1 arg), no fvs & no & no & registers & fast entry (enough args) - & slow entry (otherwise) -Known fun (>1 arg), fvs & no & yes & registers & fast entry (enough args) -0 arg, no fvs \r,\s & no & no & n/a & direct entry -0 arg, no fvs \u & no & yes & n/a & node -0 arg, fvs \r,\s,selector & no & yes & n/a & node -0 arg, fvs \r,\s & no & yes & n/a & direct entry -0 arg, fvs \u & no & yes & n/a & node -Unknown & yes & yes & stack & node -Known fun (>1 arg), no fvs & yes & no & registers & fast entry (enough args) - & slow entry (otherwise) -Known fun (>1 arg), fvs & yes & yes & registers & node -0 arg, fvs \r,\s,selector & yes & yes & n/a & node -0 arg, no fvs \r,\s & yes & no & n/a & direct entry -0 arg, no fvs \u & yes & yes & n/a & node -0 arg, fvs \r,\s & yes & yes & n/a & node -0 arg, fvs \u & yes & yes & n/a & node - -When black-holing, single-entry closures could also be entered via node -(rather than directly) to catch double-entry. -} - -data CallMethod - = EnterIt -- No args, not a function - - | JumpToIt BlockId [LocalReg] -- A join point or a header of a local loop - - | ReturnIt -- It's a value (function, unboxed value, - -- or constructor), so just return it. - - | SlowCall -- Unknown fun, or known fun with - -- too few args. - - | DirectEntry -- Jump directly, with args in regs - CLabel -- The code label - RepArity -- Its arity - -getCallMethod :: DynFlags - -> Name -- Function being applied - -> Id -- Function Id used to chech if it can refer to - -- CAF's and whether the function is tail-calling - -- itself - -> LambdaFormInfo -- Its info - -> RepArity -- Number of available arguments - -> RepArity -- Number of them being void arguments - -> CgLoc -- Passed in from cgIdApp so that we can - -- handle let-no-escape bindings and self-recursive - -- tail calls using the same data constructor, - -- JumpToIt. This saves us one case branch in - -- cgIdApp - -> Maybe SelfLoopInfo -- can we perform a self-recursive tail call? - -> CallMethod - -getCallMethod dflags _ id _ n_args v_args _cg_loc - (Just (self_loop_id, block_id, args)) - | gopt Opt_Loopification dflags - , id == self_loop_id - , args `lengthIs` (n_args - v_args) - -- If these patterns match then we know that: - -- * loopification optimisation is turned on - -- * function is performing a self-recursive call in a tail position - -- * number of non-void parameters of the function matches functions arity. - -- See Note [Self-recursive tail calls] and Note [Void arguments in - -- self-recursive tail calls] in StgCmmExpr for more details - = JumpToIt block_id args - -getCallMethod dflags name id (LFReEntrant _ _ arity _ _) n_args _v_args _cg_loc - _self_loop_info - | n_args == 0 -- No args at all - && not (gopt Opt_SccProfilingOn dflags) - -- See Note [Evaluating functions with profiling] in rts/Apply.cmm - = ASSERT( arity /= 0 ) ReturnIt - | n_args < arity = SlowCall -- Not enough args - | otherwise = DirectEntry (enterIdLabel dflags name (idCafInfo id)) arity - -getCallMethod _ _name _ LFUnlifted n_args _v_args _cg_loc _self_loop_info - = ASSERT( n_args == 0 ) ReturnIt - -getCallMethod _ _name _ (LFCon _) n_args _v_args _cg_loc _self_loop_info - = ASSERT( n_args == 0 ) ReturnIt - -- n_args=0 because it'd be ill-typed to apply a saturated - -- constructor application to anything - -getCallMethod dflags name id (LFThunk _ _ updatable std_form_info is_fun) - n_args _v_args _cg_loc _self_loop_info - | is_fun -- it *might* be a function, so we must "call" it (which is always safe) - = SlowCall -- We cannot just enter it [in eval/apply, the entry code - -- is the fast-entry code] - - -- Since is_fun is False, we are *definitely* looking at a data value - | updatable || gopt Opt_Ticky dflags -- to catch double entry - {- OLD: || opt_SMP - I decided to remove this, because in SMP mode it doesn't matter - if we enter the same thunk multiple times, so the optimisation - of jumping directly to the entry code is still valid. --SDM - -} - = EnterIt - - -- even a non-updatable selector thunk can be updated by the garbage - -- collector, so we must enter it. (#8817) - | SelectorThunk{} <- std_form_info - = EnterIt - - -- We used to have ASSERT( n_args == 0 ), but actually it is - -- possible for the optimiser to generate - -- let bot :: Int = error Int "urk" - -- in (bot `cast` unsafeCoerce Int (Int -> Int)) 3 - -- This happens as a result of the case-of-error transformation - -- So the right thing to do is just to enter the thing - - | otherwise -- Jump direct to code for single-entry thunks - = ASSERT( n_args == 0 ) - DirectEntry (thunkEntryLabel dflags name (idCafInfo id) std_form_info - updatable) 0 - -getCallMethod _ _name _ (LFUnknown True) _n_arg _v_args _cg_locs _self_loop_info - = SlowCall -- might be a function - -getCallMethod _ name _ (LFUnknown False) n_args _v_args _cg_loc _self_loop_info - = ASSERT2( n_args == 0, ppr name <+> ppr n_args ) - EnterIt -- Not a function - -getCallMethod _ _name _ LFLetNoEscape _n_args _v_args (LneLoc blk_id lne_regs) - _self_loop_info - = JumpToIt blk_id lne_regs - -getCallMethod _ _ _ _ _ _ _ _ = panic "Unknown call method" - ------------------------------------------------------------------------------ --- Data types for closure information ------------------------------------------------------------------------------ - - -{- ClosureInfo: information about a binding - - We make a ClosureInfo for each let binding (both top level and not), - but not bindings for data constructors: for those we build a CmmInfoTable - directly (see mkDataConInfoTable). - - To a first approximation: - ClosureInfo = (LambdaFormInfo, CmmInfoTable) - - A ClosureInfo has enough information - a) to construct the info table itself, and build other things - related to the binding (e.g. slow entry points for a function) - b) to allocate a closure containing that info pointer (i.e. - it knows the info table label) --} - -data ClosureInfo - = ClosureInfo { - closureName :: !Name, -- The thing bound to this closure - -- we don't really need this field: it's only used in generating - -- code for ticky and profiling, and we could pass the information - -- around separately, but it doesn't do much harm to keep it here. - - closureLFInfo :: !LambdaFormInfo, -- NOTE: not an LFCon - -- this tells us about what the closure contains: it's right-hand-side. - - -- the rest is just an unpacked CmmInfoTable. - closureInfoLabel :: !CLabel, - closureSMRep :: !SMRep, -- representation used by storage mgr - closureProf :: !ProfilingInfo - } - --- | Convert from 'ClosureInfo' to 'CmmInfoTable'. -mkCmmInfo :: ClosureInfo -> Id -> CostCentreStack -> CmmInfoTable -mkCmmInfo ClosureInfo {..} id ccs - = CmmInfoTable { cit_lbl = closureInfoLabel - , cit_rep = closureSMRep - , cit_prof = closureProf - , cit_srt = Nothing - , cit_clo = if isStaticRep closureSMRep - then Just (id,ccs) - else Nothing } - --------------------------------------- --- Building ClosureInfos --------------------------------------- - -mkClosureInfo :: DynFlags - -> Bool -- Is static - -> Id - -> LambdaFormInfo - -> Int -> Int -- Total and pointer words - -> String -- String descriptor - -> ClosureInfo -mkClosureInfo dflags is_static id lf_info tot_wds ptr_wds val_descr - = ClosureInfo { closureName = name - , closureLFInfo = lf_info - , closureInfoLabel = info_lbl -- These three fields are - , closureSMRep = sm_rep -- (almost) an info table - , closureProf = prof } -- (we don't have an SRT yet) - where - name = idName id - sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds (lfClosureType lf_info) - prof = mkProfilingInfo dflags id val_descr - nonptr_wds = tot_wds - ptr_wds - - info_lbl = mkClosureInfoTableLabel id lf_info - --------------------------------------- --- Other functions over ClosureInfo --------------------------------------- - --- Eager blackholing is normally disabled, but can be turned on with --- -feager-blackholing. When it is on, we replace the info pointer of --- the thunk with stg_EAGER_BLACKHOLE_info on entry. - --- If we wanted to do eager blackholing with slop filling, --- we'd need to do it at the *end* of a basic block, otherwise --- we overwrite the free variables in the thunk that we still --- need. We have a patch for this from Andy Cheadle, but not --- incorporated yet. --SDM [6/2004] --- --- Previously, eager blackholing was enabled when ticky-ticky --- was on. But it didn't work, and it wasn't strictly necessary --- to bring back minimal ticky-ticky, so now EAGER_BLACKHOLING --- is unconditionally disabled. -- krc 1/2007 --- --- Static closures are never themselves black-holed. - -blackHoleOnEntry :: ClosureInfo -> Bool -blackHoleOnEntry cl_info - | isStaticRep (closureSMRep cl_info) - = False -- Never black-hole a static closure - - | otherwise - = case closureLFInfo cl_info of - LFReEntrant {} -> False - LFLetNoEscape -> False - LFThunk _ _no_fvs upd _ _ -> upd -- See Note [Black-holing non-updatable thunks] - _other -> panic "blackHoleOnEntry" - -{- Note [Black-holing non-updatable thunks] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We must not black-hole non-updatable (single-entry) thunks otherwise -we run into issues like #10414. Specifically: - - * There is no reason to black-hole a non-updatable thunk: it should - not be competed for by multiple threads - - * It could, conceivably, cause a space leak if we don't black-hole - it, if there was a live but never-followed pointer pointing to it. - Let's hope that doesn't happen. - - * It is dangerous to black-hole a non-updatable thunk because - - is not updated (of course) - - hence, if it is black-holed and another thread tries to evaluate - it, that thread will block forever - This actually happened in #10414. So we do not black-hole - non-updatable thunks. - - * How could two threads evaluate the same non-updatable (single-entry) - thunk? See Reid Barton's example below. - - * Only eager blackholing could possibly black-hole a non-updatable - thunk, because lazy black-holing only affects thunks with an - update frame on the stack. - -Here is and example due to Reid Barton (#10414): - x = \u [] concat [[1], []] -with the following definitions, - - concat x = case x of - [] -> [] - (:) x xs -> (++) x (concat xs) - - (++) xs ys = case xs of - [] -> ys - (:) x rest -> (:) x ((++) rest ys) - -Where we use the syntax @\u []@ to denote an updatable thunk and @\s []@ to -denote a single-entry (i.e. non-updatable) thunk. After a thread evaluates @x@ -to WHNF and calls @(++)@ the heap will contain the following thunks, - - x = 1 : y - y = \u [] (++) [] z - z = \s [] concat [] - -Now that the stage is set, consider the follow evaluations by two racing threads -A and B, - - 1. Both threads enter @y@ before either is able to replace it with an - indirection - - 2. Thread A does the case analysis in @(++)@ and consequently enters @z@, - replacing it with a black-hole - - 3. At some later point thread B does the same case analysis and also attempts - to enter @z@. However, it finds that it has been replaced with a black-hole - so it blocks. - - 4. Thread A eventually finishes evaluating @z@ (to @[]@) and updates @y@ - accordingly. It does *not* update @z@, however, as it is single-entry. This - leaves Thread B blocked forever on a black-hole which will never be - updated. - -To avoid this sort of condition we never black-hole non-updatable thunks. --} - -isStaticClosure :: ClosureInfo -> Bool -isStaticClosure cl_info = isStaticRep (closureSMRep cl_info) - -closureUpdReqd :: ClosureInfo -> Bool -closureUpdReqd ClosureInfo{ closureLFInfo = lf_info } = lfUpdatable lf_info - -lfUpdatable :: LambdaFormInfo -> Bool -lfUpdatable (LFThunk _ _ upd _ _) = upd -lfUpdatable _ = False - -closureSingleEntry :: ClosureInfo -> Bool -closureSingleEntry (ClosureInfo { closureLFInfo = LFThunk _ _ upd _ _}) = not upd -closureSingleEntry (ClosureInfo { closureLFInfo = LFReEntrant _ OneShotLam _ _ _}) = True -closureSingleEntry _ = False - -closureReEntrant :: ClosureInfo -> Bool -closureReEntrant (ClosureInfo { closureLFInfo = LFReEntrant {} }) = True -closureReEntrant _ = False - -closureFunInfo :: ClosureInfo -> Maybe (RepArity, ArgDescr) -closureFunInfo (ClosureInfo { closureLFInfo = lf_info }) = lfFunInfo lf_info - -lfFunInfo :: LambdaFormInfo -> Maybe (RepArity, ArgDescr) -lfFunInfo (LFReEntrant _ _ arity _ arg_desc) = Just (arity, arg_desc) -lfFunInfo _ = Nothing - -funTag :: DynFlags -> ClosureInfo -> DynTag -funTag dflags (ClosureInfo { closureLFInfo = lf_info }) - = lfDynTag dflags lf_info - -isToplevClosure :: ClosureInfo -> Bool -isToplevClosure (ClosureInfo { closureLFInfo = lf_info }) - = case lf_info of - LFReEntrant TopLevel _ _ _ _ -> True - LFThunk TopLevel _ _ _ _ -> True - _other -> False - --------------------------------------- --- Label generation --------------------------------------- - -staticClosureLabel :: ClosureInfo -> CLabel -staticClosureLabel = toClosureLbl . closureInfoLabel - -closureSlowEntryLabel :: ClosureInfo -> CLabel -closureSlowEntryLabel = toSlowEntryLbl . closureInfoLabel - -closureLocalEntryLabel :: DynFlags -> ClosureInfo -> CLabel -closureLocalEntryLabel dflags - | tablesNextToCode dflags = toInfoLbl . closureInfoLabel - | otherwise = toEntryLbl . closureInfoLabel - -mkClosureInfoTableLabel :: Id -> LambdaFormInfo -> CLabel -mkClosureInfoTableLabel id lf_info - = case lf_info of - LFThunk _ _ upd_flag (SelectorThunk offset) _ - -> mkSelectorInfoLabel upd_flag offset - - LFThunk _ _ upd_flag (ApThunk arity) _ - -> mkApInfoTableLabel upd_flag arity - - LFThunk{} -> std_mk_lbl name cafs - LFReEntrant{} -> std_mk_lbl name cafs - _other -> panic "closureInfoTableLabel" - - where - name = idName id - - std_mk_lbl | is_local = mkLocalInfoTableLabel - | otherwise = mkInfoTableLabel - - cafs = idCafInfo id - is_local = isDataConWorkId id - -- Make the _info pointer for the implicit datacon worker - -- binding local. The reason we can do this is that importing - -- code always either uses the _closure or _con_info. By the - -- invariants in CorePrep anything else gets eta expanded. - - -thunkEntryLabel :: DynFlags -> Name -> CafInfo -> StandardFormInfo -> Bool -> CLabel --- thunkEntryLabel is a local help function, not exported. It's used from --- getCallMethod. -thunkEntryLabel dflags _thunk_id _ (ApThunk arity) upd_flag - = enterApLabel dflags upd_flag arity -thunkEntryLabel dflags _thunk_id _ (SelectorThunk offset) upd_flag - = enterSelectorLabel dflags upd_flag offset -thunkEntryLabel dflags thunk_id c _ _ - = enterIdLabel dflags thunk_id c - -enterApLabel :: DynFlags -> Bool -> Arity -> CLabel -enterApLabel dflags is_updatable arity - | tablesNextToCode dflags = mkApInfoTableLabel is_updatable arity - | otherwise = mkApEntryLabel is_updatable arity - -enterSelectorLabel :: DynFlags -> Bool -> WordOff -> CLabel -enterSelectorLabel dflags upd_flag offset - | tablesNextToCode dflags = mkSelectorInfoLabel upd_flag offset - | otherwise = mkSelectorEntryLabel upd_flag offset - -enterIdLabel :: DynFlags -> Name -> CafInfo -> CLabel -enterIdLabel dflags id c - | tablesNextToCode dflags = mkInfoTableLabel id c - | otherwise = mkEntryLabel id c - - --------------------------------------- --- Profiling --------------------------------------- - --- Profiling requires two pieces of information to be determined for --- each closure's info table --- description and type. - --- The description is stored directly in the @CClosureInfoTable@ when the --- info table is built. - --- The type is determined from the type information stored with the @Id@ --- in the closure info using @closureTypeDescr@. - -mkProfilingInfo :: DynFlags -> Id -> String -> ProfilingInfo -mkProfilingInfo dflags id val_descr - | not (gopt Opt_SccProfilingOn dflags) = NoProfilingInfo - | otherwise = ProfilingInfo ty_descr_w8 (BS8.pack val_descr) - where - ty_descr_w8 = BS8.pack (getTyDescription (idType id)) - -getTyDescription :: Type -> String -getTyDescription ty - = case (tcSplitSigmaTy ty) of { (_, _, tau_ty) -> - case tau_ty of - TyVarTy _ -> "*" - AppTy fun _ -> getTyDescription fun - TyConApp tycon _ -> getOccString tycon - FunTy {} -> '-' : fun_result tau_ty - ForAllTy _ ty -> getTyDescription ty - LitTy n -> getTyLitDescription n - CastTy ty _ -> getTyDescription ty - CoercionTy co -> pprPanic "getTyDescription" (ppr co) - } - where - fun_result (FunTy { ft_res = res }) = '>' : fun_result res - fun_result other = getTyDescription other - -getTyLitDescription :: TyLit -> String -getTyLitDescription l = - case l of - NumTyLit n -> show n - StrTyLit n -> show n - --------------------------------------- --- CmmInfoTable-related things --------------------------------------- - -mkDataConInfoTable :: DynFlags -> DataCon -> Bool -> Int -> Int -> CmmInfoTable -mkDataConInfoTable dflags data_con is_static ptr_wds nonptr_wds - = CmmInfoTable { cit_lbl = info_lbl - , cit_rep = sm_rep - , cit_prof = prof - , cit_srt = Nothing - , cit_clo = Nothing } - where - name = dataConName data_con - info_lbl = mkConInfoTableLabel name NoCafRefs - sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds cl_type - cl_type = Constr (dataConTagZ data_con) (dataConIdentity data_con) - -- We keep the *zero-indexed* tag in the srt_len field - -- of the info table of a data constructor. - - prof | not (gopt Opt_SccProfilingOn dflags) = NoProfilingInfo - | otherwise = ProfilingInfo ty_descr val_descr - - ty_descr = BS8.pack $ occNameString $ getOccName $ dataConTyCon data_con - val_descr = BS8.pack $ occNameString $ getOccName data_con - --- We need a black-hole closure info to pass to @allocDynClosure@ when we --- want to allocate the black hole on entry to a CAF. - -cafBlackHoleInfoTable :: CmmInfoTable -cafBlackHoleInfoTable - = CmmInfoTable { cit_lbl = mkCAFBlackHoleInfoTableLabel - , cit_rep = blackHoleRep - , cit_prof = NoProfilingInfo - , cit_srt = Nothing - , cit_clo = Nothing } - -indStaticInfoTable :: CmmInfoTable -indStaticInfoTable - = CmmInfoTable { cit_lbl = mkIndStaticInfoLabel - , cit_rep = indStaticRep - , cit_prof = NoProfilingInfo - , cit_srt = Nothing - , cit_clo = Nothing } - -staticClosureNeedsLink :: Bool -> CmmInfoTable -> Bool --- A static closure needs a link field to aid the GC when traversing --- the static closure graph. But it only needs such a field if either --- a) it has an SRT --- b) it's a constructor with one or more pointer fields --- In case (b), the constructor's fields themselves play the role --- of the SRT. -staticClosureNeedsLink has_srt CmmInfoTable{ cit_rep = smrep } - | isConRep smrep = not (isStaticNoCafCon smrep) - | otherwise = has_srt diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs deleted file mode 100644 index 67a9776eac..0000000000 --- a/compiler/codeGen/StgCmmCon.hs +++ /dev/null @@ -1,285 +0,0 @@ -{-# LANGUAGE CPP #-} - ------------------------------------------------------------------------------ --- --- Stg to C--: code generation for constructors --- --- This module provides the support code for StgCmm to deal with with --- constructors on the RHSs of let(rec)s. --- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ - -module StgCmmCon ( - cgTopRhsCon, buildDynCon, bindConArgs - ) where - -#include "HsVersions.h" - -import GhcPrelude - -import StgSyn -import CoreSyn ( AltCon(..) ) - -import StgCmmMonad -import StgCmmEnv -import StgCmmHeap -import StgCmmLayout -import StgCmmUtils -import StgCmmClosure - -import CmmExpr -import CmmUtils -import CLabel -import MkGraph -import SMRep -import CostCentre -import Module -import DataCon -import DynFlags -import FastString -import Id -import RepType (countConRepArgs) -import Literal -import PrelInfo -import Outputable -import GHC.Platform -import Util -import MonadUtils (mapMaybeM) - -import Control.Monad -import Data.Char - - - ---------------------------------------------------------------- --- Top-level constructors ---------------------------------------------------------------- - -cgTopRhsCon :: DynFlags - -> Id -- Name of thing bound to this RHS - -> DataCon -- Id - -> [NonVoid StgArg] -- Args - -> (CgIdInfo, FCode ()) -cgTopRhsCon dflags id con args = - let id_info = litIdInfo dflags id (mkConLFInfo con) (CmmLabel closure_label) - in (id_info, gen_code) - where - name = idName id - caffy = idCafInfo id -- any stgArgHasCafRefs args - closure_label = mkClosureLabel name caffy - - gen_code = - do { this_mod <- getModuleName - ; when (platformOS (targetPlatform dflags) == OSMinGW32) $ - -- Windows DLLs have a problem with static cross-DLL refs. - MASSERT( not (isDllConApp dflags this_mod con (map fromNonVoid args)) ) - ; ASSERT( args `lengthIs` countConRepArgs con ) return () - - -- LAY IT OUT - ; let - (tot_wds, -- #ptr_wds + #nonptr_wds - ptr_wds, -- #ptr_wds - nv_args_w_offsets) = - mkVirtHeapOffsetsWithPadding dflags StdHeader (addArgReps args) - - mk_payload (Padding len _) = return (CmmInt 0 (widthFromBytes len)) - mk_payload (FieldOff arg _) = do - amode <- getArgAmode arg - case amode of - CmmLit lit -> return lit - _ -> panic "StgCmmCon.cgTopRhsCon" - - nonptr_wds = tot_wds - ptr_wds - - -- we're not really going to emit an info table, so having - -- to make a CmmInfoTable is a bit overkill, but mkStaticClosureFields - -- needs to poke around inside it. - info_tbl = mkDataConInfoTable dflags con True ptr_wds nonptr_wds - - - ; payload <- mapM mk_payload nv_args_w_offsets - -- NB1: nv_args_w_offsets is sorted into ptrs then non-ptrs - -- NB2: all the amodes should be Lits! - -- TODO (osa): Why? - - ; let closure_rep = mkStaticClosureFields - dflags - info_tbl - dontCareCCS -- Because it's static data - caffy -- Has CAF refs - payload - - -- BUILD THE OBJECT - ; emitDataLits closure_label closure_rep - - ; return () } - - ---------------------------------------------------------------- --- Lay out and allocate non-top-level constructors ---------------------------------------------------------------- - -buildDynCon :: Id -- Name of the thing to which this constr will - -- be bound - -> Bool -- is it genuinely bound to that name, or just - -- for profiling? - -> CostCentreStack -- Where to grab cost centre from; - -- current CCS if currentOrSubsumedCCS - -> DataCon -- The data constructor - -> [NonVoid StgArg] -- Its args - -> FCode (CgIdInfo, FCode CmmAGraph) - -- Return details about how to find it and initialization code -buildDynCon binder actually_bound cc con args - = do dflags <- getDynFlags - buildDynCon' dflags (targetPlatform dflags) binder actually_bound cc con args - - -buildDynCon' :: DynFlags - -> Platform - -> Id -> Bool - -> CostCentreStack - -> DataCon - -> [NonVoid StgArg] - -> FCode (CgIdInfo, FCode CmmAGraph) - -{- We used to pass a boolean indicating whether all the -args were of size zero, so we could use a static -constructor; but I concluded that it just isn't worth it. -Now I/O uses unboxed tuples there just aren't any constructors -with all size-zero args. - -The reason for having a separate argument, rather than looking at -the addr modes of the args is that we may be in a "knot", and -premature looking at the args will cause the compiler to black-hole! --} - - --------- buildDynCon': Nullary constructors -------------- --- First we deal with the case of zero-arity constructors. They --- will probably be unfolded, so we don't expect to see this case much, --- if at all, but it does no harm, and sets the scene for characters. --- --- In the case of zero-arity constructors, or, more accurately, those --- which have exclusively size-zero (VoidRep) args, we generate no code --- at all. - -buildDynCon' dflags _ binder _ _cc con [] - | isNullaryRepDataCon con - = return (litIdInfo dflags binder (mkConLFInfo con) - (CmmLabel (mkClosureLabel (dataConName con) (idCafInfo binder))), - return mkNop) - --------- buildDynCon': Charlike and Intlike constructors ----------- -{- The following three paragraphs about @Char@-like and @Int@-like -closures are obsolete, but I don't understand the details well enough -to properly word them, sorry. I've changed the treatment of @Char@s to -be analogous to @Int@s: only a subset is preallocated, because @Char@ -has now 31 bits. Only literals are handled here. -- Qrczak - -Now for @Char@-like closures. We generate an assignment of the -address of the closure to a temporary. It would be possible simply to -generate no code, and record the addressing mode in the environment, -but we'd have to be careful if the argument wasn't a constant --- so -for simplicity we just always assign to a temporary. - -Last special case: @Int@-like closures. We only special-case the -situation in which the argument is a literal in the range -@mIN_INTLIKE@..@mAX_INTLILKE@. NB: for @Char@-like closures we can -work with any old argument, but for @Int@-like ones the argument has -to be a literal. Reason: @Char@ like closures have an argument type -which is guaranteed in range. - -Because of this, we use can safely return an addressing mode. - -We don't support this optimisation when compiling into Windows DLLs yet -because they don't support cross package data references well. --} - -buildDynCon' dflags platform binder _ _cc con [arg] - | maybeIntLikeCon con - , platformOS platform /= OSMinGW32 || not (positionIndependent dflags) - , NonVoid (StgLitArg (LitNumber LitNumInt val _)) <- arg - , val <= fromIntegral (mAX_INTLIKE dflags) -- Comparisons at type Integer! - , val >= fromIntegral (mIN_INTLIKE dflags) -- ...ditto... - = do { let intlike_lbl = mkCmmClosureLabel rtsUnitId (fsLit "stg_INTLIKE") - val_int = fromIntegral val :: Int - offsetW = (val_int - mIN_INTLIKE dflags) * (fixedHdrSizeW dflags + 1) - -- INTLIKE closures consist of a header and one word payload - intlike_amode = cmmLabelOffW dflags intlike_lbl offsetW - ; return ( litIdInfo dflags binder (mkConLFInfo con) intlike_amode - , return mkNop) } - -buildDynCon' dflags platform binder _ _cc con [arg] - | maybeCharLikeCon con - , platformOS platform /= OSMinGW32 || not (positionIndependent dflags) - , NonVoid (StgLitArg (LitChar val)) <- arg - , let val_int = ord val :: Int - , val_int <= mAX_CHARLIKE dflags - , val_int >= mIN_CHARLIKE dflags - = do { let charlike_lbl = mkCmmClosureLabel rtsUnitId (fsLit "stg_CHARLIKE") - offsetW = (val_int - mIN_CHARLIKE dflags) * (fixedHdrSizeW dflags + 1) - -- CHARLIKE closures consist of a header and one word payload - charlike_amode = cmmLabelOffW dflags charlike_lbl offsetW - ; return ( litIdInfo dflags binder (mkConLFInfo con) charlike_amode - , return mkNop) } - --------- buildDynCon': the general case ----------- -buildDynCon' dflags _ binder actually_bound ccs con args - = do { (id_info, reg) <- rhsIdInfo binder lf_info - ; return (id_info, gen_code reg) - } - where - lf_info = mkConLFInfo con - - gen_code reg - = do { let (tot_wds, ptr_wds, args_w_offsets) - = mkVirtConstrOffsets dflags (addArgReps args) - nonptr_wds = tot_wds - ptr_wds - info_tbl = mkDataConInfoTable dflags con False - ptr_wds nonptr_wds - ; let ticky_name | actually_bound = Just binder - | otherwise = Nothing - - ; hp_plus_n <- allocDynClosure ticky_name info_tbl lf_info - use_cc blame_cc args_w_offsets - ; return (mkRhsInit dflags reg lf_info hp_plus_n) } - where - use_cc -- cost-centre to stick in the object - | isCurrentCCS ccs = cccsExpr - | otherwise = panic "buildDynCon: non-current CCS not implemented" - - blame_cc = use_cc -- cost-centre on which to blame the alloc (same) - - ---------------------------------------------------------------- --- Binding constructor arguments ---------------------------------------------------------------- - -bindConArgs :: AltCon -> LocalReg -> [NonVoid Id] -> FCode [LocalReg] --- bindConArgs is called from cgAlt of a case --- (bindConArgs con args) augments the environment with bindings for the --- binders args, assuming that we have just returned from a 'case' which --- found a con -bindConArgs (DataAlt con) base args - = ASSERT(not (isUnboxedTupleCon con)) - do dflags <- getDynFlags - let (_, _, args_w_offsets) = mkVirtConstrOffsets dflags (addIdReps args) - tag = tagForCon dflags con - - -- The binding below forces the masking out of the tag bits - -- when accessing the constructor field. - bind_arg :: (NonVoid Id, ByteOff) -> FCode (Maybe LocalReg) - bind_arg (arg@(NonVoid b), offset) - | isDeadBinder b -- See Note [Dead-binder optimisation] in StgCmmExpr - = return Nothing - | otherwise - = do { emit $ mkTaggedObjectLoad dflags (idToReg dflags arg) - base offset tag - ; Just <$> bindArgToReg arg } - - mapMaybeM bind_arg args_w_offsets - -bindConArgs _other_con _base args - = ASSERT( null args ) return [] diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs deleted file mode 100644 index e605762f1f..0000000000 --- a/compiler/codeGen/StgCmmEnv.hs +++ /dev/null @@ -1,208 +0,0 @@ -{-# LANGUAGE CPP #-} - ------------------------------------------------------------------------------ --- --- Stg to C-- code generation: the binding environment --- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ -module StgCmmEnv ( - CgIdInfo, - - litIdInfo, lneIdInfo, rhsIdInfo, mkRhsInit, - idInfoToAmode, - - addBindC, addBindsC, - - bindArgsToRegs, bindToReg, rebindToReg, - bindArgToReg, idToReg, - getArgAmode, getNonVoidArgAmodes, - getCgIdInfo, - maybeLetNoEscape, - ) where - -#include "HsVersions.h" - -import GhcPrelude - -import TyCon -import StgCmmMonad -import StgCmmUtils -import StgCmmClosure - -import CLabel - -import BlockId -import CmmExpr -import CmmUtils -import DynFlags -import Id -import MkGraph -import Name -import Outputable -import StgSyn -import Type -import TysPrim -import UniqFM -import Util -import VarEnv - -------------------------------------- --- Manipulating CgIdInfo -------------------------------------- - -mkCgIdInfo :: Id -> LambdaFormInfo -> CmmExpr -> CgIdInfo -mkCgIdInfo id lf expr - = CgIdInfo { cg_id = id, cg_lf = lf - , cg_loc = CmmLoc expr } - -litIdInfo :: DynFlags -> Id -> LambdaFormInfo -> CmmLit -> CgIdInfo -litIdInfo dflags id lf lit - = CgIdInfo { cg_id = id, cg_lf = lf - , cg_loc = CmmLoc (addDynTag dflags (CmmLit lit) tag) } - where - tag = lfDynTag dflags lf - -lneIdInfo :: DynFlags -> Id -> [NonVoid Id] -> CgIdInfo -lneIdInfo dflags id regs - = CgIdInfo { cg_id = id, cg_lf = lf - , cg_loc = LneLoc blk_id (map (idToReg dflags) regs) } - where - lf = mkLFLetNoEscape - blk_id = mkBlockId (idUnique id) - - -rhsIdInfo :: Id -> LambdaFormInfo -> FCode (CgIdInfo, LocalReg) -rhsIdInfo id lf_info - = do dflags <- getDynFlags - reg <- newTemp (gcWord dflags) - return (mkCgIdInfo id lf_info (CmmReg (CmmLocal reg)), reg) - -mkRhsInit :: DynFlags -> LocalReg -> LambdaFormInfo -> CmmExpr -> CmmAGraph -mkRhsInit dflags reg lf_info expr - = mkAssign (CmmLocal reg) (addDynTag dflags expr (lfDynTag dflags lf_info)) - -idInfoToAmode :: CgIdInfo -> CmmExpr --- Returns a CmmExpr for the *tagged* pointer -idInfoToAmode (CgIdInfo { cg_loc = CmmLoc e }) = e -idInfoToAmode cg_info - = pprPanic "idInfoToAmode" (ppr (cg_id cg_info)) -- LneLoc - -addDynTag :: DynFlags -> CmmExpr -> DynTag -> CmmExpr --- A tag adds a byte offset to the pointer -addDynTag dflags expr tag = cmmOffsetB dflags expr tag - -maybeLetNoEscape :: CgIdInfo -> Maybe (BlockId, [LocalReg]) -maybeLetNoEscape (CgIdInfo { cg_loc = LneLoc blk_id args}) = Just (blk_id, args) -maybeLetNoEscape _other = Nothing - - - ---------------------------------------------------------- --- The binding environment --- --- There are three basic routines, for adding (addBindC), --- modifying(modifyBindC) and looking up (getCgIdInfo) bindings. ---------------------------------------------------------- - -addBindC :: CgIdInfo -> FCode () -addBindC stuff_to_bind = do - binds <- getBinds - setBinds $ extendVarEnv binds (cg_id stuff_to_bind) stuff_to_bind - -addBindsC :: [CgIdInfo] -> FCode () -addBindsC new_bindings = do - binds <- getBinds - let new_binds = foldl' (\ binds info -> extendVarEnv binds (cg_id info) info) - binds - new_bindings - setBinds new_binds - -getCgIdInfo :: Id -> FCode CgIdInfo -getCgIdInfo id - = do { dflags <- getDynFlags - ; local_binds <- getBinds -- Try local bindings first - ; case lookupVarEnv local_binds id of { - Just info -> return info ; - Nothing -> do { - - -- Should be imported; make up a CgIdInfo for it - let name = idName id - ; if isExternalName name then - let ext_lbl - | isUnliftedType (idType id) = - -- An unlifted external Id must refer to a top-level - -- string literal. See Note [Bytes label] in CLabel. - ASSERT( idType id `eqType` addrPrimTy ) - mkBytesLabel name - | otherwise = mkClosureLabel name $ idCafInfo id - in return $ - litIdInfo dflags id (mkLFImported id) (CmmLabel ext_lbl) - else - cgLookupPanic id -- Bug - }}} - -cgLookupPanic :: Id -> FCode a -cgLookupPanic id - = do local_binds <- getBinds - pprPanic "StgCmmEnv: variable not found" - (vcat [ppr id, - text "local binds for:", - pprUFM local_binds $ \infos -> - vcat [ ppr (cg_id info) | info <- infos ] - ]) - - --------------------- -getArgAmode :: NonVoid StgArg -> FCode CmmExpr -getArgAmode (NonVoid (StgVarArg var)) = idInfoToAmode <$> getCgIdInfo var -getArgAmode (NonVoid (StgLitArg lit)) = CmmLit <$> cgLit lit - -getNonVoidArgAmodes :: [StgArg] -> FCode [CmmExpr] --- NB: Filters out void args, --- so the result list may be shorter than the argument list -getNonVoidArgAmodes [] = return [] -getNonVoidArgAmodes (arg:args) - | isVoidRep (argPrimRep arg) = getNonVoidArgAmodes args - | otherwise = do { amode <- getArgAmode (NonVoid arg) - ; amodes <- getNonVoidArgAmodes args - ; return ( amode : amodes ) } - - ------------------------------------------------------------------------- --- Interface functions for binding and re-binding names ------------------------------------------------------------------------- - -bindToReg :: NonVoid Id -> LambdaFormInfo -> FCode LocalReg --- Bind an Id to a fresh LocalReg -bindToReg nvid@(NonVoid id) lf_info - = do dflags <- getDynFlags - let reg = idToReg dflags nvid - addBindC (mkCgIdInfo id lf_info (CmmReg (CmmLocal reg))) - return reg - -rebindToReg :: NonVoid Id -> FCode LocalReg --- Like bindToReg, but the Id is already in scope, so --- get its LF info from the envt -rebindToReg nvid@(NonVoid id) - = do { info <- getCgIdInfo id - ; bindToReg nvid (cg_lf info) } - -bindArgToReg :: NonVoid Id -> FCode LocalReg -bindArgToReg nvid@(NonVoid id) = bindToReg nvid (mkLFArgument id) - -bindArgsToRegs :: [NonVoid Id] -> FCode [LocalReg] -bindArgsToRegs args = mapM bindArgToReg args - -idToReg :: DynFlags -> NonVoid Id -> LocalReg --- Make a register from an Id, typically a function argument, --- free variable, or case binder --- --- We re-use the Unique from the Id to make it easier to see what is going on --- --- By now the Ids should be uniquely named; else one would worry --- about accidental collision -idToReg dflags (NonVoid id) - = LocalReg (idUnique id) - (primRepCmmType dflags (idPrimRep id)) diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs deleted file mode 100644 index 70a044a7ab..0000000000 --- a/compiler/codeGen/StgCmmExpr.hs +++ /dev/null @@ -1,992 +0,0 @@ -{-# LANGUAGE CPP #-} - ------------------------------------------------------------------------------ --- --- Stg to C-- code generation: expressions --- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ - -module StgCmmExpr ( cgExpr ) where - -#include "HsVersions.h" - -import GhcPrelude hiding ((<*>)) - -import {-# SOURCE #-} StgCmmBind ( cgBind ) - -import StgCmmMonad -import StgCmmHeap -import StgCmmEnv -import StgCmmCon -import StgCmmProf (saveCurrentCostCentre, restoreCurrentCostCentre, emitSetCCC) -import StgCmmLayout -import StgCmmPrim -import StgCmmHpc -import StgCmmTicky -import StgCmmUtils -import StgCmmClosure - -import StgSyn - -import MkGraph -import BlockId -import Cmm -import CmmInfo -import CoreSyn -import DataCon -import ForeignCall -import Id -import PrimOp -import TyCon -import Type ( isUnliftedType ) -import RepType ( isVoidTy, countConRepArgs, primRepSlot ) -import CostCentre ( CostCentreStack, currentCCS ) -import Maybes -import Util -import FastString -import Outputable - -import Control.Monad (unless,void) -import Control.Arrow (first) -import Data.Function ( on ) - ------------------------------------------------------------------------- --- cgExpr: the main function ------------------------------------------------------------------------- - -cgExpr :: CgStgExpr -> FCode ReturnKind - -cgExpr (StgApp fun args) = cgIdApp fun args - --- seq# a s ==> a --- See Note [seq# magic] in PrelRules -cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) = - cgIdApp a [] - --- dataToTag# :: a -> Int# --- See Note [dataToTag#] in primops.txt.pp -cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do - dflags <- getDynFlags - emitComment (mkFastString "dataToTag#") - tmp <- newTemp (bWord dflags) - _ <- withSequel (AssignTo [tmp] False) (cgIdApp a []) - -- TODO: For small types look at the tag bits instead of reading info table - emitReturn [getConstrTag dflags (cmmUntag dflags (CmmReg (CmmLocal tmp)))] - -cgExpr (StgOpApp op args ty) = cgOpApp op args ty -cgExpr (StgConApp con args _)= cgConApp con args -cgExpr (StgTick t e) = cgTick t >> cgExpr e -cgExpr (StgLit lit) = do cmm_lit <- cgLit lit - emitReturn [CmmLit cmm_lit] - -cgExpr (StgLet _ binds expr) = do { cgBind binds; cgExpr expr } -cgExpr (StgLetNoEscape _ binds expr) = - do { u <- newUnique - ; let join_id = mkBlockId u - ; cgLneBinds join_id binds - ; r <- cgExpr expr - ; emitLabel join_id - ; return r } - -cgExpr (StgCase expr bndr alt_type alts) = - cgCase expr bndr alt_type alts - -cgExpr (StgLam {}) = panic "cgExpr: StgLam" - ------------------------------------------------------------------------- --- Let no escape ------------------------------------------------------------------------- - -{- Generating code for a let-no-escape binding, aka join point is very -very similar to what we do for a case expression. The duality is -between - let-no-escape x = b - in e -and - case e of ... -> b - -That is, the RHS of 'x' (ie 'b') will execute *later*, just like -the alternative of the case; it needs to be compiled in an environment -in which all volatile bindings are forgotten, and the free vars are -bound only to stable things like stack locations.. The 'e' part will -execute *next*, just like the scrutinee of a case. -} - -------------------------- -cgLneBinds :: BlockId -> CgStgBinding -> FCode () -cgLneBinds join_id (StgNonRec bndr rhs) - = do { local_cc <- saveCurrentCostCentre - -- See Note [Saving the current cost centre] - ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs - ; fcode - ; addBindC info } - -cgLneBinds join_id (StgRec pairs) - = do { local_cc <- saveCurrentCostCentre - ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs - ; let (infos, fcodes) = unzip r - ; addBindsC infos - ; sequence_ fcodes - } - -------------------------- -cgLetNoEscapeRhs - :: BlockId -- join point for successor of let-no-escape - -> Maybe LocalReg -- Saved cost centre - -> Id - -> CgStgRhs - -> FCode (CgIdInfo, FCode ()) - -cgLetNoEscapeRhs join_id local_cc bndr rhs = - do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs - ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info - ; let code = do { (_, body) <- getCodeScoped rhs_code - ; emitOutOfLine bid (first (<*> mkBranch join_id) body) } - ; return (info, code) - } - -cgLetNoEscapeRhsBody - :: Maybe LocalReg -- Saved cost centre - -> Id - -> CgStgRhs - -> FCode (CgIdInfo, FCode ()) -cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure _ cc _upd args body) - = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body -cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args) - = cgLetNoEscapeClosure bndr local_cc cc [] - (StgConApp con args (pprPanic "cgLetNoEscapeRhsBody" $ - text "StgRhsCon doesn't have type args")) - -- For a constructor RHS we want to generate a single chunk of - -- code which can be jumped to from many places, which will - -- return the constructor. It's easy; just behave as if it - -- was an StgRhsClosure with a ConApp inside! - -------------------------- -cgLetNoEscapeClosure - :: Id -- binder - -> Maybe LocalReg -- Slot for saved current cost centre - -> CostCentreStack -- XXX: *** NOT USED *** why not? - -> [NonVoid Id] -- Args (as in \ args -> body) - -> CgStgExpr -- Body (as in above) - -> FCode (CgIdInfo, FCode ()) - -cgLetNoEscapeClosure bndr cc_slot _unused_cc args body - = do dflags <- getDynFlags - return ( lneIdInfo dflags bndr args - , code ) - where - code = forkLneBody $ do { - ; withNewTickyCounterLNE (idName bndr) args $ do - ; restoreCurrentCostCentre cc_slot - ; arg_regs <- bindArgsToRegs args - ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) } - - ------------------------------------------------------------------------- --- Case expressions ------------------------------------------------------------------------- - -{- Note [Compiling case expressions] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -It is quite interesting to decide whether to put a heap-check at the -start of each alternative. Of course we certainly have to do so if -the case forces an evaluation, or if there is a primitive op which can -trigger GC. - -A more interesting situation is this (a Plan-B situation) - - !P!; - ...P... - case x# of - 0# -> !Q!; ...Q... - default -> !R!; ...R... - -where !x! indicates a possible heap-check point. The heap checks -in the alternatives *can* be omitted, in which case the topmost -heapcheck will take their worst case into account. - -In favour of omitting !Q!, !R!: - - - *May* save a heap overflow test, - if ...P... allocates anything. - - - We can use relative addressing from a single Hp to - get at all the closures so allocated. - - - No need to save volatile vars etc across heap checks - in !Q!, !R! - -Against omitting !Q!, !R! - - - May put a heap-check into the inner loop. Suppose - the main loop is P -> R -> P -> R... - Q is the loop exit, and only it does allocation. - This only hurts us if P does no allocation. If P allocates, - then there is a heap check in the inner loop anyway. - - - May do more allocation than reqd. This sometimes bites us - badly. For example, nfib (ha!) allocates about 30\% more space if the - worst-casing is done, because many many calls to nfib are leaf calls - which don't need to allocate anything. - - We can un-allocate, but that costs an instruction - -Neither problem hurts us if there is only one alternative. - -Suppose the inner loop is P->R->P->R etc. Then here is -how many heap checks we get in the *inner loop* under various -conditions - - Alloc Heap check in branches (!Q!, !R!)? - P Q R yes no (absorb to !P!) --------------------------------------- - n n n 0 0 - n y n 0 1 - n . y 1 1 - y . y 2 1 - y . n 1 1 - -Best choices: absorb heap checks from Q and R into !P! iff - a) P itself does some allocation -or - b) P does allocation, or there is exactly one alternative - -We adopt (b) because that is more likely to put the heap check at the -entry to a function, when not many things are live. After a bunch of -single-branch cases, we may have lots of things live - -Hence: two basic plans for - - case e of r { alts } - ------- Plan A: the general case --------- - - ...save current cost centre... - - ...code for e, - with sequel (SetLocals r) - - ...restore current cost centre... - ...code for alts... - ...alts do their own heap checks - ------- Plan B: special case when --------- - (i) e does not allocate or call GC - (ii) either upstream code performs allocation - or there is just one alternative - - Then heap allocation in the (single) case branch - is absorbed by the upstream check. - Very common example: primops on unboxed values - - ...code for e, - with sequel (SetLocals r)... - - ...code for alts... - ...no heap check... --} - - - -------------------------------------- -data GcPlan - = GcInAlts -- Put a GC check at the start the case alternatives, - [LocalReg] -- which binds these registers - | NoGcInAlts -- The scrutinee is a primitive value, or a call to a - -- primitive op which does no GC. Absorb the allocation - -- of the case alternative(s) into the upstream check - -------------------------------------- -cgCase :: CgStgExpr -> Id -> AltType -> [CgStgAlt] -> FCode ReturnKind - -cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts - | isEnumerationTyCon tycon -- Note [case on bool] - = do { tag_expr <- do_enum_primop op args - - -- If the binder is not dead, convert the tag to a constructor - -- and assign it. See Note [Dead-binder optimisation] - ; unless (isDeadBinder bndr) $ do - { dflags <- getDynFlags - ; tmp_reg <- bindArgToReg (NonVoid bndr) - ; emitAssign (CmmLocal tmp_reg) - (tagToClosure dflags tycon tag_expr) } - - ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly) - (NonVoid bndr) alts - -- See Note [GC for conditionals] - ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1) - ; return AssignedDirectly - } - where - do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr - do_enum_primop TagToEnumOp [arg] -- No code! - = getArgAmode (NonVoid arg) - do_enum_primop primop args - = do dflags <- getDynFlags - tmp <- newTemp (bWord dflags) - cgPrimOp [tmp] primop args - return (CmmReg (CmmLocal tmp)) - -{- -Note [case on bool] -~~~~~~~~~~~~~~~~~~~ -This special case handles code like - - case a <# b of - True -> - False -> - ---> case tagToEnum# (a <$# b) of - True -> .. ; False -> ... - ---> case (a <$# b) of r -> - case tagToEnum# r of - True -> .. ; False -> ... - -If we let the ordinary case code handle it, we'll get something like - - tmp1 = a < b - tmp2 = Bool_closure_tbl[tmp1] - if (tmp2 & 7 != 0) then ... // normal tagged case - -but this junk won't optimise away. What we really want is just an -inline comparison: - - if (a < b) then ... - -So we add a special case to generate - - tmp1 = a < b - if (tmp1 == 0) then ... - -and later optimisations will further improve this. - -Now that #6135 has been resolved it should be possible to remove that -special case. The idea behind this special case and pre-6135 implementation -of Bool-returning primops was that tagToEnum# was added implicitly in the -codegen and then optimized away. Now the call to tagToEnum# is explicit -in the source code, which allows to optimize it away at the earlier stages -of compilation (i.e. at the Core level). - -Note [Scrutinising VoidRep] -~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Suppose we have this STG code: - f = \[s : State# RealWorld] -> - case s of _ -> blah -This is very odd. Why are we scrutinising a state token? But it -can arise with bizarre NOINLINE pragmas (#9964) - crash :: IO () - crash = IO (\s -> let {-# NOINLINE s' #-} - s' = s - in (# s', () #)) - -Now the trouble is that 's' has VoidRep, and we do not bind void -arguments in the environment; they don't live anywhere. See the -calls to nonVoidIds in various places. So we must not look up -'s' in the environment. Instead, just evaluate the RHS! Simple. - -Note [Dead-binder optimisation] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -A case-binder, or data-constructor argument, may be marked as dead, -because we preserve occurrence-info on binders in CoreTidy (see -CoreTidy.tidyIdBndr). - -If the binder is dead, we can sometimes eliminate a load. While -CmmSink will eliminate that load, it's very easy to kill it at source -(giving CmmSink less work to do), and in any case CmmSink only runs -with -O. Since the majority of case binders are dead, this -optimisation probably still has a great benefit-cost ratio and we want -to keep it for -O0. See also Phab:D5358. - -This probably also was the reason for occurrence hack in Phab:D5339 to -exist, perhaps because the occurrence information preserved by -'CoreTidy.tidyIdBndr' was insufficient. But now that CmmSink does the -job we deleted the hacks. --} - -cgCase (StgApp v []) _ (PrimAlt _) alts - | isVoidRep (idPrimRep v) -- See Note [Scrutinising VoidRep] - , [(DEFAULT, _, rhs)] <- alts - = cgExpr rhs - -{- Note [Dodgy unsafeCoerce 1] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Consider - case (x :: HValue) |> co of (y :: MutVar# Int) - DEFAULT -> ... -We want to gnerate an assignment - y := x -We want to allow this assignment to be generated in the case when the -types are compatible, because this allows some slightly-dodgy but -occasionally-useful casts to be used, such as in RtClosureInspect -where we cast an HValue to a MutVar# so we can print out the contents -of the MutVar#. If instead we generate code that enters the HValue, -then we'll get a runtime panic, because the HValue really is a -MutVar#. The types are compatible though, so we can just generate an -assignment. --} -cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts - | isUnliftedType (idType v) -- Note [Dodgy unsafeCoerce 1] - || reps_compatible - = -- assignment suffices for unlifted types - do { dflags <- getDynFlags - ; unless reps_compatible $ - pprPanic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?" - (pp_bndr v $$ pp_bndr bndr) - ; v_info <- getCgIdInfo v - ; emitAssign (CmmLocal (idToReg dflags (NonVoid bndr))) - (idInfoToAmode v_info) - -- Add bndr to the environment - ; _ <- bindArgToReg (NonVoid bndr) - ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts } - where - reps_compatible = ((==) `on` (primRepSlot . idPrimRep)) v bndr - -- Must compare SlotTys, not proper PrimReps, because with unboxed sums, - -- the types of the binders are generated from slotPrimRep and might not - -- match. Test case: - -- swap :: (# Int | Int #) -> (# Int | Int #) - -- swap (# x | #) = (# | x #) - -- swap (# | y #) = (# y | #) - - pp_bndr id = ppr id <+> dcolon <+> ppr (idType id) <+> parens (ppr (idPrimRep id)) - -{- Note [Dodgy unsafeCoerce 2, #3132] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In all other cases of a lifted Id being cast to an unlifted type, the -Id should be bound to bottom, otherwise this is an unsafe use of -unsafeCoerce. We can generate code to enter the Id and assume that -it will never return. Hence, we emit the usual enter/return code, and -because bottom must be untagged, it will be entered. The Sequel is a -type-correct assignment, albeit bogus. The (dead) continuation loops; -it would be better to invoke some kind of panic function here. --} -cgCase scrut@(StgApp v []) _ (PrimAlt _) _ - = do { dflags <- getDynFlags - ; mb_cc <- maybeSaveCostCentre True - ; _ <- withSequel - (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut) - ; restoreCurrentCostCentre mb_cc - ; emitComment $ mkFastString "should be unreachable code" - ; l <- newBlockId - ; emitLabel l - ; emit (mkBranch l) -- an infinite loop - ; return AssignedDirectly - } - -{- Note [Handle seq#] -~~~~~~~~~~~~~~~~~~~~~ -See Note [seq# magic] in PrelRules. -The special case for seq# in cgCase does this: - - case seq# a s of v - (# s', a' #) -> e -==> - case a of v - (# s', a' #) -> e - -(taking advantage of the fact that the return convention for (# State#, a #) -is the same as the return convention for just 'a') --} - -cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts - = -- Note [Handle seq#] - -- And see Note [seq# magic] in PrelRules - -- Use the same return convention as vanilla 'a'. - cgCase (StgApp a []) bndr alt_type alts - -cgCase scrut bndr alt_type alts - = -- the general case - do { dflags <- getDynFlags - ; up_hp_usg <- getVirtHp -- Upstream heap usage - ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts - alt_regs = map (idToReg dflags) ret_bndrs - ; simple_scrut <- isSimpleScrut scrut alt_type - ; let do_gc | is_cmp_op scrut = False -- See Note [GC for conditionals] - | not simple_scrut = True - | isSingleton alts = False - | up_hp_usg > 0 = False - | otherwise = True - -- cf Note [Compiling case expressions] - gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts - - ; mb_cc <- maybeSaveCostCentre simple_scrut - - ; let sequel = AssignTo alt_regs do_gc{- Note [scrut sequel] -} - ; ret_kind <- withSequel sequel (cgExpr scrut) - ; restoreCurrentCostCentre mb_cc - ; _ <- bindArgsToRegs ret_bndrs - ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts - } - where - is_cmp_op (StgOpApp (StgPrimOp op) _ _) = isComparisonPrimOp op - is_cmp_op _ = False - -{- Note [GC for conditionals] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -For boolean conditionals it seems that we have always done NoGcInAlts. -That is, we have always done the GC check before the conditional. -This is enshrined in the special case for - case tagToEnum# (a>b) of ... -See Note [case on bool] - -It's odd, and it's flagrantly inconsistent with the rules described -Note [Compiling case expressions]. However, after eliminating the -tagToEnum# (#13397) we will have: - case (a>b) of ... -Rather than make it behave quite differently, I am testing for a -comparison operator here in in the general case as well. - -ToDo: figure out what the Right Rule should be. - -Note [scrut sequel] -~~~~~~~~~~~~~~~~~~~ -The job of the scrutinee is to assign its value(s) to alt_regs. -Additionally, if we plan to do a heap-check in the alternatives (see -Note [Compiling case expressions]), then we *must* retreat Hp to -recover any unused heap before passing control to the sequel. If we -don't do this, then any unused heap will become slop because the heap -check will reset the heap usage. Slop in the heap breaks LDV profiling -(+RTS -hb) which needs to do a linear sweep through the nursery. - - -Note [Inlining out-of-line primops and heap checks] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If shouldInlinePrimOp returns True when called from StgCmmExpr for the -purpose of heap check placement, we *must* inline the primop later in -StgCmmPrim. If we don't things will go wrong. --} - ------------------ -maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg) -maybeSaveCostCentre simple_scrut - | simple_scrut = return Nothing - | otherwise = saveCurrentCostCentre - - ------------------ -isSimpleScrut :: CgStgExpr -> AltType -> FCode Bool --- Simple scrutinee, does not block or allocate; hence safe to amalgamate --- heap usage from alternatives into the stuff before the case --- NB: if you get this wrong, and claim that the expression doesn't allocate --- when it does, you'll deeply mess up allocation -isSimpleScrut (StgOpApp op args _) _ = isSimpleOp op args -isSimpleScrut (StgLit _) _ = return True -- case 1# of { 0# -> ..; ... } -isSimpleScrut (StgApp _ []) (PrimAlt _) = return True -- case x# of { 0# -> ..; ... } -isSimpleScrut _ _ = return False - -isSimpleOp :: StgOp -> [StgArg] -> FCode Bool --- True iff the op cannot block or allocate -isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe) --- dataToTag# evalautes its argument, see Note [dataToTag#] in primops.txt.pp -isSimpleOp (StgPrimOp DataToTagOp) _ = return False -isSimpleOp (StgPrimOp op) stg_args = do - arg_exprs <- getNonVoidArgAmodes stg_args - dflags <- getDynFlags - -- See Note [Inlining out-of-line primops and heap checks] - return $! isJust $ shouldInlinePrimOp dflags op arg_exprs -isSimpleOp (StgPrimCallOp _) _ = return False - ------------------ -chooseReturnBndrs :: Id -> AltType -> [CgStgAlt] -> [NonVoid Id] --- These are the binders of a case that are assigned by the evaluation of the --- scrutinee. --- They're non-void, see Note [Post-unarisation invariants] in UnariseStg. -chooseReturnBndrs bndr (PrimAlt _) _alts - = assertNonVoidIds [bndr] - -chooseReturnBndrs _bndr (MultiValAlt n) [(_, ids, _)] - = ASSERT2(ids `lengthIs` n, ppr n $$ ppr ids $$ ppr _bndr) - assertNonVoidIds ids -- 'bndr' is not assigned! - -chooseReturnBndrs bndr (AlgAlt _) _alts - = assertNonVoidIds [bndr] -- Only 'bndr' is assigned - -chooseReturnBndrs bndr PolyAlt _alts - = assertNonVoidIds [bndr] -- Only 'bndr' is assigned - -chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs" - -- MultiValAlt has only one alternative - -------------------------------------- -cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [CgStgAlt] - -> FCode ReturnKind --- At this point the result of the case are in the binders -cgAlts gc_plan _bndr PolyAlt [(_, _, rhs)] - = maybeAltHeapCheck gc_plan (cgExpr rhs) - -cgAlts gc_plan _bndr (MultiValAlt _) [(_, _, rhs)] - = maybeAltHeapCheck gc_plan (cgExpr rhs) - -- Here bndrs are *already* in scope, so don't rebind them - -cgAlts gc_plan bndr (PrimAlt _) alts - = do { dflags <- getDynFlags - - ; tagged_cmms <- cgAltRhss gc_plan bndr alts - - ; let bndr_reg = CmmLocal (idToReg dflags bndr) - (DEFAULT,deflt) = head tagged_cmms - -- PrimAlts always have a DEFAULT case - -- and it always comes first - - tagged_cmms' = [(lit,code) - | (LitAlt lit, code) <- tagged_cmms] - ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt - ; return AssignedDirectly } - -cgAlts gc_plan bndr (AlgAlt tycon) alts - = do { dflags <- getDynFlags - - ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts - - ; let fam_sz = tyConFamilySize tycon - bndr_reg = CmmLocal (idToReg dflags bndr) - - -- Is the constructor tag in the node reg? - ; if isSmallFamily dflags fam_sz - then do - let -- Yes, bndr_reg has constr. tag in ls bits - tag_expr = cmmConstrTag1 dflags (CmmReg bndr_reg) - branches' = [(tag+1,branch) | (tag,branch) <- branches] - emitSwitch tag_expr branches' mb_deflt 1 fam_sz - - else -- No, get tag from info table - let -- Note that ptr _always_ has tag 1 - -- when the family size is big enough - untagged_ptr = cmmRegOffB bndr_reg (-1) - tag_expr = getConstrTag dflags (untagged_ptr) - in emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) - - ; return AssignedDirectly } - -cgAlts _ _ _ _ = panic "cgAlts" - -- UbxTupAlt and PolyAlt have only one alternative - - --- Note [alg-alt heap check] --- --- In an algebraic case with more than one alternative, we will have --- code like --- --- L0: --- x = R1 --- goto L1 --- L1: --- if (x & 7 >= 2) then goto L2 else goto L3 --- L2: --- Hp = Hp + 16 --- if (Hp > HpLim) then goto L4 --- ... --- L4: --- call gc() returns to L5 --- L5: --- x = R1 --- goto L1 - -------------------- -cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt] - -> FCode ( Maybe CmmAGraphScoped - , [(ConTagZ, CmmAGraphScoped)] ) -cgAlgAltRhss gc_plan bndr alts - = do { tagged_cmms <- cgAltRhss gc_plan bndr alts - - ; let { mb_deflt = case tagged_cmms of - ((DEFAULT,rhs) : _) -> Just rhs - _other -> Nothing - -- DEFAULT is always first, if present - - ; branches = [ (dataConTagZ con, cmm) - | (DataAlt con, cmm) <- tagged_cmms ] - } - - ; return (mb_deflt, branches) - } - - -------------------- -cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt] - -> FCode [(AltCon, CmmAGraphScoped)] -cgAltRhss gc_plan bndr alts = do - dflags <- getDynFlags - let - base_reg = idToReg dflags bndr - cg_alt :: CgStgAlt -> FCode (AltCon, CmmAGraphScoped) - cg_alt (con, bndrs, rhs) - = getCodeScoped $ - maybeAltHeapCheck gc_plan $ - do { _ <- bindConArgs con base_reg (assertNonVoidIds bndrs) - -- alt binders are always non-void, - -- see Note [Post-unarisation invariants] in UnariseStg - ; _ <- cgExpr rhs - ; return con } - forkAlts (map cg_alt alts) - -maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a -maybeAltHeapCheck (NoGcInAlts,_) code = code -maybeAltHeapCheck (GcInAlts regs, AssignedDirectly) code = - altHeapCheck regs code -maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code = - altHeapCheckReturnsTo regs lret off code - ------------------------------------------------------------------------------ --- Tail calls ------------------------------------------------------------------------------ - -cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind -cgConApp con stg_args - | isUnboxedTupleCon con -- Unboxed tuple: assign and return - = do { arg_exprs <- getNonVoidArgAmodes stg_args - ; tickyUnboxedTupleReturn (length arg_exprs) - ; emitReturn arg_exprs } - - | otherwise -- Boxed constructors; allocate and return - = ASSERT2( stg_args `lengthIs` countConRepArgs con, ppr con <> parens (ppr (countConRepArgs con)) <+> ppr stg_args ) - do { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) False - currentCCS con (assertNonVoidStgArgs stg_args) - -- con args are always non-void, - -- see Note [Post-unarisation invariants] in UnariseStg - -- The first "con" says that the name bound to this - -- closure is "con", which is a bit of a fudge, but - -- it only affects profiling (hence the False) - - ; emit =<< fcode_init - ; tickyReturnNewCon (length stg_args) - ; emitReturn [idInfoToAmode idinfo] } - -cgIdApp :: Id -> [StgArg] -> FCode ReturnKind -cgIdApp fun_id args = do - dflags <- getDynFlags - fun_info <- getCgIdInfo fun_id - self_loop_info <- getSelfLoop - let fun_arg = StgVarArg fun_id - fun_name = idName fun_id - fun = idInfoToAmode fun_info - lf_info = cg_lf fun_info - n_args = length args - v_args = length $ filter (isVoidTy . stgArgType) args - node_points dflags = nodeMustPointToIt dflags lf_info - case getCallMethod dflags fun_name fun_id lf_info n_args v_args (cg_loc fun_info) self_loop_info of - -- A value in WHNF, so we can just return it. - ReturnIt - | isVoidTy (idType fun_id) -> emitReturn [] - | otherwise -> emitReturn [fun] - -- ToDo: does ReturnIt guarantee tagged? - - EnterIt -> ASSERT( null args ) -- Discarding arguments - emitEnter fun - - SlowCall -> do -- A slow function call via the RTS apply routines - { tickySlowCall lf_info args - ; emitComment $ mkFastString "slowCall" - ; slowCall fun args } - - -- A direct function call (possibly with some left-over arguments) - DirectEntry lbl arity -> do - { tickyDirectCall arity args - ; if node_points dflags - then directCall NativeNodeCall lbl arity (fun_arg:args) - else directCall NativeDirectCall lbl arity args } - - -- Let-no-escape call or self-recursive tail-call - JumpToIt blk_id lne_regs -> do - { adjustHpBackwards -- always do this before a tail-call - ; cmm_args <- getNonVoidArgAmodes args - ; emitMultiAssign lne_regs cmm_args - ; emit (mkBranch blk_id) - ; return AssignedDirectly } - --- Note [Self-recursive tail calls] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- --- Self-recursive tail calls can be optimized into a local jump in the same --- way as let-no-escape bindings (see Note [What is a non-escaping let] in --- stgSyn/CoreToStg.hs). Consider this: --- --- foo.info: --- a = R1 // calling convention --- b = R2 --- goto L1 --- L1: ... --- ... --- ... --- L2: R1 = x --- R2 = y --- call foo(R1,R2) --- --- Instead of putting x and y into registers (or other locations required by the --- calling convention) and performing a call we can put them into local --- variables a and b and perform jump to L1: --- --- foo.info: --- a = R1 --- b = R2 --- goto L1 --- L1: ... --- ... --- ... --- L2: a = x --- b = y --- goto L1 --- --- This can be done only when function is calling itself in a tail position --- and only if the call passes number of parameters equal to function's arity. --- Note that this cannot be performed if a function calls itself with a --- continuation. --- --- This in fact implements optimization known as "loopification". It was --- described in "Low-level code optimizations in the Glasgow Haskell Compiler" --- by Krzysztof WoÅ›, though we use different approach. Krzysztof performed his --- optimization at the Cmm level, whereas we perform ours during code generation --- (Stg-to-Cmm pass) essentially making sure that optimized Cmm code is --- generated in the first place. --- --- Implementation is spread across a couple of places in the code: --- --- * FCode monad stores additional information in its reader environment --- (cgd_self_loop field). This information tells us which function can --- tail call itself in an optimized way (it is the function currently --- being compiled), what is the label of a loop header (L1 in example above) --- and information about local registers in which we should arguments --- before making a call (this would be a and b in example above). --- --- * Whenever we are compiling a function, we set that information to reflect --- the fact that function currently being compiled can be jumped to, instead --- of called. This is done in closureCodyBody in StgCmmBind. --- --- * We also have to emit a label to which we will be jumping. We make sure --- that the label is placed after a stack check but before the heap --- check. The reason is that making a recursive tail-call does not increase --- the stack so we only need to check once. But it may grow the heap, so we --- have to repeat the heap check in every self-call. This is done in --- do_checks in StgCmmHeap. --- --- * When we begin compilation of another closure we remove the additional --- information from the environment. This is done by forkClosureBody --- in StgCmmMonad. Other functions that duplicate the environment - --- forkLneBody, forkAlts, codeOnly - duplicate that information. In other --- words, we only need to clean the environment of the self-loop information --- when compiling right hand side of a closure (binding). --- --- * When compiling a call (cgIdApp) we use getCallMethod to decide what kind --- of call will be generated. getCallMethod decides to generate a self --- recursive tail call when (a) environment stores information about --- possible self tail-call; (b) that tail call is to a function currently --- being compiled; (c) number of passed non-void arguments is equal to --- function's arity. (d) loopification is turned on via -floopification --- command-line option. --- --- * Command line option to turn loopification on and off is implemented in --- DynFlags. --- --- --- Note [Void arguments in self-recursive tail calls] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- --- State# tokens can get in the way of the loopification optimization as seen in --- #11372. Consider this: --- --- foo :: [a] --- -> (a -> State# s -> (# State s, Bool #)) --- -> State# s --- -> (# State# s, Maybe a #) --- foo [] f s = (# s, Nothing #) --- foo (x:xs) f s = case f x s of --- (# s', b #) -> case b of --- True -> (# s', Just x #) --- False -> foo xs f s' --- --- We would like to compile the call to foo as a local jump instead of a call --- (see Note [Self-recursive tail calls]). However, the generated function has --- an arity of 2 while we apply it to 3 arguments, one of them being of void --- type. Thus, we mustn't count arguments of void type when checking whether --- we can turn a call into a self-recursive jump. --- - -emitEnter :: CmmExpr -> FCode ReturnKind -emitEnter fun = do - { dflags <- getDynFlags - ; adjustHpBackwards - ; sequel <- getSequel - ; updfr_off <- getUpdFrameOff - ; case sequel of - -- For a return, we have the option of generating a tag-test or - -- not. If the value is tagged, we can return directly, which - -- is quicker than entering the value. This is a code - -- size/speed trade-off: when optimising for speed rather than - -- size we could generate the tag test. - -- - -- Right now, we do what the old codegen did, and omit the tag - -- test, just generating an enter. - Return -> do - { let entry = entryCode dflags $ closureInfoPtr dflags $ CmmReg nodeReg - ; emit $ mkJump dflags NativeNodeCall entry - [cmmUntag dflags fun] updfr_off - ; return AssignedDirectly - } - - -- The result will be scrutinised in the sequel. This is where - -- we generate a tag-test to avoid entering the closure if - -- possible. - -- - -- The generated code will be something like this: - -- - -- R1 = fun -- copyout - -- if (fun & 7 != 0) goto Lret else goto Lcall - -- Lcall: - -- call [fun] returns to Lret - -- Lret: - -- fun' = R1 -- copyin - -- ... - -- - -- Note in particular that the label Lret is used as a - -- destination by both the tag-test and the call. This is - -- because Lret will necessarily be a proc-point, and we want to - -- ensure that we generate only one proc-point for this - -- sequence. - -- - -- Furthermore, we tell the caller that we generated a native - -- return continuation by returning (ReturnedTo Lret off), so - -- that the continuation can be reused by the heap-check failure - -- code in the enclosing case expression. - -- - AssignTo res_regs _ -> do - { lret <- newBlockId - ; let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs [] - ; lcall <- newBlockId - ; updfr_off <- getUpdFrameOff - ; let area = Young lret - ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area - [fun] updfr_off [] - -- refer to fun via nodeReg after the copyout, to avoid having - -- both live simultaneously; this sometimes enables fun to be - -- inlined in the RHS of the R1 assignment. - ; let entry = entryCode dflags (closureInfoPtr dflags (CmmReg nodeReg)) - the_call = toCall entry (Just lret) updfr_off off outArgs regs - ; tscope <- getTickScope - ; emit $ - copyout <*> - mkCbranch (cmmIsTagged dflags (CmmReg nodeReg)) - lret lcall Nothing <*> - outOfLine lcall (the_call,tscope) <*> - mkLabel lret tscope <*> - copyin - ; return (ReturnedTo lret off) - } - } - ------------------------------------------------------------------------- --- Ticks ------------------------------------------------------------------------- - --- | Generate Cmm code for a tick. Depending on the type of Tickish, --- this will either generate actual Cmm instrumentation code, or --- simply pass on the annotation as a @CmmTickish@. -cgTick :: Tickish Id -> FCode () -cgTick tick - = do { dflags <- getDynFlags - ; case tick of - ProfNote cc t p -> emitSetCCC cc t p - HpcTick m n -> emit (mkTickBox dflags m n) - SourceNote s n -> emitTick $ SourceNote s n - _other -> return () -- ignore - } diff --git a/compiler/codeGen/StgCmmExtCode.hs b/compiler/codeGen/StgCmmExtCode.hs deleted file mode 100644 index 1d35c3454e..0000000000 --- a/compiler/codeGen/StgCmmExtCode.hs +++ /dev/null @@ -1,252 +0,0 @@ -{-# LANGUAGE DeriveFunctor #-} --- | Our extended FCode monad. - --- We add a mapping from names to CmmExpr, to support local variable names in --- the concrete C-- code. The unique supply of the underlying FCode monad --- is used to grab a new unique for each local variable. - --- In C--, a local variable can be declared anywhere within a proc, --- and it scopes from the beginning of the proc to the end. Hence, we have --- to collect declarations as we parse the proc, and feed the environment --- back in circularly (to avoid a two-pass algorithm). - -module StgCmmExtCode ( - CmmParse, unEC, - Named(..), Env, - - loopDecls, - getEnv, - - withName, - getName, - - newLocal, - newLabel, - newBlockId, - newFunctionName, - newImport, - lookupLabel, - lookupName, - - code, - emit, emitLabel, emitAssign, emitStore, - getCode, getCodeR, getCodeScoped, - emitOutOfLine, - withUpdFrameOff, getUpdFrameOff -) - -where - -import GhcPrelude - -import qualified StgCmmMonad as F -import StgCmmMonad (FCode, newUnique) - -import Cmm -import CLabel -import MkGraph - -import BlockId -import DynFlags -import FastString -import Module -import UniqFM -import Unique -import UniqSupply - -import Control.Monad (ap) - --- | The environment contains variable definitions or blockids. -data Named - = VarN CmmExpr -- ^ Holds CmmLit(CmmLabel ..) which gives the label type, - -- eg, RtsLabel, ForeignLabel, CmmLabel etc. - - | FunN UnitId -- ^ A function name from this package - | LabelN BlockId -- ^ A blockid of some code or data. - --- | An environment of named things. -type Env = UniqFM Named - --- | Local declarations that are in scope during code generation. -type Decls = [(FastString,Named)] - --- | Does a computation in the FCode monad, with a current environment --- and a list of local declarations. Returns the resulting list of declarations. -newtype CmmParse a - = EC { unEC :: String -> Env -> Decls -> FCode (Decls, a) } - deriving (Functor) - -type ExtCode = CmmParse () - -returnExtFC :: a -> CmmParse a -returnExtFC a = EC $ \_ _ s -> return (s, a) - -thenExtFC :: CmmParse a -> (a -> CmmParse b) -> CmmParse b -thenExtFC (EC m) k = EC $ \c e s -> do (s',r) <- m c e s; unEC (k r) c e s' - -instance Applicative CmmParse where - pure = returnExtFC - (<*>) = ap - -instance Monad CmmParse where - (>>=) = thenExtFC - -instance MonadUnique CmmParse where - getUniqueSupplyM = code getUniqueSupplyM - getUniqueM = EC $ \_ _ decls -> do - u <- getUniqueM - return (decls, u) - -instance HasDynFlags CmmParse where - getDynFlags = EC (\_ _ d -> do dflags <- getDynFlags - return (d, dflags)) - - --- | Takes the variable decarations and imports from the monad --- and makes an environment, which is looped back into the computation. --- In this way, we can have embedded declarations that scope over the whole --- procedure, and imports that scope over the entire module. --- Discards the local declaration contained within decl' --- -loopDecls :: CmmParse a -> CmmParse a -loopDecls (EC fcode) = - EC $ \c e globalDecls -> do - (_, a) <- F.fixC $ \ ~(decls, _) -> - fcode c (addListToUFM e decls) globalDecls - return (globalDecls, a) - - --- | Get the current environment from the monad. -getEnv :: CmmParse Env -getEnv = EC $ \_ e s -> return (s, e) - --- | Get the current context name from the monad -getName :: CmmParse String -getName = EC $ \c _ s -> return (s, c) - --- | Set context name for a sub-parse -withName :: String -> CmmParse a -> CmmParse a -withName c' (EC fcode) = EC $ \_ e s -> fcode c' e s - -addDecl :: FastString -> Named -> ExtCode -addDecl name named = EC $ \_ _ s -> return ((name, named) : s, ()) - - --- | Add a new variable to the list of local declarations. --- The CmmExpr says where the value is stored. -addVarDecl :: FastString -> CmmExpr -> ExtCode -addVarDecl var expr = addDecl var (VarN expr) - --- | Add a new label to the list of local declarations. -addLabel :: FastString -> BlockId -> ExtCode -addLabel name block_id = addDecl name (LabelN block_id) - - --- | Create a fresh local variable of a given type. -newLocal - :: CmmType -- ^ data type - -> FastString -- ^ name of variable - -> CmmParse LocalReg -- ^ register holding the value - -newLocal ty name = do - u <- code newUnique - let reg = LocalReg u ty - addVarDecl name (CmmReg (CmmLocal reg)) - return reg - - --- | Allocate a fresh label. -newLabel :: FastString -> CmmParse BlockId -newLabel name = do - u <- code newUnique - addLabel name (mkBlockId u) - return (mkBlockId u) - --- | Add add a local function to the environment. -newFunctionName - :: FastString -- ^ name of the function - -> UnitId -- ^ package of the current module - -> ExtCode - -newFunctionName name pkg = addDecl name (FunN pkg) - - --- | Add an imported foreign label to the list of local declarations. --- If this is done at the start of the module the declaration will scope --- over the whole module. -newImport - :: (FastString, CLabel) - -> CmmParse () - -newImport (name, cmmLabel) - = addVarDecl name (CmmLit (CmmLabel cmmLabel)) - - --- | Lookup the BlockId bound to the label with this name. --- If one hasn't been bound yet, create a fresh one based on the --- Unique of the name. -lookupLabel :: FastString -> CmmParse BlockId -lookupLabel name = do - env <- getEnv - return $ - case lookupUFM env name of - Just (LabelN l) -> l - _other -> mkBlockId (newTagUnique (getUnique name) 'L') - - --- | Lookup the location of a named variable. --- Unknown names are treated as if they had been 'import'ed from the runtime system. --- This saves us a lot of bother in the RTS sources, at the expense of --- deferring some errors to link time. -lookupName :: FastString -> CmmParse CmmExpr -lookupName name = do - env <- getEnv - return $ - case lookupUFM env name of - Just (VarN e) -> e - Just (FunN pkg) -> CmmLit (CmmLabel (mkCmmCodeLabel pkg name)) - _other -> CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId name)) - - --- | Lift an FCode computation into the CmmParse monad -code :: FCode a -> CmmParse a -code fc = EC $ \_ _ s -> do - r <- fc - return (s, r) - -emit :: CmmAGraph -> CmmParse () -emit = code . F.emit - -emitLabel :: BlockId -> CmmParse () -emitLabel = code . F.emitLabel - -emitAssign :: CmmReg -> CmmExpr -> CmmParse () -emitAssign l r = code (F.emitAssign l r) - -emitStore :: CmmExpr -> CmmExpr -> CmmParse () -emitStore l r = code (F.emitStore l r) - -getCode :: CmmParse a -> CmmParse CmmAGraph -getCode (EC ec) = EC $ \c e s -> do - ((s',_), gr) <- F.getCodeR (ec c e s) - return (s', gr) - -getCodeR :: CmmParse a -> CmmParse (a, CmmAGraph) -getCodeR (EC ec) = EC $ \c e s -> do - ((s', r), gr) <- F.getCodeR (ec c e s) - return (s', (r,gr)) - -getCodeScoped :: CmmParse a -> CmmParse (a, CmmAGraphScoped) -getCodeScoped (EC ec) = EC $ \c e s -> do - ((s', r), gr) <- F.getCodeScoped (ec c e s) - return (s', (r,gr)) - -emitOutOfLine :: BlockId -> CmmAGraphScoped -> CmmParse () -emitOutOfLine l g = code (F.emitOutOfLine l g) - -withUpdFrameOff :: UpdFrameOffset -> CmmParse () -> CmmParse () -withUpdFrameOff size inner - = EC $ \c e s -> F.withUpdFrameOff size $ (unEC inner) c e s - -getUpdFrameOff :: CmmParse UpdFrameOffset -getUpdFrameOff = code $ F.getUpdFrameOff diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs deleted file mode 100644 index 172dcba219..0000000000 --- a/compiler/codeGen/StgCmmForeign.hs +++ /dev/null @@ -1,627 +0,0 @@ ------------------------------------------------------------------------------ --- --- Code generation for foreign calls. --- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ - -module StgCmmForeign ( - cgForeignCall, - emitPrimCall, emitCCall, - emitForeignCall, -- For CmmParse - emitSaveThreadState, - saveThreadState, - emitLoadThreadState, - loadThreadState, - emitOpenNursery, - emitCloseNursery, - ) where - -import GhcPrelude hiding( succ, (<*>) ) - -import StgSyn -import StgCmmProf (storeCurCCS, ccsType) -import StgCmmEnv -import StgCmmMonad -import StgCmmUtils -import StgCmmClosure -import StgCmmLayout - -import BlockId (newBlockId) -import Cmm -import CmmUtils -import MkGraph -import Type -import RepType -import CLabel -import SMRep -import ForeignCall -import DynFlags -import Maybes -import Outputable -import UniqSupply -import BasicTypes - -import TyCoRep -import TysPrim -import Util (zipEqual) - -import Control.Monad - ------------------------------------------------------------------------------ --- Code generation for Foreign Calls ------------------------------------------------------------------------------ - --- | Emit code for a foreign call, and return the results to the sequel. --- Precondition: the length of the arguments list is the same as the --- arity of the foreign function. -cgForeignCall :: ForeignCall -- the op - -> Type -- type of foreign function - -> [StgArg] -- x,y arguments - -> Type -- result type - -> FCode ReturnKind - -cgForeignCall (CCall (CCallSpec target cconv safety)) typ stg_args res_ty - = do { dflags <- getDynFlags - ; let -- in the stdcall calling convention, the symbol needs @size appended - -- to it, where size is the total number of bytes of arguments. We - -- attach this info to the CLabel here, and the CLabel pretty printer - -- will generate the suffix when the label is printed. - call_size args - | StdCallConv <- cconv = Just (sum (map arg_size args)) - | otherwise = Nothing - - -- ToDo: this might not be correct for 64-bit API - arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType dflags arg) - (wORD_SIZE dflags) - ; cmm_args <- getFCallArgs stg_args typ - ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty - ; let ((call_args, arg_hints), cmm_target) - = case target of - StaticTarget _ _ _ False -> - panic "cgForeignCall: unexpected FFI value import" - StaticTarget _ lbl mPkgId True - -> let labelSource - = case mPkgId of - Nothing -> ForeignLabelInThisPackage - Just pkgId -> ForeignLabelInPackage pkgId - size = call_size cmm_args - in ( unzip cmm_args - , CmmLit (CmmLabel - (mkForeignLabel lbl size labelSource IsFunction))) - - DynamicTarget -> case cmm_args of - (fn,_):rest -> (unzip rest, fn) - [] -> panic "cgForeignCall []" - fc = ForeignConvention cconv arg_hints res_hints CmmMayReturn - call_target = ForeignTarget cmm_target fc - - -- we want to emit code for the call, and then emitReturn. - -- However, if the sequel is AssignTo, we shortcut a little - -- and generate a foreign call that assigns the results - -- directly. Otherwise we end up generating a bunch of - -- useless "r = r" assignments, which are not merely annoying: - -- they prevent the common block elimination from working correctly - -- in the case of a safe foreign call. - -- See Note [safe foreign call convention] - -- - ; sequel <- getSequel - ; case sequel of - AssignTo assign_to_these _ -> - emitForeignCall safety assign_to_these call_target call_args - - _something_else -> - do { _ <- emitForeignCall safety res_regs call_target call_args - ; emitReturn (map (CmmReg . CmmLocal) res_regs) - } - } - -{- Note [safe foreign call convention] - -The simple thing to do for a safe foreign call would be the same as an -unsafe one: just - - emitForeignCall ... - emitReturn ... - -but consider what happens in this case - - case foo x y z of - (# s, r #) -> ... - -The sequel is AssignTo [r]. The call to newUnboxedTupleRegs picks [r] -as the result reg, and we generate - - r = foo(x,y,z) returns to L1 -- emitForeignCall - L1: - r = r -- emitReturn - goto L2 -L2: - ... - -Now L1 is a proc point (by definition, it is the continuation of the -safe foreign call). If L2 does a heap check, then L2 will also be a -proc point. - -Furthermore, the stack layout algorithm has to arrange to save r -somewhere between the call and the jump to L1, which is annoying: we -would have to treat r differently from the other live variables, which -have to be saved *before* the call. - -So we adopt a special convention for safe foreign calls: the results -are copied out according to the NativeReturn convention by the call, -and the continuation of the call should copyIn the results. (The -copyOut code is actually inserted when the safe foreign call is -lowered later). The result regs attached to the safe foreign call are -only used temporarily to hold the results before they are copied out. - -We will now generate this: - - r = foo(x,y,z) returns to L1 - L1: - r = R1 -- copyIn, inserted by mkSafeCall - goto L2 - L2: - ... r ... - -And when the safe foreign call is lowered later (see Note [lower safe -foreign calls]) we get this: - - suspendThread() - r = foo(x,y,z) - resumeThread() - R1 = r -- copyOut, inserted by lowerSafeForeignCall - jump L1 - L1: - r = R1 -- copyIn, inserted by mkSafeCall - goto L2 - L2: - ... r ... - -Now consider what happens if L2 does a heap check: the Adams -optimisation kicks in and commons up L1 with the heap-check -continuation, resulting in just one proc point instead of two. Yay! --} - - -emitCCall :: [(CmmFormal,ForeignHint)] - -> CmmExpr - -> [(CmmActual,ForeignHint)] - -> FCode () -emitCCall hinted_results fn hinted_args - = void $ emitForeignCall PlayRisky results target args - where - (args, arg_hints) = unzip hinted_args - (results, result_hints) = unzip hinted_results - target = ForeignTarget fn fc - fc = ForeignConvention CCallConv arg_hints result_hints CmmMayReturn - - -emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode () -emitPrimCall res op args - = void $ emitForeignCall PlayRisky res (PrimTarget op) args - --- alternative entry point, used by CmmParse -emitForeignCall - :: Safety - -> [CmmFormal] -- where to put the results - -> ForeignTarget -- the op - -> [CmmActual] -- arguments - -> FCode ReturnKind -emitForeignCall safety results target args - | not (playSafe safety) = do - dflags <- getDynFlags - let (caller_save, caller_load) = callerSaveVolatileRegs dflags - emit caller_save - target' <- load_target_into_temp target - args' <- mapM maybe_assign_temp args - emit $ mkUnsafeCall target' results args' - emit caller_load - return AssignedDirectly - - | otherwise = do - dflags <- getDynFlags - updfr_off <- getUpdFrameOff - target' <- load_target_into_temp target - args' <- mapM maybe_assign_temp args - k <- newBlockId - let (off, _, copyout) = copyInOflow dflags NativeReturn (Young k) results [] - -- see Note [safe foreign call convention] - tscope <- getTickScope - emit $ - ( mkStore (CmmStackSlot (Young k) (widthInBytes (wordWidth dflags))) - (CmmLit (CmmBlock k)) - <*> mkLast (CmmForeignCall { tgt = target' - , res = results - , args = args' - , succ = k - , ret_args = off - , ret_off = updfr_off - , intrbl = playInterruptible safety }) - <*> mkLabel k tscope - <*> copyout - ) - return (ReturnedTo k off) - -load_target_into_temp :: ForeignTarget -> FCode ForeignTarget -load_target_into_temp (ForeignTarget expr conv) = do - tmp <- maybe_assign_temp expr - return (ForeignTarget tmp conv) -load_target_into_temp other_target@(PrimTarget _) = - return other_target - --- What we want to do here is create a new temporary for the foreign --- call argument if it is not safe to use the expression directly, --- because the expression mentions caller-saves GlobalRegs (see --- Note [Register Parameter Passing]). --- --- However, we can't pattern-match on the expression here, because --- this is used in a loop by CmmParse, and testing the expression --- results in a black hole. So we always create a temporary, and rely --- on CmmSink to clean it up later. (Yuck, ToDo). The generated code --- ends up being the same, at least for the RTS .cmm code. --- -maybe_assign_temp :: CmmExpr -> FCode CmmExpr -maybe_assign_temp e = do - dflags <- getDynFlags - reg <- newTemp (cmmExprType dflags e) - emitAssign (CmmLocal reg) e - return (CmmReg (CmmLocal reg)) - --- ----------------------------------------------------------------------------- --- Save/restore the thread state in the TSO - --- This stuff can't be done in suspendThread/resumeThread, because it --- refers to global registers which aren't available in the C world. - -emitSaveThreadState :: FCode () -emitSaveThreadState = do - dflags <- getDynFlags - code <- saveThreadState dflags - emit code - --- | Produce code to save the current thread state to @CurrentTSO@ -saveThreadState :: MonadUnique m => DynFlags -> m CmmAGraph -saveThreadState dflags = do - tso <- newTemp (gcWord dflags) - close_nursery <- closeNursery dflags tso - pure $ catAGraphs [ - -- tso = CurrentTSO; - mkAssign (CmmLocal tso) currentTSOExpr, - -- tso->stackobj->sp = Sp; - mkStore (cmmOffset dflags - (CmmLoad (cmmOffset dflags - (CmmReg (CmmLocal tso)) - (tso_stackobj dflags)) - (bWord dflags)) - (stack_SP dflags)) - spExpr, - close_nursery, - -- and save the current cost centre stack in the TSO when profiling: - if gopt Opt_SccProfilingOn dflags then - mkStore (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) cccsExpr - else mkNop - ] - -emitCloseNursery :: FCode () -emitCloseNursery = do - dflags <- getDynFlags - tso <- newTemp (bWord dflags) - code <- closeNursery dflags tso - emit $ mkAssign (CmmLocal tso) currentTSOExpr <*> code - -{- | -@closeNursery dflags tso@ produces code to close the nursery. -A local register holding the value of @CurrentTSO@ is expected for -efficiency. - -Closing the nursery corresponds to the following code: - -@ - tso = CurrentTSO; - cn = CurrentNuresry; - - // Update the allocation limit for the current thread. We don't - // check to see whether it has overflowed at this point, that check is - // made when we run out of space in the current heap block (stg_gc_noregs) - // and in the scheduler when context switching (schedulePostRunThread). - tso->alloc_limit -= Hp + WDS(1) - cn->start; - - // Set cn->free to the next unoccupied word in the block - cn->free = Hp + WDS(1); -@ --} -closeNursery :: MonadUnique m => DynFlags -> LocalReg -> m CmmAGraph -closeNursery df tso = do - let tsoreg = CmmLocal tso - cnreg <- CmmLocal <$> newTemp (bWord df) - pure $ catAGraphs [ - mkAssign cnreg currentNurseryExpr, - - -- CurrentNursery->free = Hp+1; - mkStore (nursery_bdescr_free df cnreg) (cmmOffsetW df hpExpr 1), - - let alloc = - CmmMachOp (mo_wordSub df) - [ cmmOffsetW df hpExpr 1 - , CmmLoad (nursery_bdescr_start df cnreg) (bWord df) - ] - - alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df) - in - - -- tso->alloc_limit += alloc - mkStore alloc_limit (CmmMachOp (MO_Sub W64) - [ CmmLoad alloc_limit b64 - , CmmMachOp (mo_WordTo64 df) [alloc] ]) - ] - -emitLoadThreadState :: FCode () -emitLoadThreadState = do - dflags <- getDynFlags - code <- loadThreadState dflags - emit code - --- | Produce code to load the current thread state from @CurrentTSO@ -loadThreadState :: MonadUnique m => DynFlags -> m CmmAGraph -loadThreadState dflags = do - tso <- newTemp (gcWord dflags) - stack <- newTemp (gcWord dflags) - open_nursery <- openNursery dflags tso - pure $ catAGraphs [ - -- tso = CurrentTSO; - mkAssign (CmmLocal tso) currentTSOExpr, - -- stack = tso->stackobj; - mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)), - -- Sp = stack->sp; - mkAssign spReg (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)), - -- SpLim = stack->stack + RESERVED_STACK_WORDS; - mkAssign spLimReg (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags)) - (rESERVED_STACK_WORDS dflags)), - -- HpAlloc = 0; - -- HpAlloc is assumed to be set to non-zero only by a failed - -- a heap check, see HeapStackCheck.cmm:GC_GENERIC - mkAssign hpAllocReg (zeroExpr dflags), - open_nursery, - -- and load the current cost centre stack from the TSO when profiling: - if gopt Opt_SccProfilingOn dflags - then storeCurCCS - (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) - (tso_CCCS dflags)) (ccsType dflags)) - else mkNop - ] - - -emitOpenNursery :: FCode () -emitOpenNursery = do - dflags <- getDynFlags - tso <- newTemp (bWord dflags) - code <- openNursery dflags tso - emit $ mkAssign (CmmLocal tso) currentTSOExpr <*> code - -{- | -@openNursery dflags tso@ produces code to open the nursery. A local register -holding the value of @CurrentTSO@ is expected for efficiency. - -Opening the nursery corresponds to the following code: - -@ - tso = CurrentTSO; - cn = CurrentNursery; - bdfree = CurrentNursery->free; - bdstart = CurrentNursery->start; - - // We *add* the currently occupied portion of the nursery block to - // the allocation limit, because we will subtract it again in - // closeNursery. - tso->alloc_limit += bdfree - bdstart; - - // Set Hp to the last occupied word of the heap block. Why not the - // next unocupied word? Doing it this way means that we get to use - // an offset of zero more often, which might lead to slightly smaller - // code on some architectures. - Hp = bdfree - WDS(1); - - // Set HpLim to the end of the current nursery block (note that this block - // might be a block group, consisting of several adjacent blocks. - HpLim = bdstart + CurrentNursery->blocks*BLOCK_SIZE_W - 1; -@ --} -openNursery :: MonadUnique m => DynFlags -> LocalReg -> m CmmAGraph -openNursery df tso = do - let tsoreg = CmmLocal tso - cnreg <- CmmLocal <$> newTemp (bWord df) - bdfreereg <- CmmLocal <$> newTemp (bWord df) - bdstartreg <- CmmLocal <$> newTemp (bWord df) - - -- These assignments are carefully ordered to reduce register - -- pressure and generate not completely awful code on x86. To see - -- what code we generate, look at the assembly for - -- stg_returnToStackTop in rts/StgStartup.cmm. - pure $ catAGraphs [ - mkAssign cnreg currentNurseryExpr, - mkAssign bdfreereg (CmmLoad (nursery_bdescr_free df cnreg) (bWord df)), - - -- Hp = CurrentNursery->free - 1; - mkAssign hpReg (cmmOffsetW df (CmmReg bdfreereg) (-1)), - - mkAssign bdstartreg (CmmLoad (nursery_bdescr_start df cnreg) (bWord df)), - - -- HpLim = CurrentNursery->start + - -- CurrentNursery->blocks*BLOCK_SIZE_W - 1; - mkAssign hpLimReg - (cmmOffsetExpr df - (CmmReg bdstartreg) - (cmmOffset df - (CmmMachOp (mo_wordMul df) [ - CmmMachOp (MO_SS_Conv W32 (wordWidth df)) - [CmmLoad (nursery_bdescr_blocks df cnreg) b32], - mkIntExpr df (bLOCK_SIZE df) - ]) - (-1) - ) - ), - - -- alloc = bd->free - bd->start - let alloc = - CmmMachOp (mo_wordSub df) [CmmReg bdfreereg, CmmReg bdstartreg] - - alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df) - in - - -- tso->alloc_limit += alloc - mkStore alloc_limit (CmmMachOp (MO_Add W64) - [ CmmLoad alloc_limit b64 - , CmmMachOp (mo_WordTo64 df) [alloc] ]) - - ] - -nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks - :: DynFlags -> CmmReg -> CmmExpr -nursery_bdescr_free dflags cn = - cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_free dflags) -nursery_bdescr_start dflags cn = - cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_start dflags) -nursery_bdescr_blocks dflags cn = - cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_blocks dflags) - -tso_stackobj, tso_CCCS, tso_alloc_limit, stack_STACK, stack_SP :: DynFlags -> ByteOff -tso_stackobj dflags = closureField dflags (oFFSET_StgTSO_stackobj dflags) -tso_alloc_limit dflags = closureField dflags (oFFSET_StgTSO_alloc_limit dflags) -tso_CCCS dflags = closureField dflags (oFFSET_StgTSO_cccs dflags) -stack_STACK dflags = closureField dflags (oFFSET_StgStack_stack dflags) -stack_SP dflags = closureField dflags (oFFSET_StgStack_sp dflags) - - -closureField :: DynFlags -> ByteOff -> ByteOff -closureField dflags off = off + fixedHdrSize dflags - --- Note [Unlifted boxed arguments to foreign calls] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- --- For certain types passed to foreign calls, we adjust the actual --- value passed to the call. For ByteArray#, Array#, SmallArray#, --- and ArrayArray#, we pass the address of the array's payload, not --- the address of the heap object. For example, consider --- foreign import "c_foo" foo :: ByteArray# -> Int# -> IO () --- At a Haskell call like `foo x y`, we'll generate a C call that --- is more like --- c_foo( x+8, y ) --- where the "+8" takes the heap pointer (x :: ByteArray#) and moves --- it past the header words of the ByteArray object to point directly --- to the data inside the ByteArray#. (The exact offset depends --- on the target architecture and on profiling) By contrast, (y :: Int#) --- requires no such adjustment. --- --- This adjustment is performed by 'add_shim'. The size of the --- adjustment depends on the type of heap object. But --- how can we determine that type? There are two available options. --- We could use the types of the actual values that the foreign call --- has been applied to, or we could use the types present in the --- foreign function's type. Prior to GHC 8.10, we used the former --- strategy since it's a little more simple. However, in issue #16650 --- and more compellingly in the comments of --- https://gitlab.haskell.org/ghc/ghc/merge_requests/939, it was --- demonstrated that this leads to bad behavior in the presence --- of unsafeCoerce#. Returning to the above example, suppose the --- Haskell call looked like --- foo (unsafeCoerce# p) --- where the types of expressions comprising the arguments are --- p :: (Any :: TYPE 'UnliftedRep) --- i :: Int# --- so that the unsafe-coerce is between Any and ByteArray#. --- These two types have the same kind (they are both represented by --- a heap pointer) so no GC errors will occur if we do this unsafe coerce. --- By the time this gets to the code generator the cast has been --- discarded so we have --- foo p y --- But we *must* adjust the pointer to p by a ByteArray# shim, --- *not* by an Any shim (the Any shim involves no offset at all). --- --- To avoid this bad behavior, we adopt the second strategy: use --- the types present in the foreign function's type. --- In collectStgFArgTypes, we convert the foreign function's --- type to a list of StgFArgType. Then, in add_shim, we interpret --- these as numeric offsets. - -getFCallArgs :: - [StgArg] - -> Type -- the type of the foreign function - -> FCode [(CmmExpr, ForeignHint)] --- (a) Drop void args --- (b) Add foreign-call shim code --- It's (b) that makes this differ from getNonVoidArgAmodes --- Precondition: args and typs have the same length --- See Note [Unlifted boxed arguments to foreign calls] -getFCallArgs args typ - = do { mb_cmms <- mapM get (zipEqual "getFCallArgs" args (collectStgFArgTypes typ)) - ; return (catMaybes mb_cmms) } - where - get (arg,typ) - | null arg_reps - = return Nothing - | otherwise - = do { cmm <- getArgAmode (NonVoid arg) - ; dflags <- getDynFlags - ; return (Just (add_shim dflags typ cmm, hint)) } - where - arg_ty = stgArgType arg - arg_reps = typePrimRep arg_ty - hint = typeForeignHint arg_ty - --- The minimum amount of information needed to determine --- the offset to apply to an argument to a foreign call. --- See Note [Unlifted boxed arguments to foreign calls] -data StgFArgType - = StgPlainType - | StgArrayType - | StgSmallArrayType - | StgByteArrayType - --- See Note [Unlifted boxed arguments to foreign calls] -add_shim :: DynFlags -> StgFArgType -> CmmExpr -> CmmExpr -add_shim dflags ty expr = case ty of - StgPlainType -> expr - StgArrayType -> cmmOffsetB dflags expr (arrPtrsHdrSize dflags) - StgSmallArrayType -> cmmOffsetB dflags expr (smallArrPtrsHdrSize dflags) - StgByteArrayType -> cmmOffsetB dflags expr (arrWordsHdrSize dflags) - --- From a function, extract information needed to determine --- the offset of each argument when used as a C FFI argument. --- See Note [Unlifted boxed arguments to foreign calls] -collectStgFArgTypes :: Type -> [StgFArgType] -collectStgFArgTypes = go [] - where - -- Skip foralls - go bs (ForAllTy _ res) = go bs res - go bs (AppTy{}) = reverse bs - go bs (TyConApp{}) = reverse bs - go bs (LitTy{}) = reverse bs - go bs (TyVarTy{}) = reverse bs - go _ (CastTy{}) = panic "myCollectTypeArgs: CastTy" - go _ (CoercionTy{}) = panic "myCollectTypeArgs: CoercionTy" - go bs (FunTy {ft_arg = arg, ft_res=res}) = - go (typeToStgFArgType arg:bs) res - --- Choose the offset based on the type. For anything other --- than an unlifted boxed type, there is no offset. --- See Note [Unlifted boxed arguments to foreign calls] -typeToStgFArgType :: Type -> StgFArgType -typeToStgFArgType typ - | tycon == arrayPrimTyCon = StgArrayType - | tycon == mutableArrayPrimTyCon = StgArrayType - | tycon == arrayArrayPrimTyCon = StgArrayType - | tycon == mutableArrayArrayPrimTyCon = StgArrayType - | tycon == smallArrayPrimTyCon = StgSmallArrayType - | tycon == smallMutableArrayPrimTyCon = StgSmallArrayType - | tycon == byteArrayPrimTyCon = StgByteArrayType - | tycon == mutableByteArrayPrimTyCon = StgByteArrayType - | otherwise = StgPlainType - where - -- Should be a tycon app, since this is a foreign call. We look - -- through newtypes so the offset does not change if a user replaces - -- a type in a foreign function signature with a representationally - -- equivalent newtype. - tycon = tyConAppTyCon (unwrapType typ) - diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs deleted file mode 100644 index da9e85f1e7..0000000000 --- a/compiler/codeGen/StgCmmHeap.hs +++ /dev/null @@ -1,680 +0,0 @@ ------------------------------------------------------------------------------ --- --- Stg to C--: heap management functions --- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ - -module StgCmmHeap ( - getVirtHp, setVirtHp, setRealHp, - getHpRelOffset, - - entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo, - heapStackCheckGen, - entryHeapCheck', - - mkStaticClosureFields, mkStaticClosure, - - allocDynClosure, allocDynClosureCmm, allocHeapClosure, - emitSetDynHdr - ) where - -import GhcPrelude hiding ((<*>)) - -import StgSyn -import CLabel -import StgCmmLayout -import StgCmmUtils -import StgCmmMonad -import StgCmmProf (profDynAlloc, dynProfHdr, staticProfHdr) -import StgCmmTicky -import StgCmmClosure -import StgCmmEnv - -import MkGraph - -import Hoopl.Label -import SMRep -import BlockId -import Cmm -import CmmUtils -import CostCentre -import IdInfo( CafInfo(..), mayHaveCafRefs ) -import Id ( Id ) -import Module -import DynFlags -import FastString( mkFastString, fsLit ) -import Panic( sorry ) - -import Control.Monad (when) -import Data.Maybe (isJust) - ------------------------------------------------------------ --- Initialise dynamic heap objects ------------------------------------------------------------ - -allocDynClosure - :: Maybe Id - -> CmmInfoTable - -> LambdaFormInfo - -> CmmExpr -- Cost Centre to stick in the object - -> CmmExpr -- Cost Centre to blame for this alloc - -- (usually the same; sometimes "OVERHEAD") - - -> [(NonVoid StgArg, VirtualHpOffset)] -- Offsets from start of object - -- ie Info ptr has offset zero. - -- No void args in here - -> FCode CmmExpr -- returns Hp+n - -allocDynClosureCmm - :: Maybe Id -> CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr - -> [(CmmExpr, ByteOff)] - -> FCode CmmExpr -- returns Hp+n - --- allocDynClosure allocates the thing in the heap, --- and modifies the virtual Hp to account for this. --- The second return value is the graph that sets the value of the --- returned LocalReg, which should point to the closure after executing --- the graph. - --- allocDynClosure returns an (Hp+8) CmmExpr, and hence the result is --- only valid until Hp is changed. The caller should assign the --- result to a LocalReg if it is required to remain live. --- --- The reason we don't assign it to a LocalReg here is that the caller --- is often about to call regIdInfo, which immediately assigns the --- result of allocDynClosure to a new temp in order to add the tag. --- So by not generating a LocalReg here we avoid a common source of --- new temporaries and save some compile time. This can be quite --- significant - see test T4801. - - -allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets = do - let (args, offsets) = unzip args_w_offsets - cmm_args <- mapM getArgAmode args -- No void args - allocDynClosureCmm mb_id info_tbl lf_info - use_cc _blame_cc (zip cmm_args offsets) - - -allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do - -- SAY WHAT WE ARE ABOUT TO DO - let rep = cit_rep info_tbl - tickyDynAlloc mb_id rep lf_info - let info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl)) - allocHeapClosure rep info_ptr use_cc amodes_w_offsets - - --- | Low-level heap object allocation. -allocHeapClosure - :: SMRep -- ^ representation of the object - -> CmmExpr -- ^ info pointer - -> CmmExpr -- ^ cost centre - -> [(CmmExpr,ByteOff)] -- ^ payload - -> FCode CmmExpr -- ^ returns the address of the object -allocHeapClosure rep info_ptr use_cc payload = do - profDynAlloc rep use_cc - - virt_hp <- getVirtHp - - -- Find the offset of the info-ptr word - let info_offset = virt_hp + 1 - -- info_offset is the VirtualHpOffset of the first - -- word of the new object - -- Remember, virtHp points to last allocated word, - -- ie 1 *before* the info-ptr word of new object. - - base <- getHpRelOffset info_offset - emitComment $ mkFastString "allocHeapClosure" - emitSetDynHdr base info_ptr use_cc - - -- Fill in the fields - hpStore base payload - - -- Bump the virtual heap pointer - dflags <- getDynFlags - setVirtHp (virt_hp + heapClosureSizeW dflags rep) - - return base - - -emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode () -emitSetDynHdr base info_ptr ccs - = do dflags <- getDynFlags - hpStore base (zip (header dflags) [0, wORD_SIZE dflags ..]) - where - header :: DynFlags -> [CmmExpr] - header dflags = [info_ptr] ++ dynProfHdr dflags ccs - -- ToDo: Parallel stuff - -- No ticky header - --- Store the item (expr,off) in base[off] -hpStore :: CmmExpr -> [(CmmExpr, ByteOff)] -> FCode () -hpStore base vals = do - dflags <- getDynFlags - sequence_ $ - [ emitStore (cmmOffsetB dflags base off) val | (val,off) <- vals ] - ------------------------------------------------------------ --- Layout of static closures ------------------------------------------------------------ - --- Make a static closure, adding on any extra padding needed for CAFs, --- and adding a static link field if necessary. - -mkStaticClosureFields - :: DynFlags - -> CmmInfoTable - -> CostCentreStack - -> CafInfo - -> [CmmLit] -- Payload - -> [CmmLit] -- The full closure -mkStaticClosureFields dflags info_tbl ccs caf_refs payload - = mkStaticClosure dflags info_lbl ccs payload padding - static_link_field saved_info_field - where - info_lbl = cit_lbl info_tbl - - -- CAFs must have consistent layout, regardless of whether they - -- are actually updatable or not. The layout of a CAF is: - -- - -- 3 saved_info - -- 2 static_link - -- 1 indirectee - -- 0 info ptr - -- - -- the static_link and saved_info fields must always be in the - -- same place. So we use isThunkRep rather than closureUpdReqd - -- here: - - is_caf = isThunkRep (cit_rep info_tbl) - - padding - | is_caf && null payload = [mkIntCLit dflags 0] - | otherwise = [] - - static_link_field - | is_caf || staticClosureNeedsLink (mayHaveCafRefs caf_refs) info_tbl - = [static_link_value] - | otherwise - = [] - - saved_info_field - | is_caf = [mkIntCLit dflags 0] - | otherwise = [] - - -- For a static constructor which has NoCafRefs, we set the - -- static link field to a non-zero value so the garbage - -- collector will ignore it. - static_link_value - | mayHaveCafRefs caf_refs = mkIntCLit dflags 0 - | otherwise = mkIntCLit dflags 3 -- No CAF refs - -- See Note [STATIC_LINK fields] - -- in rts/sm/Storage.h - -mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit] - -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit] -mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field - = [CmmLabel info_lbl] - ++ staticProfHdr dflags ccs - ++ payload - ++ padding - ++ static_link_field - ++ saved_info_field - ------------------------------------------------------------ --- Heap overflow checking ------------------------------------------------------------ - -{- Note [Heap checks] - ~~~~~~~~~~~~~~~~~~ -Heap checks come in various forms. We provide the following entry -points to the runtime system, all of which use the native C-- entry -convention. - - * gc() performs garbage collection and returns - nothing to its caller - - * A series of canned entry points like - r = gc_1p( r ) - where r is a pointer. This performs gc, and - then returns its argument r to its caller. - - * A series of canned entry points like - gcfun_2p( f, x, y ) - where f is a function closure of arity 2 - This performs garbage collection, keeping alive the - three argument ptrs, and then tail-calls f(x,y) - -These are used in the following circumstances - -* entryHeapCheck: Function entry - (a) With a canned GC entry sequence - f( f_clo, x:ptr, y:ptr ) { - Hp = Hp+8 - if Hp > HpLim goto L - ... - L: HpAlloc = 8 - jump gcfun_2p( f_clo, x, y ) } - Note the tail call to the garbage collector; - it should do no register shuffling - - (b) No canned sequence - f( f_clo, x:ptr, y:ptr, ...etc... ) { - T: Hp = Hp+8 - if Hp > HpLim goto L - ... - L: HpAlloc = 8 - call gc() -- Needs an info table - goto T } - -* altHeapCheck: Immediately following an eval - Started as - case f x y of r { (p,q) -> rhs } - (a) With a canned sequence for the results of f - (which is the very common case since - all boxed cases return just one pointer - ... - r = f( x, y ) - K: -- K needs an info table - Hp = Hp+8 - if Hp > HpLim goto L - ...code for rhs... - - L: r = gc_1p( r ) - goto K } - - Here, the info table needed by the call - to gc_1p should be the *same* as the - one for the call to f; the C-- optimiser - spots this sharing opportunity) - - (b) No canned sequence for results of f - Note second info table - ... - (r1,r2,r3) = call f( x, y ) - K: - Hp = Hp+8 - if Hp > HpLim goto L - ...code for rhs... - - L: call gc() -- Extra info table here - goto K - -* generalHeapCheck: Anywhere else - e.g. entry to thunk - case branch *not* following eval, - or let-no-escape - Exactly the same as the previous case: - - K: -- K needs an info table - Hp = Hp+8 - if Hp > HpLim goto L - ... - - L: call gc() - goto K --} - --------------------------------------------------------------- --- A heap/stack check at a function or thunk entry point. - -entryHeapCheck :: ClosureInfo - -> Maybe LocalReg -- Function (closure environment) - -> Int -- Arity -- not same as len args b/c of voids - -> [LocalReg] -- Non-void args (empty for thunk) - -> FCode () - -> FCode () - -entryHeapCheck cl_info nodeSet arity args code - = entryHeapCheck' is_fastf node arity args code - where - node = case nodeSet of - Just r -> CmmReg (CmmLocal r) - Nothing -> CmmLit (CmmLabel $ staticClosureLabel cl_info) - - is_fastf = case closureFunInfo cl_info of - Just (_, ArgGen _) -> False - _otherwise -> True - --- | lower-level version for CmmParse -entryHeapCheck' :: Bool -- is a known function pattern - -> CmmExpr -- expression for the closure pointer - -> Int -- Arity -- not same as len args b/c of voids - -> [LocalReg] -- Non-void args (empty for thunk) - -> FCode () - -> FCode () -entryHeapCheck' is_fastf node arity args code - = do dflags <- getDynFlags - let is_thunk = arity == 0 - - args' = map (CmmReg . CmmLocal) args - stg_gc_fun = CmmReg (CmmGlobal GCFun) - stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1) - - {- Thunks: jump stg_gc_enter_1 - - Function (fast): call (NativeNode) stg_gc_fun(fun, args) - - Function (slow): call (slow) stg_gc_fun(fun, args) - -} - gc_call upd - | is_thunk - = mkJump dflags NativeNodeCall stg_gc_enter1 [node] upd - - | is_fastf - = mkJump dflags NativeNodeCall stg_gc_fun (node : args') upd - - | otherwise - = mkJump dflags Slow stg_gc_fun (node : args') upd - - updfr_sz <- getUpdFrameOff - - loop_id <- newBlockId - emitLabel loop_id - heapCheck True True (gc_call updfr_sz <*> mkBranch loop_id) code - --- ------------------------------------------------------------ --- A heap/stack check in a case alternative - - --- If there are multiple alts and we need to GC, but don't have a --- continuation already (the scrut was simple), then we should --- pre-generate the continuation. (if there are multiple alts it is --- always a canned GC point). - --- altHeapCheck: --- If we have a return continuation, --- then if it is a canned GC pattern, --- then we do mkJumpReturnsTo --- else we do a normal call to stg_gc_noregs --- else if it is a canned GC pattern, --- then generate the continuation and do mkCallReturnsTo --- else we do a normal call to stg_gc_noregs - -altHeapCheck :: [LocalReg] -> FCode a -> FCode a -altHeapCheck regs code = altOrNoEscapeHeapCheck False regs code - -altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a -altOrNoEscapeHeapCheck checkYield regs code = do - dflags <- getDynFlags - case cannedGCEntryPoint dflags regs of - Nothing -> genericGC checkYield code - Just gc -> do - lret <- newBlockId - let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) regs [] - lcont <- newBlockId - tscope <- getTickScope - emitOutOfLine lret (copyin <*> mkBranch lcont, tscope) - emitLabel lcont - cannedGCReturnsTo checkYield False gc regs lret off code - -altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a -altHeapCheckReturnsTo regs lret off code - = do dflags <- getDynFlags - case cannedGCEntryPoint dflags regs of - Nothing -> genericGC False code - Just gc -> cannedGCReturnsTo False True gc regs lret off code - --- noEscapeHeapCheck is implemented identically to altHeapCheck (which --- is more efficient), but cannot be optimized away in the non-allocating --- case because it may occur in a loop -noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a -noEscapeHeapCheck regs code = altOrNoEscapeHeapCheck True regs code - -cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff - -> FCode a - -> FCode a -cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code - = do dflags <- getDynFlags - updfr_sz <- getUpdFrameOff - heapCheck False checkYield (gc_call dflags gc updfr_sz) code - where - reg_exprs = map (CmmReg . CmmLocal) regs - -- Note [stg_gc arguments] - - -- NB. we use the NativeReturn convention for passing arguments - -- to the canned heap-check routines, because we are in a case - -- alternative and hence the [LocalReg] was passed to us in the - -- NativeReturn convention. - gc_call dflags label sp - | cont_on_stack - = mkJumpReturnsTo dflags label NativeReturn reg_exprs lret off sp - | otherwise - = mkCallReturnsTo dflags label NativeReturn reg_exprs lret off sp [] - -genericGC :: Bool -> FCode a -> FCode a -genericGC checkYield code - = do updfr_sz <- getUpdFrameOff - lretry <- newBlockId - emitLabel lretry - call <- mkCall generic_gc (GC, GC) [] [] updfr_sz [] - heapCheck False checkYield (call <*> mkBranch lretry) code - -cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr -cannedGCEntryPoint dflags regs - = case map localRegType regs of - [] -> Just (mkGcLabel "stg_gc_noregs") - [ty] - | isGcPtrType ty -> Just (mkGcLabel "stg_gc_unpt_r1") - | isFloatType ty -> case width of - W32 -> Just (mkGcLabel "stg_gc_f1") - W64 -> Just (mkGcLabel "stg_gc_d1") - _ -> Nothing - - | width == wordWidth dflags -> Just (mkGcLabel "stg_gc_unbx_r1") - | width == W64 -> Just (mkGcLabel "stg_gc_l1") - | otherwise -> Nothing - where - width = typeWidth ty - [ty1,ty2] - | isGcPtrType ty1 - && isGcPtrType ty2 -> Just (mkGcLabel "stg_gc_pp") - [ty1,ty2,ty3] - | isGcPtrType ty1 - && isGcPtrType ty2 - && isGcPtrType ty3 -> Just (mkGcLabel "stg_gc_ppp") - [ty1,ty2,ty3,ty4] - | isGcPtrType ty1 - && isGcPtrType ty2 - && isGcPtrType ty3 - && isGcPtrType ty4 -> Just (mkGcLabel "stg_gc_pppp") - _otherwise -> Nothing - --- Note [stg_gc arguments] --- It might seem that we could avoid passing the arguments to the --- stg_gc function, because they are already in the right registers. --- While this is usually the case, it isn't always. Sometimes the --- code generator has cleverly avoided the eval in a case, e.g. in --- ffi/should_run/4221.hs we found --- --- case a_r1mb of z --- FunPtr x y -> ... --- --- where a_r1mb is bound a top-level constructor, and is known to be --- evaluated. The codegen just assigns x, y and z, and continues; --- R1 is never assigned. --- --- So we'll have to rely on optimisations to eliminatethese --- assignments where possible. - - --- | The generic GC procedure; no params, no results -generic_gc :: CmmExpr -generic_gc = mkGcLabel "stg_gc_noregs" - --- | Create a CLabel for calling a garbage collector entry point -mkGcLabel :: String -> CmmExpr -mkGcLabel s = CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit s))) - -------------------------------- -heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a -heapCheck checkStack checkYield do_gc code - = getHeapUsage $ \ hpHw -> - -- Emit heap checks, but be sure to do it lazily so - -- that the conditionals on hpHw don't cause a black hole - do { dflags <- getDynFlags - ; let mb_alloc_bytes - | hpHw > mBLOCK_SIZE = sorry $ unlines - [" Trying to allocate more than "++show mBLOCK_SIZE++" bytes.", - "", - "This is currently not possible due to a limitation of GHC's code generator.", - "See https://gitlab.haskell.org/ghc/ghc/issues/4505 for details.", - "Suggestion: read data from a file instead of having large static data", - "structures in code."] - | hpHw > 0 = Just (mkIntExpr dflags (hpHw * (wORD_SIZE dflags))) - | otherwise = Nothing - where mBLOCK_SIZE = bLOCKS_PER_MBLOCK dflags * bLOCK_SIZE_W dflags - stk_hwm | checkStack = Just (CmmLit CmmHighStackMark) - | otherwise = Nothing - ; codeOnly $ do_checks stk_hwm checkYield mb_alloc_bytes do_gc - ; tickyAllocHeap True hpHw - ; setRealHp hpHw - ; code } - -heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode () -heapStackCheckGen stk_hwm mb_bytes - = do updfr_sz <- getUpdFrameOff - lretry <- newBlockId - emitLabel lretry - call <- mkCall generic_gc (GC, GC) [] [] updfr_sz [] - do_checks stk_hwm False mb_bytes (call <*> mkBranch lretry) - --- Note [Single stack check] --- ~~~~~~~~~~~~~~~~~~~~~~~~~ --- When compiling a function we can determine how much stack space it --- will use. We therefore need to perform only a single stack check at --- the beginning of a function to see if we have enough stack space. --- --- The check boils down to comparing Sp-N with SpLim, where N is the --- amount of stack space needed (see Note [Stack usage] below). *BUT* --- at this stage of the pipeline we are not supposed to refer to Sp --- itself, because the stack is not yet manifest, so we don't quite --- know where Sp pointing. - --- So instead of referring directly to Sp - as we used to do in the --- past - the code generator uses (old + 0) in the stack check. That --- is the address of the first word of the old area, so if we add N --- we'll get the address of highest used word. --- --- This makes the check robust. For example, while we need to perform --- only one stack check for each function, we could in theory place --- more stack checks later in the function. They would be redundant, --- but not incorrect (in a sense that they should not change program --- behaviour). We need to make sure however that a stack check --- inserted after incrementing the stack pointer checks for a --- respectively smaller stack space. This would not be the case if the --- code generator produced direct references to Sp. By referencing --- (old + 0) we make sure that we always check for a correct amount of --- stack: when converting (old + 0) to Sp the stack layout phase takes --- into account changes already made to stack pointer. The idea for --- this change came from observations made while debugging #8275. - --- Note [Stack usage] --- ~~~~~~~~~~~~~~~~~~ --- At the moment we convert from STG to Cmm we don't know N, the --- number of bytes of stack that the function will use, so we use a --- special late-bound CmmLit, namely --- CmmHighStackMark --- to stand for the number of bytes needed. When the stack is made --- manifest, the number of bytes needed is calculated, and used to --- replace occurrences of CmmHighStackMark --- --- The (Maybe CmmExpr) passed to do_checks is usually --- Just (CmmLit CmmHighStackMark) --- but can also (in certain hand-written RTS functions) --- Just (CmmLit 8) or some other fixed valuet --- If it is Nothing, we don't generate a stack check at all. - -do_checks :: Maybe CmmExpr -- Should we check the stack? - -- See Note [Stack usage] - -> Bool -- Should we check for preemption? - -> Maybe CmmExpr -- Heap headroom (bytes) - -> CmmAGraph -- What to do on failure - -> FCode () -do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do - dflags <- getDynFlags - gc_id <- newBlockId - - let - Just alloc_lit = mb_alloc_lit - - bump_hp = cmmOffsetExprB dflags hpExpr alloc_lit - - -- Sp overflow if ((old + 0) - CmmHighStack < SpLim) - -- At the beginning of a function old + 0 = Sp - -- See Note [Single stack check] - sp_oflo sp_hwm = - CmmMachOp (mo_wordULt dflags) - [CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg))) - [CmmStackSlot Old 0, sp_hwm], - CmmReg spLimReg] - - -- Hp overflow if (Hp > HpLim) - -- (Hp has been incremented by now) - -- HpLim points to the LAST WORD of valid allocation space. - hp_oflo = CmmMachOp (mo_wordUGt dflags) [hpExpr, hpLimExpr] - - alloc_n = mkAssign hpAllocReg alloc_lit - - case mb_stk_hwm of - Nothing -> return () - Just stk_hwm -> tickyStackCheck - >> (emit =<< mkCmmIfGoto' (sp_oflo stk_hwm) gc_id (Just False) ) - - -- Emit new label that might potentially be a header - -- of a self-recursive tail call. - -- See Note [Self-recursive loop header]. - self_loop_info <- getSelfLoop - case self_loop_info of - Just (_, loop_header_id, _) - | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id - _otherwise -> return () - - if (isJust mb_alloc_lit) - then do - tickyHeapCheck - emitAssign hpReg bump_hp - emit =<< mkCmmIfThen' hp_oflo (alloc_n <*> mkBranch gc_id) (Just False) - else do - when (checkYield && not (gopt Opt_OmitYields dflags)) $ do - -- Yielding if HpLim == 0 - let yielding = CmmMachOp (mo_wordEq dflags) - [CmmReg hpLimReg, - CmmLit (zeroCLit dflags)] - emit =<< mkCmmIfGoto' yielding gc_id (Just False) - - tscope <- getTickScope - emitOutOfLine gc_id - (do_gc, tscope) -- this is expected to jump back somewhere - - -- Test for stack pointer exhaustion, then - -- bump heap pointer, and test for heap exhaustion - -- Note that we don't move the heap pointer unless the - -- stack check succeeds. Otherwise we might end up - -- with slop at the end of the current block, which can - -- confuse the LDV profiler. - --- Note [Self-recursive loop header] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- --- Self-recursive loop header is required by loopification optimization (See --- Note [Self-recursive tail calls] in StgCmmExpr). We emit it if: --- --- 1. There is information about self-loop in the FCode environment. We don't --- check the binder (first component of the self_loop_info) because we are --- certain that if the self-loop info is present then we are compiling the --- binder body. Reason: the only possible way to get here with the --- self_loop_info present is from closureCodeBody. --- --- 2. checkYield && isJust mb_stk_hwm. checkYield tells us that it is possible --- to preempt the heap check (see #367 for motivation behind this check). It --- is True for heap checks placed at the entry to a function and --- let-no-escape heap checks but false for other heap checks (eg. in case --- alternatives or created from hand-written high-level Cmm). The second --- check (isJust mb_stk_hwm) is true for heap checks at the entry to a --- function and some heap checks created in hand-written Cmm. Otherwise it --- is Nothing. In other words the only situation when both conditions are --- true is when compiling stack and heap checks at the entry to a --- function. This is the only situation when we want to emit a self-loop --- label. diff --git a/compiler/codeGen/StgCmmHpc.hs b/compiler/codeGen/StgCmmHpc.hs deleted file mode 100644 index 8e9676bd33..0000000000 --- a/compiler/codeGen/StgCmmHpc.hs +++ /dev/null @@ -1,48 +0,0 @@ ------------------------------------------------------------------------------ --- --- Code generation for coverage --- --- (c) Galois Connections, Inc. 2006 --- ------------------------------------------------------------------------------ - -module StgCmmHpc ( initHpc, mkTickBox ) where - -import GhcPrelude - -import StgCmmMonad - -import MkGraph -import CmmExpr -import CLabel -import Module -import CmmUtils -import StgCmmUtils -import HscTypes -import DynFlags - -import Control.Monad - -mkTickBox :: DynFlags -> Module -> Int -> CmmAGraph -mkTickBox dflags mod n - = mkStore tick_box (CmmMachOp (MO_Add W64) - [ CmmLoad tick_box b64 - , CmmLit (CmmInt 1 W64) - ]) - where - tick_box = cmmIndex dflags W64 - (CmmLit $ CmmLabel $ mkHpcTicksLabel $ mod) - n - -initHpc :: Module -> HpcInfo -> FCode () --- Emit top-level tables for HPC and return code to initialise -initHpc _ (NoHpcInfo {}) - = return () -initHpc this_mod (HpcInfo tickCount _hashNo) - = do dflags <- getDynFlags - when (gopt Opt_Hpc dflags) $ - do emitDataLits (mkHpcTicksLabel this_mod) - [ (CmmInt 0 W64) - | _ <- take tickCount [0 :: Int ..] - ] - diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs deleted file mode 100644 index 78a7cf3f85..0000000000 --- a/compiler/codeGen/StgCmmLayout.hs +++ /dev/null @@ -1,623 +0,0 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE CPP #-} - ------------------------------------------------------------------------------ --- --- Building info tables. --- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ - -module StgCmmLayout ( - mkArgDescr, - emitCall, emitReturn, adjustHpBackwards, - - emitClosureProcAndInfoTable, - emitClosureAndInfoTable, - - slowCall, directCall, - - FieldOffOrPadding(..), - ClosureHeader(..), - mkVirtHeapOffsets, - mkVirtHeapOffsetsWithPadding, - mkVirtConstrOffsets, - mkVirtConstrSizes, - getHpRelOffset, - - ArgRep(..), toArgRep, argRepSizeW -- re-exported from StgCmmArgRep - ) where - - -#include "HsVersions.h" - -import GhcPrelude hiding ((<*>)) - -import StgCmmClosure -import StgCmmEnv -import StgCmmArgRep -- notably: ( slowCallPattern ) -import StgCmmTicky -import StgCmmMonad -import StgCmmUtils - -import MkGraph -import SMRep -import BlockId -import Cmm -import CmmUtils -import CmmInfo -import CLabel -import StgSyn -import Id -import TyCon ( PrimRep(..), primRepSizeB ) -import BasicTypes ( RepArity ) -import DynFlags -import Module - -import Util -import Data.List -import Outputable -import FastString -import Control.Monad - ------------------------------------------------------------------------- --- Call and return sequences ------------------------------------------------------------------------- - --- | Return multiple values to the sequel --- --- If the sequel is @Return@ --- --- > return (x,y) --- --- If the sequel is @AssignTo [p,q]@ --- --- > p=x; q=y; --- -emitReturn :: [CmmExpr] -> FCode ReturnKind -emitReturn results - = do { dflags <- getDynFlags - ; sequel <- getSequel - ; updfr_off <- getUpdFrameOff - ; case sequel of - Return -> - do { adjustHpBackwards - ; let e = CmmLoad (CmmStackSlot Old updfr_off) (gcWord dflags) - ; emit (mkReturn dflags (entryCode dflags e) results updfr_off) - } - AssignTo regs adjust -> - do { when adjust adjustHpBackwards - ; emitMultiAssign regs results } - ; return AssignedDirectly - } - - --- | @emitCall conv fun args@ makes a call to the entry-code of @fun@, --- using the call/return convention @conv@, passing @args@, and --- returning the results to the current sequel. --- -emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ReturnKind -emitCall convs fun args - = emitCallWithExtraStack convs fun args noExtraStack - - --- | @emitCallWithExtraStack conv fun args stack@ makes a call to the --- entry-code of @fun@, using the call/return convention @conv@, --- passing @args@, pushing some extra stack frames described by --- @stack@, and returning the results to the current sequel. --- -emitCallWithExtraStack - :: (Convention, Convention) -> CmmExpr -> [CmmExpr] - -> [CmmExpr] -> FCode ReturnKind -emitCallWithExtraStack (callConv, retConv) fun args extra_stack - = do { dflags <- getDynFlags - ; adjustHpBackwards - ; sequel <- getSequel - ; updfr_off <- getUpdFrameOff - ; case sequel of - Return -> do - emit $ mkJumpExtra dflags callConv fun args updfr_off extra_stack - return AssignedDirectly - AssignTo res_regs _ -> do - k <- newBlockId - let area = Young k - (off, _, copyin) = copyInOflow dflags retConv area res_regs [] - copyout = mkCallReturnsTo dflags fun callConv args k off updfr_off - extra_stack - tscope <- getTickScope - emit (copyout <*> mkLabel k tscope <*> copyin) - return (ReturnedTo k off) - } - - -adjustHpBackwards :: FCode () --- This function adjusts the heap pointer just before a tail call or --- return. At a call or return, the virtual heap pointer may be less --- than the real Hp, because the latter was advanced to deal with --- the worst-case branch of the code, and we may be in a better-case --- branch. In that case, move the real Hp *back* and retract some --- ticky allocation count. --- --- It *does not* deal with high-water-mark adjustment. That's done by --- functions which allocate heap. -adjustHpBackwards - = do { hp_usg <- getHpUsage - ; let rHp = realHp hp_usg - vHp = virtHp hp_usg - adjust_words = vHp -rHp - ; new_hp <- getHpRelOffset vHp - - ; emit (if adjust_words == 0 - then mkNop - else mkAssign hpReg new_hp) -- Generates nothing when vHp==rHp - - ; tickyAllocHeap False adjust_words -- ...ditto - - ; setRealHp vHp - } - - -------------------------------------------------------------------------- --- Making calls: directCall and slowCall -------------------------------------------------------------------------- - --- General plan is: --- - we'll make *one* fast call, either to the function itself --- (directCall) or to stg_ap_<pat>_fast (slowCall) --- Any left-over arguments will be pushed on the stack, --- --- e.g. Sp[old+8] = arg1 --- Sp[old+16] = arg2 --- Sp[old+32] = stg_ap_pp_info --- R2 = arg3 --- R3 = arg4 --- call f() return to Nothing updfr_off: 32 - - -directCall :: Convention -> CLabel -> RepArity -> [StgArg] -> FCode ReturnKind --- (directCall f n args) --- calls f(arg1, ..., argn), and applies the result to the remaining args --- The function f has arity n, and there are guaranteed at least n args --- Both arity and args include void args -directCall conv lbl arity stg_args - = do { argreps <- getArgRepsAmodes stg_args - ; direct_call "directCall" conv lbl arity argreps } - - -slowCall :: CmmExpr -> [StgArg] -> FCode ReturnKind --- (slowCall fun args) applies fun to args, returning the results to Sequel -slowCall fun stg_args - = do dflags <- getDynFlags - argsreps <- getArgRepsAmodes stg_args - let (rts_fun, arity) = slowCallPattern (map fst argsreps) - - (r, slow_code) <- getCodeR $ do - r <- direct_call "slow_call" NativeNodeCall - (mkRtsApFastLabel rts_fun) arity ((P,Just fun):argsreps) - emitComment $ mkFastString ("slow_call for " ++ - showSDoc dflags (ppr fun) ++ - " with pat " ++ unpackFS rts_fun) - return r - - -- Note [avoid intermediate PAPs] - let n_args = length stg_args - if n_args > arity && optLevel dflags >= 2 - then do - funv <- (CmmReg . CmmLocal) `fmap` assignTemp fun - fun_iptr <- (CmmReg . CmmLocal) `fmap` - assignTemp (closureInfoPtr dflags (cmmUntag dflags funv)) - - -- ToDo: we could do slightly better here by reusing the - -- continuation from the slow call, which we have in r. - -- Also we'd like to push the continuation on the stack - -- before the branch, so that we only get one copy of the - -- code that saves all the live variables across the - -- call, but that might need some improvements to the - -- special case in the stack layout code to handle this - -- (see Note [diamond proc point]). - - fast_code <- getCode $ - emitCall (NativeNodeCall, NativeReturn) - (entryCode dflags fun_iptr) - (nonVArgs ((P,Just funv):argsreps)) - - slow_lbl <- newBlockId - fast_lbl <- newBlockId - is_tagged_lbl <- newBlockId - end_lbl <- newBlockId - - let correct_arity = cmmEqWord dflags (funInfoArity dflags fun_iptr) - (mkIntExpr dflags n_args) - - tscope <- getTickScope - emit (mkCbranch (cmmIsTagged dflags funv) - is_tagged_lbl slow_lbl (Just True) - <*> mkLabel is_tagged_lbl tscope - <*> mkCbranch correct_arity fast_lbl slow_lbl (Just True) - <*> mkLabel fast_lbl tscope - <*> fast_code - <*> mkBranch end_lbl - <*> mkLabel slow_lbl tscope - <*> slow_code - <*> mkLabel end_lbl tscope) - return r - - else do - emit slow_code - return r - - --- Note [avoid intermediate PAPs] --- --- A slow call which needs multiple generic apply patterns will be --- almost guaranteed to create one or more intermediate PAPs when --- applied to a function that takes the correct number of arguments. --- We try to avoid this situation by generating code to test whether --- we are calling a function with the correct number of arguments --- first, i.e.: --- --- if (TAG(f) != 0} { // f is not a thunk --- if (f->info.arity == n) { --- ... make a fast call to f ... --- } --- } --- ... otherwise make the slow call ... --- --- We *only* do this when the call requires multiple generic apply --- functions, which requires pushing extra stack frames and probably --- results in intermediate PAPs. (I say probably, because it might be --- that we're over-applying a function, but that seems even less --- likely). --- --- This very rarely applies, but if it does happen in an inner loop it --- can have a severe impact on performance (#6084). - - --------------- -direct_call :: String - -> Convention -- e.g. NativeNodeCall or NativeDirectCall - -> CLabel -> RepArity - -> [(ArgRep,Maybe CmmExpr)] -> FCode ReturnKind -direct_call caller call_conv lbl arity args - | debugIsOn && args `lengthLessThan` real_arity -- Too few args - = do -- Caller should ensure that there enough args! - pprPanic "direct_call" $ - text caller <+> ppr arity <+> - ppr lbl <+> ppr (length args) <+> - ppr (map snd args) <+> ppr (map fst args) - - | null rest_args -- Precisely the right number of arguments - = emitCall (call_conv, NativeReturn) target (nonVArgs args) - - | otherwise -- Note [over-saturated calls] - = do dflags <- getDynFlags - emitCallWithExtraStack (call_conv, NativeReturn) - target - (nonVArgs fast_args) - (nonVArgs (stack_args dflags)) - where - target = CmmLit (CmmLabel lbl) - (fast_args, rest_args) = splitAt real_arity args - stack_args dflags = slowArgs dflags rest_args - real_arity = case call_conv of - NativeNodeCall -> arity+1 - _ -> arity - - --- When constructing calls, it is easier to keep the ArgReps and the --- CmmExprs zipped together. However, a void argument has no --- representation, so we need to use Maybe CmmExpr (the alternative of --- using zeroCLit or even undefined would work, but would be ugly). --- -getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)] -getArgRepsAmodes = mapM getArgRepAmode - where getArgRepAmode arg - | V <- rep = return (V, Nothing) - | otherwise = do expr <- getArgAmode (NonVoid arg) - return (rep, Just expr) - where rep = toArgRep (argPrimRep arg) - -nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr] -nonVArgs [] = [] -nonVArgs ((_,Nothing) : args) = nonVArgs args -nonVArgs ((_,Just arg) : args) = arg : nonVArgs args - -{- -Note [over-saturated calls] - -The natural thing to do for an over-saturated call would be to call -the function with the correct number of arguments, and then apply the -remaining arguments to the value returned, e.g. - - f a b c d (where f has arity 2) - --> - r = call f(a,b) - call r(c,d) - -but this entails - - saving c and d on the stack - - making a continuation info table - - at the continuation, loading c and d off the stack into regs - - finally, call r - -Note that since there are a fixed number of different r's -(e.g. stg_ap_pp_fast), we can also pre-compile continuations -that correspond to each of them, rather than generating a fresh -one for each over-saturated call. - -Not only does this generate much less code, it is faster too. We will -generate something like: - -Sp[old+16] = c -Sp[old+24] = d -Sp[old+32] = stg_ap_pp_info -call f(a,b) -- usual calling convention - -For the purposes of the CmmCall node, we count this extra stack as -just more arguments that we are passing on the stack (cml_args). --} - --- | 'slowArgs' takes a list of function arguments and prepares them for --- pushing on the stack for "extra" arguments to a function which requires --- fewer arguments than we currently have. -slowArgs :: DynFlags -> [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)] -slowArgs _ [] = [] -slowArgs dflags args -- careful: reps contains voids (V), but args does not - | gopt Opt_SccProfilingOn dflags - = save_cccs ++ this_pat ++ slowArgs dflags rest_args - | otherwise = this_pat ++ slowArgs dflags rest_args - where - (arg_pat, n) = slowCallPattern (map fst args) - (call_args, rest_args) = splitAt n args - - stg_ap_pat = mkCmmRetInfoLabel rtsUnitId arg_pat - this_pat = (N, Just (mkLblExpr stg_ap_pat)) : call_args - save_cccs = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just cccsExpr)] - save_cccs_lbl = mkCmmRetInfoLabel rtsUnitId (fsLit "stg_restore_cccs") - -------------------------------------------------------------------------- ----- Laying out objects on the heap and stack -------------------------------------------------------------------------- - --- The heap always grows upwards, so hpRel is easy to compute -hpRel :: VirtualHpOffset -- virtual offset of Hp - -> VirtualHpOffset -- virtual offset of The Thing - -> WordOff -- integer word offset -hpRel hp off = off - hp - -getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr --- See Note [Virtual and real heap pointers] in StgCmmMonad -getHpRelOffset virtual_offset - = do dflags <- getDynFlags - hp_usg <- getHpUsage - return (cmmRegOffW dflags hpReg (hpRel (realHp hp_usg) virtual_offset)) - -data FieldOffOrPadding a - = FieldOff (NonVoid a) -- Something that needs an offset. - ByteOff -- Offset in bytes. - | Padding ByteOff -- Length of padding in bytes. - ByteOff -- Offset in bytes. - --- | Used to tell the various @mkVirtHeapOffsets@ functions what kind --- of header the object has. This will be accounted for in the --- offsets of the fields returned. -data ClosureHeader - = NoHeader - | StdHeader - | ThunkHeader - -mkVirtHeapOffsetsWithPadding - :: DynFlags - -> ClosureHeader -- What kind of header to account for - -> [NonVoid (PrimRep, a)] -- Things to make offsets for - -> ( WordOff -- Total number of words allocated - , WordOff -- Number of words allocated for *pointers* - , [FieldOffOrPadding a] -- Either an offset or padding. - ) - --- Things with their offsets from start of object in order of --- increasing offset; BUT THIS MAY BE DIFFERENT TO INPUT ORDER --- First in list gets lowest offset, which is initial offset + 1. --- --- mkVirtHeapOffsetsWithPadding always returns boxed things with smaller offsets --- than the unboxed things - -mkVirtHeapOffsetsWithPadding dflags header things = - ASSERT(not (any (isVoidRep . fst . fromNonVoid) things)) - ( tot_wds - , bytesToWordsRoundUp dflags bytes_of_ptrs - , concat (ptrs_w_offsets ++ non_ptrs_w_offsets) ++ final_pad - ) - where - hdr_words = case header of - NoHeader -> 0 - StdHeader -> fixedHdrSizeW dflags - ThunkHeader -> thunkHdrSize dflags - hdr_bytes = wordsToBytes dflags hdr_words - - (ptrs, non_ptrs) = partition (isGcPtrRep . fst . fromNonVoid) things - - (bytes_of_ptrs, ptrs_w_offsets) = - mapAccumL computeOffset 0 ptrs - (tot_bytes, non_ptrs_w_offsets) = - mapAccumL computeOffset bytes_of_ptrs non_ptrs - - tot_wds = bytesToWordsRoundUp dflags tot_bytes - - final_pad_size = tot_wds * word_size - tot_bytes - final_pad - | final_pad_size > 0 = [(Padding final_pad_size - (hdr_bytes + tot_bytes))] - | otherwise = [] - - word_size = wORD_SIZE dflags - - computeOffset bytes_so_far nv_thing = - (new_bytes_so_far, with_padding field_off) - where - (rep, thing) = fromNonVoid nv_thing - - -- Size of the field in bytes. - !sizeB = primRepSizeB dflags rep - - -- Align the start offset (eg, 2-byte value should be 2-byte aligned). - -- But not more than to a word. - !align = min word_size sizeB - !start = roundUpTo bytes_so_far align - !padding = start - bytes_so_far - - -- Final offset is: - -- size of header + bytes_so_far + padding - !final_offset = hdr_bytes + bytes_so_far + padding - !new_bytes_so_far = start + sizeB - field_off = FieldOff (NonVoid thing) final_offset - - with_padding field_off - | padding == 0 = [field_off] - | otherwise = [ Padding padding (hdr_bytes + bytes_so_far) - , field_off - ] - - -mkVirtHeapOffsets - :: DynFlags - -> ClosureHeader -- What kind of header to account for - -> [NonVoid (PrimRep,a)] -- Things to make offsets for - -> (WordOff, -- _Total_ number of words allocated - WordOff, -- Number of words allocated for *pointers* - [(NonVoid a, ByteOff)]) -mkVirtHeapOffsets dflags header things = - ( tot_wds - , ptr_wds - , [ (field, offset) | (FieldOff field offset) <- things_offsets ] - ) - where - (tot_wds, ptr_wds, things_offsets) = - mkVirtHeapOffsetsWithPadding dflags header things - --- | Just like mkVirtHeapOffsets, but for constructors -mkVirtConstrOffsets - :: DynFlags -> [NonVoid (PrimRep, a)] - -> (WordOff, WordOff, [(NonVoid a, ByteOff)]) -mkVirtConstrOffsets dflags = mkVirtHeapOffsets dflags StdHeader - --- | Just like mkVirtConstrOffsets, but used when we don't have the actual --- arguments. Useful when e.g. generating info tables; we just need to know --- sizes of pointer and non-pointer fields. -mkVirtConstrSizes :: DynFlags -> [NonVoid PrimRep] -> (WordOff, WordOff) -mkVirtConstrSizes dflags field_reps - = (tot_wds, ptr_wds) - where - (tot_wds, ptr_wds, _) = - mkVirtConstrOffsets dflags - (map (\nv_rep -> NonVoid (fromNonVoid nv_rep, ())) field_reps) - -------------------------------------------------------------------------- --- --- Making argument descriptors --- --- An argument descriptor describes the layout of args on the stack, --- both for * GC (stack-layout) purposes, and --- * saving/restoring registers when a heap-check fails --- --- Void arguments aren't important, therefore (contrast constructSlowCall) --- -------------------------------------------------------------------------- - --- bring in ARG_P, ARG_N, etc. -#include "../includes/rts/storage/FunTypes.h" - -mkArgDescr :: DynFlags -> [Id] -> ArgDescr -mkArgDescr dflags args - = let arg_bits = argBits dflags arg_reps - arg_reps = filter isNonV (map idArgRep args) - -- Getting rid of voids eases matching of standard patterns - in case stdPattern arg_reps of - Just spec_id -> ArgSpec spec_id - Nothing -> ArgGen arg_bits - -argBits :: DynFlags -> [ArgRep] -> [Bool] -- True for non-ptr, False for ptr -argBits _ [] = [] -argBits dflags (P : args) = False : argBits dflags args -argBits dflags (arg : args) = take (argRepSizeW dflags arg) (repeat True) - ++ argBits dflags args - ----------------------- -stdPattern :: [ArgRep] -> Maybe Int -stdPattern reps - = case reps of - [] -> Just ARG_NONE -- just void args, probably - [N] -> Just ARG_N - [P] -> Just ARG_P - [F] -> Just ARG_F - [D] -> Just ARG_D - [L] -> Just ARG_L - [V16] -> Just ARG_V16 - [V32] -> Just ARG_V32 - [V64] -> Just ARG_V64 - - [N,N] -> Just ARG_NN - [N,P] -> Just ARG_NP - [P,N] -> Just ARG_PN - [P,P] -> Just ARG_PP - - [N,N,N] -> Just ARG_NNN - [N,N,P] -> Just ARG_NNP - [N,P,N] -> Just ARG_NPN - [N,P,P] -> Just ARG_NPP - [P,N,N] -> Just ARG_PNN - [P,N,P] -> Just ARG_PNP - [P,P,N] -> Just ARG_PPN - [P,P,P] -> Just ARG_PPP - - [P,P,P,P] -> Just ARG_PPPP - [P,P,P,P,P] -> Just ARG_PPPPP - [P,P,P,P,P,P] -> Just ARG_PPPPPP - - _ -> Nothing - -------------------------------------------------------------------------- --- --- Generating the info table and code for a closure --- -------------------------------------------------------------------------- - --- Here we make an info table of type 'CmmInfo'. The concrete --- representation as a list of 'CmmAddr' is handled later --- in the pipeline by 'cmmToRawCmm'. --- When loading the free variables, a function closure pointer may be tagged, --- so we must take it into account. - -emitClosureProcAndInfoTable :: Bool -- top-level? - -> Id -- name of the closure - -> LambdaFormInfo - -> CmmInfoTable - -> [NonVoid Id] -- incoming arguments - -> ((Int, LocalReg, [LocalReg]) -> FCode ()) -- function body - -> FCode () -emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args body - = do { dflags <- getDynFlags - -- Bind the binder itself, but only if it's not a top-level - -- binding. We need non-top let-bindings to refer to the - -- top-level binding, which this binding would incorrectly shadow. - ; node <- if top_lvl then return $ idToReg dflags (NonVoid bndr) - else bindToReg (NonVoid bndr) lf_info - ; let node_points = nodeMustPointToIt dflags lf_info - ; arg_regs <- bindArgsToRegs args - ; let args' = if node_points then (node : arg_regs) else arg_regs - conv = if nodeMustPointToIt dflags lf_info then NativeNodeCall - else NativeDirectCall - (offset, _, _) = mkCallEntry dflags conv args' [] - ; emitClosureAndInfoTable info_tbl conv args' $ body (offset, node, arg_regs) - } - --- Data constructors need closures, but not with all the argument handling --- needed for functions. The shared part goes here. -emitClosureAndInfoTable :: - CmmInfoTable -> Convention -> [LocalReg] -> FCode () -> FCode () -emitClosureAndInfoTable info_tbl conv args body - = do { (_, blks) <- getCodeScoped body - ; let entry_lbl = toEntryLbl (cit_lbl info_tbl) - ; emitProcWithConvention conv (Just info_tbl) entry_lbl args blks - } diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs deleted file mode 100644 index d6f84c6a0a..0000000000 --- a/compiler/codeGen/StgCmmMonad.hs +++ /dev/null @@ -1,861 +0,0 @@ -{-# LANGUAGE DeriveFunctor #-} -{-# LANGUAGE GADTs #-} - ------------------------------------------------------------------------------ --- --- Monad for Stg to C-- code generation --- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ - -module StgCmmMonad ( - FCode, -- type - - initC, runC, fixC, - newUnique, - - emitLabel, - - emit, emitDecl, - emitProcWithConvention, emitProcWithStackFrame, - emitOutOfLine, emitAssign, emitStore, - emitComment, emitTick, emitUnwind, - - getCmm, aGraphToGraph, - getCodeR, getCode, getCodeScoped, getHeapUsage, - - mkCmmIfThenElse, mkCmmIfThen, mkCmmIfGoto, - mkCmmIfThenElse', mkCmmIfThen', mkCmmIfGoto', - - mkCall, mkCmmCall, - - forkClosureBody, forkLneBody, forkAlts, forkAltPair, codeOnly, - - ConTagZ, - - Sequel(..), ReturnKind(..), - withSequel, getSequel, - - setTickyCtrLabel, getTickyCtrLabel, - tickScope, getTickScope, - - withUpdFrameOff, getUpdFrameOff, initUpdFrameOff, - - HeapUsage(..), VirtualHpOffset, initHpUsage, - getHpUsage, setHpUsage, heapHWM, - setVirtHp, getVirtHp, setRealHp, - - getModuleName, - - -- ideally we wouldn't export these, but some other modules access internal state - getState, setState, getSelfLoop, withSelfLoop, getInfoDown, getDynFlags, getThisPackage, - - -- more localised access to monad state - CgIdInfo(..), - getBinds, setBinds, - - -- out of general friendliness, we also export ... - CgInfoDownwards(..), CgState(..) -- non-abstract - ) where - -import GhcPrelude hiding( sequence, succ ) - -import Cmm -import StgCmmClosure -import DynFlags -import Hoopl.Collections -import MkGraph -import BlockId -import CLabel -import SMRep -import Module -import Id -import VarEnv -import OrdList -import BasicTypes( ConTagZ ) -import Unique -import UniqSupply -import FastString -import Outputable -import Util - -import Control.Monad -import Data.List - - - --------------------------------------------------------- --- The FCode monad and its types --- --- FCode is the monad plumbed through the Stg->Cmm code generator, and --- the Cmm parser. It contains the following things: --- --- - A writer monad, collecting: --- - code for the current function, in the form of a CmmAGraph. --- The function "emit" appends more code to this. --- - the top-level CmmDecls accumulated so far --- --- - A state monad with: --- - the local bindings in scope --- - the current heap usage --- - a UniqSupply --- --- - A reader monad, for CgInfoDownwards, containing --- - DynFlags, --- - the current Module --- - the update-frame offset --- - the ticky counter label --- - the Sequel (the continuation to return to) --- - the self-recursive tail call information - --------------------------------------------------------- - -newtype FCode a = FCode { doFCode :: CgInfoDownwards -> CgState -> (a, CgState) } - deriving (Functor) - -instance Applicative FCode where - pure val = FCode (\_info_down state -> (val, state)) - {-# INLINE pure #-} - (<*>) = ap - -instance Monad FCode where - FCode m >>= k = FCode $ - \info_down state -> - case m info_down state of - (m_result, new_state) -> - case k m_result of - FCode kcode -> kcode info_down new_state - {-# INLINE (>>=) #-} - -instance MonadUnique FCode where - getUniqueSupplyM = cgs_uniqs <$> getState - getUniqueM = FCode $ \_ st -> - let (u, us') = takeUniqFromSupply (cgs_uniqs st) - in (u, st { cgs_uniqs = us' }) - -initC :: IO CgState -initC = do { uniqs <- mkSplitUniqSupply 'c' - ; return (initCgState uniqs) } - -runC :: DynFlags -> Module -> CgState -> FCode a -> (a,CgState) -runC dflags mod st fcode = doFCode fcode (initCgInfoDown dflags mod) st - -fixC :: (a -> FCode a) -> FCode a -fixC fcode = FCode $ - \info_down state -> let (v, s) = doFCode (fcode v) info_down state - in (v, s) - --------------------------------------------------------- --- The code generator environment --------------------------------------------------------- - --- This monadery has some information that it only passes --- *downwards*, as well as some ``state'' which is modified --- as we go along. - -data CgInfoDownwards -- information only passed *downwards* by the monad - = MkCgInfoDown { - cgd_dflags :: DynFlags, - cgd_mod :: Module, -- Module being compiled - cgd_updfr_off :: UpdFrameOffset, -- Size of current update frame - cgd_ticky :: CLabel, -- Current destination for ticky counts - cgd_sequel :: Sequel, -- What to do at end of basic block - cgd_self_loop :: Maybe SelfLoopInfo,-- Which tail calls can be compiled - -- as local jumps? See Note - -- [Self-recursive tail calls] in - -- StgCmmExpr - cgd_tick_scope:: CmmTickScope -- Tick scope for new blocks & ticks - } - -type CgBindings = IdEnv CgIdInfo - -data CgIdInfo - = CgIdInfo - { cg_id :: Id -- Id that this is the info for - , cg_lf :: LambdaFormInfo - , cg_loc :: CgLoc -- CmmExpr for the *tagged* value - } - -instance Outputable CgIdInfo where - ppr (CgIdInfo { cg_id = id, cg_loc = loc }) - = ppr id <+> text "-->" <+> ppr loc - --- Sequel tells what to do with the result of this expression -data Sequel - = Return -- Return result(s) to continuation found on the stack. - - | AssignTo - [LocalReg] -- Put result(s) in these regs and fall through - -- NB: no void arguments here - -- - Bool -- Should we adjust the heap pointer back to - -- recover space that's unused on this path? - -- We need to do this only if the expression - -- may allocate (e.g. it's a foreign call or - -- allocating primOp) - -instance Outputable Sequel where - ppr Return = text "Return" - ppr (AssignTo regs b) = text "AssignTo" <+> ppr regs <+> ppr b - --- See Note [sharing continuations] below -data ReturnKind - = AssignedDirectly - | ReturnedTo BlockId ByteOff - --- Note [sharing continuations] --- --- ReturnKind says how the expression being compiled returned its --- results: either by assigning directly to the registers specified --- by the Sequel, or by returning to a continuation that does the --- assignments. The point of this is we might be able to re-use the --- continuation in a subsequent heap-check. Consider: --- --- case f x of z --- True -> <True code> --- False -> <False code> --- --- Naively we would generate --- --- R2 = x -- argument to f --- Sp[young(L1)] = L1 --- call f returns to L1 --- L1: --- z = R1 --- if (z & 1) then Ltrue else Lfalse --- Ltrue: --- Hp = Hp + 24 --- if (Hp > HpLim) then L4 else L7 --- L4: --- HpAlloc = 24 --- goto L5 --- L5: --- R1 = z --- Sp[young(L6)] = L6 --- call stg_gc_unpt_r1 returns to L6 --- L6: --- z = R1 --- goto L1 --- L7: --- <True code> --- Lfalse: --- <False code> --- --- We want the gc call in L4 to return to L1, and discard L6. Note --- that not only can we share L1 and L6, but the assignment of the --- return address in L4 is unnecessary because the return address for --- L1 is already on the stack. We used to catch the sharing of L1 and --- L6 in the common-block-eliminator, but not the unnecessary return --- address assignment. --- --- Since this case is so common I decided to make it more explicit and --- robust by programming the sharing directly, rather than relying on --- the common-block eliminator to catch it. This makes --- common-block-elimination an optional optimisation, and furthermore --- generates less code in the first place that we have to subsequently --- clean up. --- --- There are some rarer cases of common blocks that we don't catch --- this way, but that's ok. Common-block-elimination is still available --- to catch them when optimisation is enabled. Some examples are: --- --- - when both the True and False branches do a heap check, we --- can share the heap-check failure code L4a and maybe L4 --- --- - in a case-of-case, there might be multiple continuations that --- we can common up. --- --- It is always safe to use AssignedDirectly. Expressions that jump --- to the continuation from multiple places (e.g. case expressions) --- fall back to AssignedDirectly. --- - - -initCgInfoDown :: DynFlags -> Module -> CgInfoDownwards -initCgInfoDown dflags mod - = MkCgInfoDown { cgd_dflags = dflags - , cgd_mod = mod - , cgd_updfr_off = initUpdFrameOff dflags - , cgd_ticky = mkTopTickyCtrLabel - , cgd_sequel = initSequel - , cgd_self_loop = Nothing - , cgd_tick_scope= GlobalScope } - -initSequel :: Sequel -initSequel = Return - -initUpdFrameOff :: DynFlags -> UpdFrameOffset -initUpdFrameOff dflags = widthInBytes (wordWidth dflags) -- space for the RA - - --------------------------------------------------------- --- The code generator state --------------------------------------------------------- - -data CgState - = MkCgState { - cgs_stmts :: CmmAGraph, -- Current procedure - - cgs_tops :: OrdList CmmDecl, - -- Other procedures and data blocks in this compilation unit - -- Both are ordered only so that we can - -- reduce forward references, when it's easy to do so - - cgs_binds :: CgBindings, - - cgs_hp_usg :: HeapUsage, - - cgs_uniqs :: UniqSupply } - -data HeapUsage -- See Note [Virtual and real heap pointers] - = HeapUsage { - virtHp :: VirtualHpOffset, -- Virtual offset of highest-allocated word - -- Incremented whenever we allocate - realHp :: VirtualHpOffset -- realHp: Virtual offset of real heap ptr - -- Used in instruction addressing modes - } - -type VirtualHpOffset = WordOff - - -{- Note [Virtual and real heap pointers] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The code generator can allocate one or more objects contiguously, performing -one heap check to cover allocation of all the objects at once. Let's call -this little chunk of heap space an "allocation chunk". The code generator -will emit code to - * Perform a heap-exhaustion check - * Move the heap pointer to the end of the allocation chunk - * Allocate multiple objects within the chunk - -The code generator uses VirtualHpOffsets to address words within a -single allocation chunk; these start at one and increase positively. -The first word of the chunk has VirtualHpOffset=1, the second has -VirtualHpOffset=2, and so on. - - * The field realHp tracks (the VirtualHpOffset) where the real Hp - register is pointing. Typically it'll be pointing to the end of the - allocation chunk. - - * The field virtHp gives the VirtualHpOffset of the highest-allocated - word so far. It starts at zero (meaning no word has been allocated), - and increases whenever an object is allocated. - -The difference between realHp and virtHp gives the offset from the -real Hp register of a particular word in the allocation chunk. This -is what getHpRelOffset does. Since the returned offset is relative -to the real Hp register, it is valid only until you change the real -Hp register. (Changing virtHp doesn't matter.) --} - - -initCgState :: UniqSupply -> CgState -initCgState uniqs - = MkCgState { cgs_stmts = mkNop - , cgs_tops = nilOL - , cgs_binds = emptyVarEnv - , cgs_hp_usg = initHpUsage - , cgs_uniqs = uniqs } - -stateIncUsage :: CgState -> CgState -> CgState --- stateIncUsage@ e1 e2 incorporates in e1 --- the heap high water mark found in e2. -stateIncUsage s1 s2@(MkCgState { cgs_hp_usg = hp_usg }) - = s1 { cgs_hp_usg = cgs_hp_usg s1 `maxHpHw` virtHp hp_usg } - `addCodeBlocksFrom` s2 - -addCodeBlocksFrom :: CgState -> CgState -> CgState --- Add code blocks from the latter to the former --- (The cgs_stmts will often be empty, but not always; see codeOnly) -s1 `addCodeBlocksFrom` s2 - = s1 { cgs_stmts = cgs_stmts s1 MkGraph.<*> cgs_stmts s2, - cgs_tops = cgs_tops s1 `appOL` cgs_tops s2 } - - --- The heap high water mark is the larger of virtHp and hwHp. The latter is --- only records the high water marks of forked-off branches, so to find the --- heap high water mark you have to take the max of virtHp and hwHp. Remember, --- virtHp never retreats! --- --- Note Jan 04: ok, so why do we only look at the virtual Hp?? - -heapHWM :: HeapUsage -> VirtualHpOffset -heapHWM = virtHp - -initHpUsage :: HeapUsage -initHpUsage = HeapUsage { virtHp = 0, realHp = 0 } - -maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage -hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw } - --------------------------------------------------------- --- Operators for getting and setting the state and "info_down". --------------------------------------------------------- - -getState :: FCode CgState -getState = FCode $ \_info_down state -> (state, state) - -setState :: CgState -> FCode () -setState state = FCode $ \_info_down _ -> ((), state) - -getHpUsage :: FCode HeapUsage -getHpUsage = do - state <- getState - return $ cgs_hp_usg state - -setHpUsage :: HeapUsage -> FCode () -setHpUsage new_hp_usg = do - state <- getState - setState $ state {cgs_hp_usg = new_hp_usg} - -setVirtHp :: VirtualHpOffset -> FCode () -setVirtHp new_virtHp - = do { hp_usage <- getHpUsage - ; setHpUsage (hp_usage {virtHp = new_virtHp}) } - -getVirtHp :: FCode VirtualHpOffset -getVirtHp - = do { hp_usage <- getHpUsage - ; return (virtHp hp_usage) } - -setRealHp :: VirtualHpOffset -> FCode () -setRealHp new_realHp - = do { hp_usage <- getHpUsage - ; setHpUsage (hp_usage {realHp = new_realHp}) } - -getBinds :: FCode CgBindings -getBinds = do - state <- getState - return $ cgs_binds state - -setBinds :: CgBindings -> FCode () -setBinds new_binds = do - state <- getState - setState $ state {cgs_binds = new_binds} - -withState :: FCode a -> CgState -> FCode (a,CgState) -withState (FCode fcode) newstate = FCode $ \info_down state -> - case fcode info_down newstate of - (retval, state2) -> ((retval,state2), state) - -newUniqSupply :: FCode UniqSupply -newUniqSupply = do - state <- getState - let (us1, us2) = splitUniqSupply (cgs_uniqs state) - setState $ state { cgs_uniqs = us1 } - return us2 - -newUnique :: FCode Unique -newUnique = do - state <- getState - let (u,us') = takeUniqFromSupply (cgs_uniqs state) - setState $ state { cgs_uniqs = us' } - return u - ------------------- -getInfoDown :: FCode CgInfoDownwards -getInfoDown = FCode $ \info_down state -> (info_down,state) - -getSelfLoop :: FCode (Maybe SelfLoopInfo) -getSelfLoop = do - info_down <- getInfoDown - return $ cgd_self_loop info_down - -withSelfLoop :: SelfLoopInfo -> FCode a -> FCode a -withSelfLoop self_loop code = do - info_down <- getInfoDown - withInfoDown code (info_down {cgd_self_loop = Just self_loop}) - -instance HasDynFlags FCode where - getDynFlags = liftM cgd_dflags getInfoDown - -getThisPackage :: FCode UnitId -getThisPackage = liftM thisPackage getDynFlags - -withInfoDown :: FCode a -> CgInfoDownwards -> FCode a -withInfoDown (FCode fcode) info_down = FCode $ \_ state -> fcode info_down state - --- ---------------------------------------------------------------------------- --- Get the current module name - -getModuleName :: FCode Module -getModuleName = do { info <- getInfoDown; return (cgd_mod info) } - --- ---------------------------------------------------------------------------- --- Get/set the end-of-block info - -withSequel :: Sequel -> FCode a -> FCode a -withSequel sequel code - = do { info <- getInfoDown - ; withInfoDown code (info {cgd_sequel = sequel, cgd_self_loop = Nothing }) } - -getSequel :: FCode Sequel -getSequel = do { info <- getInfoDown - ; return (cgd_sequel info) } - --- ---------------------------------------------------------------------------- --- Get/set the size of the update frame - --- We keep track of the size of the update frame so that we --- can set the stack pointer to the proper address on return --- (or tail call) from the closure. --- There should be at most one update frame for each closure. --- Note: I'm including the size of the original return address --- in the size of the update frame -- hence the default case on `get'. - -withUpdFrameOff :: UpdFrameOffset -> FCode a -> FCode a -withUpdFrameOff size code - = do { info <- getInfoDown - ; withInfoDown code (info {cgd_updfr_off = size }) } - -getUpdFrameOff :: FCode UpdFrameOffset -getUpdFrameOff - = do { info <- getInfoDown - ; return $ cgd_updfr_off info } - --- ---------------------------------------------------------------------------- --- Get/set the current ticky counter label - -getTickyCtrLabel :: FCode CLabel -getTickyCtrLabel = do - info <- getInfoDown - return (cgd_ticky info) - -setTickyCtrLabel :: CLabel -> FCode a -> FCode a -setTickyCtrLabel ticky code = do - info <- getInfoDown - withInfoDown code (info {cgd_ticky = ticky}) - --- ---------------------------------------------------------------------------- --- Manage tick scopes - --- | The current tick scope. We will assign this to generated blocks. -getTickScope :: FCode CmmTickScope -getTickScope = do - info <- getInfoDown - return (cgd_tick_scope info) - --- | Places blocks generated by the given code into a fresh --- (sub-)scope. This will make sure that Cmm annotations in our scope --- will apply to the Cmm blocks generated therein - but not the other --- way around. -tickScope :: FCode a -> FCode a -tickScope code = do - info <- getInfoDown - if debugLevel (cgd_dflags info) == 0 then code else do - u <- newUnique - let scope' = SubScope u (cgd_tick_scope info) - withInfoDown code info{ cgd_tick_scope = scope' } - - --------------------------------------------------------- --- Forking --------------------------------------------------------- - -forkClosureBody :: FCode () -> FCode () --- forkClosureBody compiles body_code in environment where: --- - sequel, update stack frame and self loop info are --- set to fresh values --- - state is set to a fresh value, except for local bindings --- that are passed in unchanged. It's up to the enclosed code to --- re-bind the free variables to a field of the closure. - -forkClosureBody body_code - = do { dflags <- getDynFlags - ; info <- getInfoDown - ; us <- newUniqSupply - ; state <- getState - ; let body_info_down = info { cgd_sequel = initSequel - , cgd_updfr_off = initUpdFrameOff dflags - , cgd_self_loop = Nothing } - fork_state_in = (initCgState us) { cgs_binds = cgs_binds state } - ((),fork_state_out) = doFCode body_code body_info_down fork_state_in - ; setState $ state `addCodeBlocksFrom` fork_state_out } - -forkLneBody :: FCode a -> FCode a --- 'forkLneBody' takes a body of let-no-escape binding and compiles --- it in the *current* environment, returning the graph thus constructed. --- --- The current environment is passed on completely unchanged to --- the successor. In particular, any heap usage from the enclosed --- code is discarded; it should deal with its own heap consumption. -forkLneBody body_code - = do { info_down <- getInfoDown - ; us <- newUniqSupply - ; state <- getState - ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state } - (result, fork_state_out) = doFCode body_code info_down fork_state_in - ; setState $ state `addCodeBlocksFrom` fork_state_out - ; return result } - -codeOnly :: FCode () -> FCode () --- Emit any code from the inner thing into the outer thing --- Do not affect anything else in the outer state --- Used in almost-circular code to prevent false loop dependencies -codeOnly body_code - = do { info_down <- getInfoDown - ; us <- newUniqSupply - ; state <- getState - ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state - , cgs_hp_usg = cgs_hp_usg state } - ((), fork_state_out) = doFCode body_code info_down fork_state_in - ; setState $ state `addCodeBlocksFrom` fork_state_out } - -forkAlts :: [FCode a] -> FCode [a] --- (forkAlts' bs d) takes fcodes 'bs' for the branches of a 'case', and --- an fcode for the default case 'd', and compiles each in the current --- environment. The current environment is passed on unmodified, except --- that the virtual Hp is moved on to the worst virtual Hp for the branches - -forkAlts branch_fcodes - = do { info_down <- getInfoDown - ; us <- newUniqSupply - ; state <- getState - ; let compile us branch - = (us2, doFCode branch info_down branch_state) - where - (us1,us2) = splitUniqSupply us - branch_state = (initCgState us1) { - cgs_binds = cgs_binds state - , cgs_hp_usg = cgs_hp_usg state } - (_us, results) = mapAccumL compile us branch_fcodes - (branch_results, branch_out_states) = unzip results - ; setState $ foldl' stateIncUsage state branch_out_states - -- NB foldl. state is the *left* argument to stateIncUsage - ; return branch_results } - -forkAltPair :: FCode a -> FCode a -> FCode (a,a) --- Most common use of 'forkAlts'; having this helper function avoids --- accidental use of failible pattern-matches in @do@-notation -forkAltPair x y = do - xy' <- forkAlts [x,y] - case xy' of - [x',y'] -> return (x',y') - _ -> panic "forkAltPair" - --- collect the code emitted by an FCode computation -getCodeR :: FCode a -> FCode (a, CmmAGraph) -getCodeR fcode - = do { state1 <- getState - ; (a, state2) <- withState fcode (state1 { cgs_stmts = mkNop }) - ; setState $ state2 { cgs_stmts = cgs_stmts state1 } - ; return (a, cgs_stmts state2) } - -getCode :: FCode a -> FCode CmmAGraph -getCode fcode = do { (_,stmts) <- getCodeR fcode; return stmts } - --- | Generate code into a fresh tick (sub-)scope and gather generated code -getCodeScoped :: FCode a -> FCode (a, CmmAGraphScoped) -getCodeScoped fcode - = do { state1 <- getState - ; ((a, tscope), state2) <- - tickScope $ - flip withState state1 { cgs_stmts = mkNop } $ - do { a <- fcode - ; scp <- getTickScope - ; return (a, scp) } - ; setState $ state2 { cgs_stmts = cgs_stmts state1 } - ; return (a, (cgs_stmts state2, tscope)) } - - --- 'getHeapUsage' applies a function to the amount of heap that it uses. --- It initialises the heap usage to zeros, and passes on an unchanged --- heap usage. --- --- It is usually a prelude to performing a GC check, so everything must --- be in a tidy and consistent state. --- --- Note the slightly subtle fixed point behaviour needed here - -getHeapUsage :: (VirtualHpOffset -> FCode a) -> FCode a -getHeapUsage fcode - = do { info_down <- getInfoDown - ; state <- getState - ; let fstate_in = state { cgs_hp_usg = initHpUsage } - (r, fstate_out) = doFCode (fcode hp_hw) info_down fstate_in - hp_hw = heapHWM (cgs_hp_usg fstate_out) -- Loop here! - - ; setState $ fstate_out { cgs_hp_usg = cgs_hp_usg state } - ; return r } - --- ---------------------------------------------------------------------------- --- Combinators for emitting code - -emitCgStmt :: CgStmt -> FCode () -emitCgStmt stmt - = do { state <- getState - ; setState $ state { cgs_stmts = cgs_stmts state `snocOL` stmt } - } - -emitLabel :: BlockId -> FCode () -emitLabel id = do tscope <- getTickScope - emitCgStmt (CgLabel id tscope) - -emitComment :: FastString -> FCode () -emitComment s - | debugIsOn = emitCgStmt (CgStmt (CmmComment s)) - | otherwise = return () - -emitTick :: CmmTickish -> FCode () -emitTick = emitCgStmt . CgStmt . CmmTick - -emitUnwind :: [(GlobalReg, Maybe CmmExpr)] -> FCode () -emitUnwind regs = do - dflags <- getDynFlags - when (debugLevel dflags > 0) $ do - emitCgStmt $ CgStmt $ CmmUnwind regs - -emitAssign :: CmmReg -> CmmExpr -> FCode () -emitAssign l r = emitCgStmt (CgStmt (CmmAssign l r)) - -emitStore :: CmmExpr -> CmmExpr -> FCode () -emitStore l r = emitCgStmt (CgStmt (CmmStore l r)) - -emit :: CmmAGraph -> FCode () -emit ag - = do { state <- getState - ; setState $ state { cgs_stmts = cgs_stmts state MkGraph.<*> ag } } - -emitDecl :: CmmDecl -> FCode () -emitDecl decl - = do { state <- getState - ; setState $ state { cgs_tops = cgs_tops state `snocOL` decl } } - -emitOutOfLine :: BlockId -> CmmAGraphScoped -> FCode () -emitOutOfLine l (stmts, tscope) = emitCgStmt (CgFork l stmts tscope) - -emitProcWithStackFrame - :: Convention -- entry convention - -> Maybe CmmInfoTable -- info table? - -> CLabel -- label for the proc - -> [CmmFormal] -- stack frame - -> [CmmFormal] -- arguments - -> CmmAGraphScoped -- code - -> Bool -- do stack layout? - -> FCode () - -emitProcWithStackFrame _conv mb_info lbl _stk_args [] blocks False - = do { dflags <- getDynFlags - ; emitProc mb_info lbl [] blocks (widthInBytes (wordWidth dflags)) False - } -emitProcWithStackFrame conv mb_info lbl stk_args args (graph, tscope) True - -- do layout - = do { dflags <- getDynFlags - ; let (offset, live, entry) = mkCallEntry dflags conv args stk_args - graph' = entry MkGraph.<*> graph - ; emitProc mb_info lbl live (graph', tscope) offset True - } -emitProcWithStackFrame _ _ _ _ _ _ _ = panic "emitProcWithStackFrame" - -emitProcWithConvention :: Convention -> Maybe CmmInfoTable -> CLabel - -> [CmmFormal] - -> CmmAGraphScoped - -> FCode () -emitProcWithConvention conv mb_info lbl args blocks - = emitProcWithStackFrame conv mb_info lbl [] args blocks True - -emitProc :: Maybe CmmInfoTable -> CLabel -> [GlobalReg] -> CmmAGraphScoped - -> Int -> Bool -> FCode () -emitProc mb_info lbl live blocks offset do_layout - = do { dflags <- getDynFlags - ; l <- newBlockId - ; let - blks :: CmmGraph - blks = labelAGraph l blocks - - infos | Just info <- mb_info = mapSingleton (g_entry blks) info - | otherwise = mapEmpty - - sinfo = StackInfo { arg_space = offset - , updfr_space = Just (initUpdFrameOff dflags) - , do_layout = do_layout } - - tinfo = TopInfo { info_tbls = infos - , stack_info=sinfo} - - proc_block = CmmProc tinfo lbl live blks - - ; state <- getState - ; setState $ state { cgs_tops = cgs_tops state `snocOL` proc_block } } - -getCmm :: FCode () -> FCode CmmGroup --- Get all the CmmTops (there should be no stmts) --- Return a single Cmm which may be split from other Cmms by --- object splitting (at a later stage) -getCmm code - = do { state1 <- getState - ; ((), state2) <- withState code (state1 { cgs_tops = nilOL }) - ; setState $ state2 { cgs_tops = cgs_tops state1 } - ; return (fromOL (cgs_tops state2)) } - - -mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> FCode CmmAGraph -mkCmmIfThenElse e tbranch fbranch = mkCmmIfThenElse' e tbranch fbranch Nothing - -mkCmmIfThenElse' :: CmmExpr -> CmmAGraph -> CmmAGraph - -> Maybe Bool -> FCode CmmAGraph -mkCmmIfThenElse' e tbranch fbranch likely = do - tscp <- getTickScope - endif <- newBlockId - tid <- newBlockId - fid <- newBlockId - - let - (test, then_, else_, likely') = case likely of - Just False | Just e' <- maybeInvertCmmExpr e - -- currently NCG doesn't know about likely - -- annotations. We manually switch then and - -- else branch so the likely false branch - -- becomes a fallthrough. - -> (e', fbranch, tbranch, Just True) - _ -> (e, tbranch, fbranch, likely) - - return $ catAGraphs [ mkCbranch test tid fid likely' - , mkLabel tid tscp, then_, mkBranch endif - , mkLabel fid tscp, else_, mkLabel endif tscp ] - -mkCmmIfGoto :: CmmExpr -> BlockId -> FCode CmmAGraph -mkCmmIfGoto e tid = mkCmmIfGoto' e tid Nothing - -mkCmmIfGoto' :: CmmExpr -> BlockId -> Maybe Bool -> FCode CmmAGraph -mkCmmIfGoto' e tid l = do - endif <- newBlockId - tscp <- getTickScope - return $ catAGraphs [ mkCbranch e tid endif l, mkLabel endif tscp ] - -mkCmmIfThen :: CmmExpr -> CmmAGraph -> FCode CmmAGraph -mkCmmIfThen e tbranch = mkCmmIfThen' e tbranch Nothing - -mkCmmIfThen' :: CmmExpr -> CmmAGraph -> Maybe Bool -> FCode CmmAGraph -mkCmmIfThen' e tbranch l = do - endif <- newBlockId - tid <- newBlockId - tscp <- getTickScope - return $ catAGraphs [ mkCbranch e tid endif l - , mkLabel tid tscp, tbranch, mkLabel endif tscp ] - -mkCall :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmExpr] - -> UpdFrameOffset -> [CmmExpr] -> FCode CmmAGraph -mkCall f (callConv, retConv) results actuals updfr_off extra_stack = do - dflags <- getDynFlags - k <- newBlockId - tscp <- getTickScope - let area = Young k - (off, _, copyin) = copyInOflow dflags retConv area results [] - copyout = mkCallReturnsTo dflags f callConv actuals k off updfr_off extra_stack - return $ catAGraphs [copyout, mkLabel k tscp, copyin] - -mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmExpr] -> UpdFrameOffset - -> FCode CmmAGraph -mkCmmCall f results actuals updfr_off - = mkCall f (NativeDirectCall, NativeReturn) results actuals updfr_off [] - - --- ---------------------------------------------------------------------------- --- turn CmmAGraph into CmmGraph, for making a new proc. - -aGraphToGraph :: CmmAGraphScoped -> FCode CmmGraph -aGraphToGraph stmts - = do { l <- newBlockId - ; return (labelAGraph l stmts) } diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs deleted file mode 100644 index 61d88feabb..0000000000 --- a/compiler/codeGen/StgCmmPrim.hs +++ /dev/null @@ -1,2622 +0,0 @@ -{-# LANGUAGE CPP #-} --- emitPrimOp is quite large -{-# OPTIONS_GHC -fmax-pmcheck-iterations=4000000 #-} - ----------------------------------------------------------------------------- --- --- Stg to C--: primitive operations --- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ - -module StgCmmPrim ( - cgOpApp, - cgPrimOp, -- internal(ish), used by cgCase to get code for a - -- comparison without also turning it into a Bool. - shouldInlinePrimOp - ) where - -#include "HsVersions.h" - -import GhcPrelude hiding ((<*>)) - -import StgCmmLayout -import StgCmmForeign -import StgCmmEnv -import StgCmmMonad -import StgCmmUtils -import StgCmmTicky -import StgCmmHeap -import StgCmmProf ( costCentreFrom ) - -import DynFlags -import GHC.Platform -import BasicTypes -import BlockId -import MkGraph -import StgSyn -import Cmm -import Type ( Type, tyConAppTyCon ) -import TyCon -import CLabel -import CmmUtils -import PrimOp -import SMRep -import FastString -import Outputable -import Util -import Data.Maybe - -import Data.Bits ((.&.), bit) -import Control.Monad (liftM, when, unless) - ------------------------------------------------------------------------- --- Primitive operations and foreign calls ------------------------------------------------------------------------- - -{- Note [Foreign call results] - ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -A foreign call always returns an unboxed tuple of results, one -of which is the state token. This seems to happen even for pure -calls. - -Even if we returned a single result for pure calls, it'd still be -right to wrap it in a singleton unboxed tuple, because the result -might be a Haskell closure pointer, we don't want to evaluate it. -} - ----------------------------------- -cgOpApp :: StgOp -- The op - -> [StgArg] -- Arguments - -> Type -- Result type (always an unboxed tuple) - -> FCode ReturnKind - --- Foreign calls -cgOpApp (StgFCallOp fcall ty) stg_args res_ty - = cgForeignCall fcall ty stg_args res_ty - -- Note [Foreign call results] - --- tagToEnum# is special: we need to pull the constructor --- out of the table, and perform an appropriate return. - -cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty - = ASSERT(isEnumerationTyCon tycon) - do { dflags <- getDynFlags - ; args' <- getNonVoidArgAmodes [arg] - ; let amode = case args' of [amode] -> amode - _ -> panic "TagToEnumOp had void arg" - ; emitReturn [tagToClosure dflags tycon amode] } - where - -- If you're reading this code in the attempt to figure - -- out why the compiler panic'ed here, it is probably because - -- you used tagToEnum# in a non-monomorphic setting, e.g., - -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x# - -- That won't work. - tycon = tyConAppTyCon res_ty - -cgOpApp (StgPrimOp primop) args res_ty = do - dflags <- getDynFlags - cmm_args <- getNonVoidArgAmodes args - case shouldInlinePrimOp dflags primop cmm_args of - Nothing -> do -- out-of-line - let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop)) - emitCall (NativeNodeCall, NativeReturn) fun cmm_args - - Just f -- inline - | ReturnsPrim VoidRep <- result_info - -> do f [] - emitReturn [] - - | ReturnsPrim rep <- result_info - -> do dflags <- getDynFlags - res <- newTemp (primRepCmmType dflags rep) - f [res] - emitReturn [CmmReg (CmmLocal res)] - - | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon - -> do (regs, _hints) <- newUnboxedTupleRegs res_ty - f regs - emitReturn (map (CmmReg . CmmLocal) regs) - - | otherwise -> panic "cgPrimop" - where - result_info = getPrimOpResultInfo primop - -cgOpApp (StgPrimCallOp primcall) args _res_ty - = do { cmm_args <- getNonVoidArgAmodes args - ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall)) - ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args } - --- | Interpret the argument as an unsigned value, assuming the value --- is given in two-complement form in the given width. --- --- Example: @asUnsigned W64 (-1)@ is 18446744073709551615. --- --- This function is used to work around the fact that many array --- primops take Int# arguments, but we interpret them as unsigned --- quantities in the code gen. This means that we have to be careful --- every time we work on e.g. a CmmInt literal that corresponds to the --- array size, as it might contain a negative Integer value if the --- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int# --- literal. -asUnsigned :: Width -> Integer -> Integer -asUnsigned w n = n .&. (bit (widthInBits w) - 1) - --- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use --- ByteOff (or some other fixed width signed type) to represent --- array sizes or indices. This means that these will overflow for --- large enough sizes. - --- | Decide whether an out-of-line primop should be replaced by an --- inline implementation. This might happen e.g. if there's enough --- static information, such as statically know arguments, to emit a --- more efficient implementation inline. --- --- Returns 'Nothing' if this primop should use its out-of-line --- implementation (defined elsewhere) and 'Just' together with a code --- generating function that takes the output regs as arguments --- otherwise. -shouldInlinePrimOp :: DynFlags - -> PrimOp -- ^ The primop - -> [CmmExpr] -- ^ The primop arguments - -> Maybe ([LocalReg] -> FCode ()) - -shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n w))] - | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> doNewByteArrayOp res (fromInteger n) - -shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n w)), init] - | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> - doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel - [ (mkIntExpr dflags (fromInteger n), - fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags) - , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))), - fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags) - ] - (fromInteger n) init - -shouldInlinePrimOp _ CopyArrayOp - [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] = - Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n) - -shouldInlinePrimOp _ CopyMutableArrayOp - [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] = - Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n) - -shouldInlinePrimOp _ CopyArrayArrayOp - [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] = - Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n) - -shouldInlinePrimOp _ CopyMutableArrayArrayOp - [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] = - Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n) - -shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))] - | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n) - -shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))] - | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n) - -shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))] - | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n) - -shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))] - | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n) - -shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n w)), init] - | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> - doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel - [ (mkIntExpr dflags (fromInteger n), - fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags) - ] - (fromInteger n) init - -shouldInlinePrimOp _ CopySmallArrayOp - [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] = - Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n) - -shouldInlinePrimOp _ CopySmallMutableArrayOp - [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] = - Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n) - -shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))] - | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n) - -shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))] - | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n) - -shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))] - | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n) - -shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))] - | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) = - Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n) - -shouldInlinePrimOp dflags primop args - | primOpOutOfLine primop = Nothing - | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args - --- TODO: Several primops, such as 'copyArray#', only have an inline --- implementation (below) but could possibly have both an inline --- implementation and an out-of-line implementation, just like --- 'newArray#'. This would lower the amount of code generated, --- hopefully without a performance impact (needs to be measured). - ---------------------------------------------------- -cgPrimOp :: [LocalReg] -- where to put the results - -> PrimOp -- the op - -> [StgArg] -- arguments - -> FCode () - -cgPrimOp results op args - = do dflags <- getDynFlags - arg_exprs <- getNonVoidArgAmodes args - emitPrimOp dflags results op arg_exprs - - ------------------------------------------------------------------------- --- Emitting code for a primop ------------------------------------------------------------------------- - -emitPrimOp :: DynFlags - -> [LocalReg] -- where to put the results - -> PrimOp -- the op - -> [CmmExpr] -- arguments - -> FCode () - --- First we handle various awkward cases specially. The remaining --- easy cases are then handled by translateOp, defined below. - -emitPrimOp _ [res] ParOp [arg] - = - -- for now, just implement this in a C function - -- later, we might want to inline it. - emitCCall - [(res,NoHint)] - (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction))) - [(baseExpr, AddrHint), (arg,AddrHint)] - -emitPrimOp dflags [res] SparkOp [arg] - = do - -- returns the value of arg in res. We're going to therefore - -- refer to arg twice (once to pass to newSpark(), and once to - -- assign to res), so put it in a temporary. - tmp <- assignTemp arg - tmp2 <- newTemp (bWord dflags) - emitCCall - [(tmp2,NoHint)] - (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction))) - [(baseExpr, AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)] - emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp)) - -emitPrimOp dflags [res] GetCCSOfOp [arg] - = emitAssign (CmmLocal res) val - where - val - | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg) - | otherwise = CmmLit (zeroCLit dflags) - -emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg] - = emitAssign (CmmLocal res) cccsExpr - -emitPrimOp _ [res] MyThreadIdOp [] - = emitAssign (CmmLocal res) currentTSOExpr - -emitPrimOp dflags [res] ReadMutVarOp [mutv] - = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags)) - -emitPrimOp dflags res@[] WriteMutVarOp [mutv,var] - = do -- Without this write barrier, other CPUs may see this pointer before - -- the writes for the closure it points to have occurred. - emitPrimCall res MO_WriteBarrier [] - emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var - emitCCall - [{-no results-}] - (CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) - [(baseExpr, AddrHint), (mutv,AddrHint)] - --- #define sizzeofByteArrayzh(r,a) \ --- r = ((StgArrBytes *)(a))->bytes -emitPrimOp dflags [res] SizeofByteArrayOp [arg] - = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags)) - --- #define sizzeofMutableByteArrayzh(r,a) \ --- r = ((StgArrBytes *)(a))->bytes -emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg] - = emitPrimOp dflags [res] SizeofByteArrayOp [arg] - --- #define getSizzeofMutableByteArrayzh(r,a) \ --- r = ((StgArrBytes *)(a))->bytes -emitPrimOp dflags [res] GetSizeofMutableByteArrayOp [arg] - = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags)) - - --- #define touchzh(o) /* nothing */ -emitPrimOp _ res@[] TouchOp args@[_arg] - = do emitPrimCall res MO_Touch args - --- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a) -emitPrimOp dflags [res] ByteArrayContents_Char [arg] - = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags)) - --- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn) -emitPrimOp dflags [res] StableNameToIntOp [arg] - = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags)) - -emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2] - = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2]) - --- #define addrToHValuezh(r,a) r=(P_)a -emitPrimOp _ [res] AddrToAnyOp [arg] - = emitAssign (CmmLocal res) arg - --- #define hvalueToAddrzh(r, a) r=(W_)a -emitPrimOp _ [res] AnyToAddrOp [arg] - = emitAssign (CmmLocal res) arg - -{- Freezing arrays-of-ptrs requires changing an info table, for the - benefit of the generational collector. It needs to scavenge mutable - objects, even if they are in old space. When they become immutable, - they can be removed from this scavenge list. -} - --- #define unsafeFreezzeArrayzh(r,a) --- { --- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN_DIRTY_info); --- r = a; --- } -emitPrimOp _ [res] UnsafeFreezeArrayOp [arg] - = emit $ catAGraphs - [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)), - mkAssign (CmmLocal res) arg ] -emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg] - = emit $ catAGraphs - [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)), - mkAssign (CmmLocal res) arg ] -emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg] - = emit $ catAGraphs - [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN_DIRTY_infoLabel)), - mkAssign (CmmLocal res) arg ] - --- #define unsafeFreezzeByteArrayzh(r,a) r=(a) -emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg] - = emitAssign (CmmLocal res) arg - --- Reading/writing pointer arrays - -emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix -emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix -emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v - -emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix -emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix -emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix -emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix -emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix -emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix -emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v -emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v -emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v -emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v - -emitPrimOp _ [res] ReadSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix -emitPrimOp _ [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix -emitPrimOp _ [] WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v - --- Getting the size of pointer arrays - -emitPrimOp dflags [res] SizeofArrayOp [arg] - = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg - (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgMutArrPtrs_ptrs dflags)) - (bWord dflags)) -emitPrimOp dflags [res] SizeofMutableArrayOp [arg] - = emitPrimOp dflags [res] SizeofArrayOp [arg] -emitPrimOp dflags [res] SizeofArrayArrayOp [arg] - = emitPrimOp dflags [res] SizeofArrayOp [arg] -emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg] - = emitPrimOp dflags [res] SizeofArrayOp [arg] - -emitPrimOp dflags [res] SizeofSmallArrayOp [arg] = - emit $ mkAssign (CmmLocal res) - (cmmLoadIndexW dflags arg - (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgSmallMutArrPtrs_ptrs dflags)) - (bWord dflags)) -emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] = - emitPrimOp dflags [res] SizeofSmallArrayOp [arg] - --- IndexXXXoffAddr - -emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args -emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args -emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args -emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args -emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args -emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args -emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args -emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args -emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args -emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args -emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args -emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args -emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args -emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args -emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args -emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args - --- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr. - -emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args -emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args -emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args -emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args -emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args -emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args -emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args -emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args -emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args -emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args -emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args -emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args -emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args -emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args -emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args -emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args - --- IndexXXXArray - -emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args -emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args -emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args -emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args -emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args -emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args -emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args -emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args -emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args -emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args -emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args -emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args -emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args -emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args -emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args -emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args - --- ReadXXXArray, identical to IndexXXXArray. - -emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args -emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args -emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args -emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args -emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args -emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args -emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args -emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args -emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args -emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args -emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args -emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args -emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args -emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args -emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args -emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args - --- IndexWord8ArrayAsXXX - -emitPrimOp dflags res IndexByteArrayOp_Word8AsChar args = doIndexByteArrayOpAs (Just (mo_u_8ToWord dflags)) b8 b8 res args -emitPrimOp dflags res IndexByteArrayOp_Word8AsWideChar args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args -emitPrimOp dflags res IndexByteArrayOp_Word8AsInt args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args -emitPrimOp dflags res IndexByteArrayOp_Word8AsWord args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args -emitPrimOp dflags res IndexByteArrayOp_Word8AsAddr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args -emitPrimOp _ res IndexByteArrayOp_Word8AsFloat args = doIndexByteArrayOpAs Nothing f32 b8 res args -emitPrimOp _ res IndexByteArrayOp_Word8AsDouble args = doIndexByteArrayOpAs Nothing f64 b8 res args -emitPrimOp dflags res IndexByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args -emitPrimOp dflags res IndexByteArrayOp_Word8AsInt16 args = doIndexByteArrayOpAs (Just (mo_s_16ToWord dflags)) b16 b8 res args -emitPrimOp dflags res IndexByteArrayOp_Word8AsInt32 args = doIndexByteArrayOpAs (Just (mo_s_32ToWord dflags)) b32 b8 res args -emitPrimOp _ res IndexByteArrayOp_Word8AsInt64 args = doIndexByteArrayOpAs Nothing b64 b8 res args -emitPrimOp dflags res IndexByteArrayOp_Word8AsWord16 args = doIndexByteArrayOpAs (Just (mo_u_16ToWord dflags)) b16 b8 res args -emitPrimOp dflags res IndexByteArrayOp_Word8AsWord32 args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args -emitPrimOp _ res IndexByteArrayOp_Word8AsWord64 args = doIndexByteArrayOpAs Nothing b64 b8 res args - --- ReadInt8ArrayAsXXX, identical to IndexInt8ArrayAsXXX - -emitPrimOp dflags res ReadByteArrayOp_Word8AsChar args = doIndexByteArrayOpAs (Just (mo_u_8ToWord dflags)) b8 b8 res args -emitPrimOp dflags res ReadByteArrayOp_Word8AsWideChar args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args -emitPrimOp dflags res ReadByteArrayOp_Word8AsInt args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args -emitPrimOp dflags res ReadByteArrayOp_Word8AsWord args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args -emitPrimOp dflags res ReadByteArrayOp_Word8AsAddr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args -emitPrimOp _ res ReadByteArrayOp_Word8AsFloat args = doIndexByteArrayOpAs Nothing f32 b8 res args -emitPrimOp _ res ReadByteArrayOp_Word8AsDouble args = doIndexByteArrayOpAs Nothing f64 b8 res args -emitPrimOp dflags res ReadByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args -emitPrimOp dflags res ReadByteArrayOp_Word8AsInt16 args = doIndexByteArrayOpAs (Just (mo_s_16ToWord dflags)) b16 b8 res args -emitPrimOp dflags res ReadByteArrayOp_Word8AsInt32 args = doIndexByteArrayOpAs (Just (mo_s_32ToWord dflags)) b32 b8 res args -emitPrimOp _ res ReadByteArrayOp_Word8AsInt64 args = doIndexByteArrayOpAs Nothing b64 b8 res args -emitPrimOp dflags res ReadByteArrayOp_Word8AsWord16 args = doIndexByteArrayOpAs (Just (mo_u_16ToWord dflags)) b16 b8 res args -emitPrimOp dflags res ReadByteArrayOp_Word8AsWord32 args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args -emitPrimOp _ res ReadByteArrayOp_Word8AsWord64 args = doIndexByteArrayOpAs Nothing b64 b8 res args - --- WriteXXXoffAddr - -emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args -emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args -emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args -emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args -emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args -emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args -emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args -emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args -emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args -emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args -emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args -emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args -emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args -emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args -emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args -emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args - --- WriteXXXArray - -emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args -emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args -emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args -emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args -emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args -emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args -emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args -emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args -emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args -emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args -emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args -emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args -emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args -emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args -emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args -emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args - --- WriteInt8ArrayAsXXX - -emitPrimOp dflags res WriteByteArrayOp_Word8AsChar args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args -emitPrimOp dflags res WriteByteArrayOp_Word8AsWideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args -emitPrimOp _ res WriteByteArrayOp_Word8AsInt args = doWriteByteArrayOp Nothing b8 res args -emitPrimOp _ res WriteByteArrayOp_Word8AsWord args = doWriteByteArrayOp Nothing b8 res args -emitPrimOp _ res WriteByteArrayOp_Word8AsAddr args = doWriteByteArrayOp Nothing b8 res args -emitPrimOp _ res WriteByteArrayOp_Word8AsFloat args = doWriteByteArrayOp Nothing b8 res args -emitPrimOp _ res WriteByteArrayOp_Word8AsDouble args = doWriteByteArrayOp Nothing b8 res args -emitPrimOp _ res WriteByteArrayOp_Word8AsStablePtr args = doWriteByteArrayOp Nothing b8 res args -emitPrimOp dflags res WriteByteArrayOp_Word8AsInt16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args -emitPrimOp dflags res WriteByteArrayOp_Word8AsInt32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args -emitPrimOp _ res WriteByteArrayOp_Word8AsInt64 args = doWriteByteArrayOp Nothing b8 res args -emitPrimOp dflags res WriteByteArrayOp_Word8AsWord16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args -emitPrimOp dflags res WriteByteArrayOp_Word8AsWord32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args -emitPrimOp _ res WriteByteArrayOp_Word8AsWord64 args = doWriteByteArrayOp Nothing b8 res args - --- Copying and setting byte arrays -emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] = - doCopyByteArrayOp src src_off dst dst_off n -emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] = - doCopyMutableByteArrayOp src src_off dst dst_off n -emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] = - doCopyByteArrayToAddrOp src src_off dst n -emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] = - doCopyMutableByteArrayToAddrOp src src_off dst n -emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] = - doCopyAddrToByteArrayOp src dst dst_off n -emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] = - doSetByteArrayOp ba off len c - --- Comparing byte arrays -emitPrimOp _ [res] CompareByteArraysOp [ba1,ba1_off,ba2,ba2_off,n] = - doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n - -emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16 -emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32 -emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64 -emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags) - -emitPrimOp _ [res] BRev8Op [w] = emitBRevCall res w W8 -emitPrimOp _ [res] BRev16Op [w] = emitBRevCall res w W16 -emitPrimOp _ [res] BRev32Op [w] = emitBRevCall res w W32 -emitPrimOp _ [res] BRev64Op [w] = emitBRevCall res w W64 -emitPrimOp dflags [res] BRevOp [w] = emitBRevCall res w (wordWidth dflags) - --- Population count -emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8 -emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16 -emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32 -emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64 -emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags) - --- Parallel bit deposit -emitPrimOp _ [res] Pdep8Op [src, mask] = emitPdepCall res src mask W8 -emitPrimOp _ [res] Pdep16Op [src, mask] = emitPdepCall res src mask W16 -emitPrimOp _ [res] Pdep32Op [src, mask] = emitPdepCall res src mask W32 -emitPrimOp _ [res] Pdep64Op [src, mask] = emitPdepCall res src mask W64 -emitPrimOp dflags [res] PdepOp [src, mask] = emitPdepCall res src mask (wordWidth dflags) - --- Parallel bit extract -emitPrimOp _ [res] Pext8Op [src, mask] = emitPextCall res src mask W8 -emitPrimOp _ [res] Pext16Op [src, mask] = emitPextCall res src mask W16 -emitPrimOp _ [res] Pext32Op [src, mask] = emitPextCall res src mask W32 -emitPrimOp _ [res] Pext64Op [src, mask] = emitPextCall res src mask W64 -emitPrimOp dflags [res] PextOp [src, mask] = emitPextCall res src mask (wordWidth dflags) - --- count leading zeros -emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8 -emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16 -emitPrimOp _ [res] Clz32Op [w] = emitClzCall res w W32 -emitPrimOp _ [res] Clz64Op [w] = emitClzCall res w W64 -emitPrimOp dflags [res] ClzOp [w] = emitClzCall res w (wordWidth dflags) - --- count trailing zeros -emitPrimOp _ [res] Ctz8Op [w] = emitCtzCall res w W8 -emitPrimOp _ [res] Ctz16Op [w] = emitCtzCall res w W16 -emitPrimOp _ [res] Ctz32Op [w] = emitCtzCall res w W32 -emitPrimOp _ [res] Ctz64Op [w] = emitCtzCall res w W64 -emitPrimOp dflags [res] CtzOp [w] = emitCtzCall res w (wordWidth dflags) - --- Unsigned int to floating point conversions -emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res] - (MO_UF_Conv W32) [w] -emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res] - (MO_UF_Conv W64) [w] - --- SIMD primops -emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do - checkVecCompatibility dflags vcat n w - doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res - where - zeros :: CmmExpr - zeros = CmmLit $ CmmVec (replicate n zero) - - zero :: CmmLit - zero = case vcat of - IntVec -> CmmInt 0 w - WordVec -> CmmInt 0 w - FloatVec -> CmmFloat 0 w - - ty :: CmmType - ty = vecVmmType vcat n w - -emitPrimOp dflags [res] (VecPackOp vcat n w) es = do - checkVecCompatibility dflags vcat n w - when (es `lengthIsNot` n) $ - panic "emitPrimOp: VecPackOp has wrong number of arguments" - doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res - where - zeros :: CmmExpr - zeros = CmmLit $ CmmVec (replicate n zero) - - zero :: CmmLit - zero = case vcat of - IntVec -> CmmInt 0 w - WordVec -> CmmInt 0 w - FloatVec -> CmmFloat 0 w - - ty :: CmmType - ty = vecVmmType vcat n w - -emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do - checkVecCompatibility dflags vcat n w - when (res `lengthIsNot` n) $ - panic "emitPrimOp: VecUnpackOp has wrong number of results" - doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res - where - ty :: CmmType - ty = vecVmmType vcat n w - -emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do - checkVecCompatibility dflags vcat n w - doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res - where - ty :: CmmType - ty = vecVmmType vcat n w - -emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do - checkVecCompatibility dflags vcat n w - doIndexByteArrayOp Nothing ty res args - where - ty :: CmmType - ty = vecVmmType vcat n w - -emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do - checkVecCompatibility dflags vcat n w - doIndexByteArrayOp Nothing ty res args - where - ty :: CmmType - ty = vecVmmType vcat n w - -emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do - checkVecCompatibility dflags vcat n w - doWriteByteArrayOp Nothing ty res args - where - ty :: CmmType - ty = vecVmmType vcat n w - -emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do - checkVecCompatibility dflags vcat n w - doIndexOffAddrOp Nothing ty res args - where - ty :: CmmType - ty = vecVmmType vcat n w - -emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do - checkVecCompatibility dflags vcat n w - doIndexOffAddrOp Nothing ty res args - where - ty :: CmmType - ty = vecVmmType vcat n w - -emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do - checkVecCompatibility dflags vcat n w - doWriteOffAddrOp Nothing ty res args - where - ty :: CmmType - ty = vecVmmType vcat n w - -emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do - checkVecCompatibility dflags vcat n w - doIndexByteArrayOpAs Nothing vecty ty res args - where - vecty :: CmmType - vecty = vecVmmType vcat n w - - ty :: CmmType - ty = vecCmmCat vcat w - -emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do - checkVecCompatibility dflags vcat n w - doIndexByteArrayOpAs Nothing vecty ty res args - where - vecty :: CmmType - vecty = vecVmmType vcat n w - - ty :: CmmType - ty = vecCmmCat vcat w - -emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do - checkVecCompatibility dflags vcat n w - doWriteByteArrayOp Nothing ty res args - where - ty :: CmmType - ty = vecCmmCat vcat w - -emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do - checkVecCompatibility dflags vcat n w - doIndexOffAddrOpAs Nothing vecty ty res args - where - vecty :: CmmType - vecty = vecVmmType vcat n w - - ty :: CmmType - ty = vecCmmCat vcat w - -emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do - checkVecCompatibility dflags vcat n w - doIndexOffAddrOpAs Nothing vecty ty res args - where - vecty :: CmmType - vecty = vecVmmType vcat n w - - ty :: CmmType - ty = vecCmmCat vcat w - -emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do - checkVecCompatibility dflags vcat n w - doWriteOffAddrOp Nothing ty res args - where - ty :: CmmType - ty = vecCmmCat vcat w - --- Prefetch -emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args -emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args -emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args -emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args - -emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args -emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args -emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args -emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args - -emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args -emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args -emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args -emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args - -emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args -emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args -emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args -emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args - --- Atomic read-modify-write -emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] = - doAtomicRMW res AMO_Add mba ix (bWord dflags) n -emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] = - doAtomicRMW res AMO_Sub mba ix (bWord dflags) n -emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] = - doAtomicRMW res AMO_And mba ix (bWord dflags) n -emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] = - doAtomicRMW res AMO_Nand mba ix (bWord dflags) n -emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] = - doAtomicRMW res AMO_Or mba ix (bWord dflags) n -emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] = - doAtomicRMW res AMO_Xor mba ix (bWord dflags) n -emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] = - doAtomicReadByteArray res mba ix (bWord dflags) -emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] = - doAtomicWriteByteArray mba ix (bWord dflags) val -emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] = - doCasByteArray res mba ix (bWord dflags) old new - --- The rest just translate straightforwardly -emitPrimOp dflags [res] op [arg] - | nopOp op - = emitAssign (CmmLocal res) arg - - | Just (mop,rep) <- narrowOp op - = emitAssign (CmmLocal res) $ - CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]] - -emitPrimOp dflags r@[res] op args - | Just prim <- callishOp op - = do emitPrimCall r prim args - - | Just mop <- translateOp dflags op - = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in - emit stmt - -emitPrimOp dflags results op args - = case callishPrimOpSupported dflags op args of - Left op -> emit $ mkUnsafeCall (PrimTarget op) results args - Right gen -> gen results args - --- Note [QuotRem optimization] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- --- `quot` and `rem` with constant divisor can be implemented with fast bit-ops --- (shift, .&.). --- --- Currently we only support optimization (performed in CmmOpt) when the --- constant is a power of 2. #9041 tracks the implementation of the general --- optimization. --- --- `quotRem` can be optimized in the same way. However as it returns two values, --- it is implemented as a "callish" primop which is harder to match and --- to transform later on. For simplicity, the current implementation detects cases --- that can be optimized (see `quotRemCanBeOptimized`) and converts STG quotRem --- primop into two CMM quot and rem primops. - -type GenericOp = [CmmFormal] -> [CmmActual] -> FCode () - -callishPrimOpSupported :: DynFlags -> PrimOp -> [CmmExpr] -> Either CallishMachOp GenericOp -callishPrimOpSupported dflags op args - = case op of - IntQuotRemOp | ncg && (x86ish || ppc) - , not quotRemCanBeOptimized - -> Left (MO_S_QuotRem (wordWidth dflags)) - | otherwise - -> Right (genericIntQuotRemOp (wordWidth dflags)) - - Int8QuotRemOp | ncg && (x86ish || ppc) - , not quotRemCanBeOptimized - -> Left (MO_S_QuotRem W8) - | otherwise -> Right (genericIntQuotRemOp W8) - - Int16QuotRemOp | ncg && (x86ish || ppc) - , not quotRemCanBeOptimized - -> Left (MO_S_QuotRem W16) - | otherwise -> Right (genericIntQuotRemOp W16) - - - WordQuotRemOp | ncg && (x86ish || ppc) - , not quotRemCanBeOptimized - -> Left (MO_U_QuotRem (wordWidth dflags)) - | otherwise - -> Right (genericWordQuotRemOp (wordWidth dflags)) - - WordQuotRem2Op | (ncg && (x86ish || ppc)) - || llvm -> Left (MO_U_QuotRem2 (wordWidth dflags)) - | otherwise -> Right (genericWordQuotRem2Op dflags) - - Word8QuotRemOp | ncg && (x86ish || ppc) - , not quotRemCanBeOptimized - -> Left (MO_U_QuotRem W8) - | otherwise -> Right (genericWordQuotRemOp W8) - - Word16QuotRemOp| ncg && (x86ish || ppc) - , not quotRemCanBeOptimized - -> Left (MO_U_QuotRem W16) - | otherwise -> Right (genericWordQuotRemOp W16) - - WordAdd2Op | (ncg && (x86ish || ppc)) - || llvm -> Left (MO_Add2 (wordWidth dflags)) - | otherwise -> Right genericWordAdd2Op - - WordAddCOp | (ncg && (x86ish || ppc)) - || llvm -> Left (MO_AddWordC (wordWidth dflags)) - | otherwise -> Right genericWordAddCOp - - WordSubCOp | (ncg && (x86ish || ppc)) - || llvm -> Left (MO_SubWordC (wordWidth dflags)) - | otherwise -> Right genericWordSubCOp - - IntAddCOp | (ncg && (x86ish || ppc)) - || llvm -> Left (MO_AddIntC (wordWidth dflags)) - | otherwise -> Right genericIntAddCOp - - IntSubCOp | (ncg && (x86ish || ppc)) - || llvm -> Left (MO_SubIntC (wordWidth dflags)) - | otherwise -> Right genericIntSubCOp - - WordMul2Op | ncg && (x86ish || ppc) - || llvm -> Left (MO_U_Mul2 (wordWidth dflags)) - | otherwise -> Right genericWordMul2Op - FloatFabsOp | (ncg && x86ish || ppc) - || llvm -> Left MO_F32_Fabs - | otherwise -> Right $ genericFabsOp W32 - DoubleFabsOp | (ncg && x86ish || ppc) - || llvm -> Left MO_F64_Fabs - | otherwise -> Right $ genericFabsOp W64 - - _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op) - where - -- See Note [QuotRem optimization] - quotRemCanBeOptimized = case args of - [_, CmmLit (CmmInt n _) ] -> isJust (exactLog2 n) - _ -> False - - ncg = case hscTarget dflags of - HscAsm -> True - _ -> False - llvm = case hscTarget dflags of - HscLlvm -> True - _ -> False - x86ish = case platformArch (targetPlatform dflags) of - ArchX86 -> True - ArchX86_64 -> True - _ -> False - ppc = case platformArch (targetPlatform dflags) of - ArchPPC -> True - ArchPPC_64 _ -> True - _ -> False - -genericIntQuotRemOp :: Width -> GenericOp -genericIntQuotRemOp width [res_q, res_r] [arg_x, arg_y] - = emit $ mkAssign (CmmLocal res_q) - (CmmMachOp (MO_S_Quot width) [arg_x, arg_y]) <*> - mkAssign (CmmLocal res_r) - (CmmMachOp (MO_S_Rem width) [arg_x, arg_y]) -genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp" - -genericWordQuotRemOp :: Width -> GenericOp -genericWordQuotRemOp width [res_q, res_r] [arg_x, arg_y] - = emit $ mkAssign (CmmLocal res_q) - (CmmMachOp (MO_U_Quot width) [arg_x, arg_y]) <*> - mkAssign (CmmLocal res_r) - (CmmMachOp (MO_U_Rem width) [arg_x, arg_y]) -genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp" - -genericWordQuotRem2Op :: DynFlags -> GenericOp -genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y] - = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low - where ty = cmmExprType dflags arg_x_high - shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i] - shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i] - or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y] - ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y] - ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y] - minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y] - times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y] - zero = lit 0 - one = lit 1 - negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1) - lit i = CmmLit (CmmInt i (wordWidth dflags)) - - f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph - f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*> - mkAssign (CmmLocal res_r) high) - f i acc high low = - do roverflowedBit <- newTemp ty - rhigh' <- newTemp ty - rhigh'' <- newTemp ty - rlow' <- newTemp ty - risge <- newTemp ty - racc' <- newTemp ty - let high' = CmmReg (CmmLocal rhigh') - isge = CmmReg (CmmLocal risge) - overflowedBit = CmmReg (CmmLocal roverflowedBit) - let this = catAGraphs - [mkAssign (CmmLocal roverflowedBit) - (shr high negone), - mkAssign (CmmLocal rhigh') - (or (shl high one) (shr low negone)), - mkAssign (CmmLocal rlow') - (shl low one), - mkAssign (CmmLocal risge) - (or (overflowedBit `ne` zero) - (high' `ge` arg_y)), - mkAssign (CmmLocal rhigh'') - (high' `minus` (arg_y `times` isge)), - mkAssign (CmmLocal racc') - (or (shl acc one) isge)] - rest <- f (i - 1) (CmmReg (CmmLocal racc')) - (CmmReg (CmmLocal rhigh'')) - (CmmReg (CmmLocal rlow')) - return (this <*> rest) -genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op" - -genericWordAdd2Op :: GenericOp -genericWordAdd2Op [res_h, res_l] [arg_x, arg_y] - = do dflags <- getDynFlags - r1 <- newTemp (cmmExprType dflags arg_x) - r2 <- newTemp (cmmExprType dflags arg_x) - let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww] - toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww] - bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm] - add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y] - or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y] - hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags))) - (wordWidth dflags)) - hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags)) - emit $ catAGraphs - [mkAssign (CmmLocal r1) - (add (bottomHalf arg_x) (bottomHalf arg_y)), - mkAssign (CmmLocal r2) - (add (topHalf (CmmReg (CmmLocal r1))) - (add (topHalf arg_x) (topHalf arg_y))), - mkAssign (CmmLocal res_h) - (topHalf (CmmReg (CmmLocal r2))), - mkAssign (CmmLocal res_l) - (or (toTopHalf (CmmReg (CmmLocal r2))) - (bottomHalf (CmmReg (CmmLocal r1))))] -genericWordAdd2Op _ _ = panic "genericWordAdd2Op" - --- | Implements branchless recovery of the carry flag @c@ by checking the --- leftmost bits of both inputs @a@ and @b@ and result @r = a + b@: --- --- @ --- c = a&b | (a|b)&~r --- @ --- --- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/ -genericWordAddCOp :: GenericOp -genericWordAddCOp [res_r, res_c] [aa, bb] - = do dflags <- getDynFlags - emit $ catAGraphs [ - mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]), - mkAssign (CmmLocal res_c) $ - CmmMachOp (mo_wordUShr dflags) [ - CmmMachOp (mo_wordOr dflags) [ - CmmMachOp (mo_wordAnd dflags) [aa,bb], - CmmMachOp (mo_wordAnd dflags) [ - CmmMachOp (mo_wordOr dflags) [aa,bb], - CmmMachOp (mo_wordNot dflags) [CmmReg (CmmLocal res_r)] - ] - ], - mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1) - ] - ] -genericWordAddCOp _ _ = panic "genericWordAddCOp" - --- | Implements branchless recovery of the carry flag @c@ by checking the --- leftmost bits of both inputs @a@ and @b@ and result @r = a - b@: --- --- @ --- c = ~a&b | (~a|b)&r --- @ --- --- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/ -genericWordSubCOp :: GenericOp -genericWordSubCOp [res_r, res_c] [aa, bb] - = do dflags <- getDynFlags - emit $ catAGraphs [ - mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]), - mkAssign (CmmLocal res_c) $ - CmmMachOp (mo_wordUShr dflags) [ - CmmMachOp (mo_wordOr dflags) [ - CmmMachOp (mo_wordAnd dflags) [ - CmmMachOp (mo_wordNot dflags) [aa], - bb - ], - CmmMachOp (mo_wordAnd dflags) [ - CmmMachOp (mo_wordOr dflags) [ - CmmMachOp (mo_wordNot dflags) [aa], - bb - ], - CmmReg (CmmLocal res_r) - ] - ], - mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1) - ] - ] -genericWordSubCOp _ _ = panic "genericWordSubCOp" - -genericIntAddCOp :: GenericOp -genericIntAddCOp [res_r, res_c] [aa, bb] -{- - With some bit-twiddling, we can define int{Add,Sub}Czh portably in - C, and without needing any comparisons. This may not be the - fastest way to do it - if you have better code, please send it! --SDM - - Return : r = a + b, c = 0 if no overflow, 1 on overflow. - - We currently don't make use of the r value if c is != 0 (i.e. - overflow), we just convert to big integers and try again. This - could be improved by making r and c the correct values for - plugging into a new J#. - - { r = ((I_)(a)) + ((I_)(b)); \ - c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \ - >> (BITS_IN (I_) - 1); \ - } - Wading through the mass of bracketry, it seems to reduce to: - c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1) - --} - = do dflags <- getDynFlags - emit $ catAGraphs [ - mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]), - mkAssign (CmmLocal res_c) $ - CmmMachOp (mo_wordUShr dflags) [ - CmmMachOp (mo_wordAnd dflags) [ - CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]], - CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)] - ], - mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1) - ] - ] -genericIntAddCOp _ _ = panic "genericIntAddCOp" - -genericIntSubCOp :: GenericOp -genericIntSubCOp [res_r, res_c] [aa, bb] -{- Similarly: - #define subIntCzh(r,c,a,b) \ - { r = ((I_)(a)) - ((I_)(b)); \ - c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \ - >> (BITS_IN (I_) - 1); \ - } - - c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1) --} - = do dflags <- getDynFlags - emit $ catAGraphs [ - mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]), - mkAssign (CmmLocal res_c) $ - CmmMachOp (mo_wordUShr dflags) [ - CmmMachOp (mo_wordAnd dflags) [ - CmmMachOp (mo_wordXor dflags) [aa,bb], - CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)] - ], - mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1) - ] - ] -genericIntSubCOp _ _ = panic "genericIntSubCOp" - -genericWordMul2Op :: GenericOp -genericWordMul2Op [res_h, res_l] [arg_x, arg_y] - = do dflags <- getDynFlags - let t = cmmExprType dflags arg_x - xlyl <- liftM CmmLocal $ newTemp t - xlyh <- liftM CmmLocal $ newTemp t - xhyl <- liftM CmmLocal $ newTemp t - r <- liftM CmmLocal $ newTemp t - -- This generic implementation is very simple and slow. We might - -- well be able to do better, but for now this at least works. - let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww] - toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww] - bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm] - add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y] - sum = foldl1 add - mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y] - or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y] - hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags))) - (wordWidth dflags)) - hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags)) - emit $ catAGraphs - [mkAssign xlyl - (mul (bottomHalf arg_x) (bottomHalf arg_y)), - mkAssign xlyh - (mul (bottomHalf arg_x) (topHalf arg_y)), - mkAssign xhyl - (mul (topHalf arg_x) (bottomHalf arg_y)), - mkAssign r - (sum [topHalf (CmmReg xlyl), - bottomHalf (CmmReg xhyl), - bottomHalf (CmmReg xlyh)]), - mkAssign (CmmLocal res_l) - (or (bottomHalf (CmmReg xlyl)) - (toTopHalf (CmmReg r))), - mkAssign (CmmLocal res_h) - (sum [mul (topHalf arg_x) (topHalf arg_y), - topHalf (CmmReg xhyl), - topHalf (CmmReg xlyh), - topHalf (CmmReg r)])] -genericWordMul2Op _ _ = panic "genericWordMul2Op" - --- This replicates what we had in libraries/base/GHC/Float.hs: --- --- abs x | x == 0 = 0 -- handles (-0.0) --- | x > 0 = x --- | otherwise = negateFloat x -genericFabsOp :: Width -> GenericOp -genericFabsOp w [res_r] [aa] - = do dflags <- getDynFlags - let zero = CmmLit (CmmFloat 0 w) - - eq x y = CmmMachOp (MO_F_Eq w) [x, y] - gt x y = CmmMachOp (MO_F_Gt w) [x, y] - - neg x = CmmMachOp (MO_F_Neg w) [x] - - g1 = catAGraphs [mkAssign (CmmLocal res_r) zero] - g2 = catAGraphs [mkAssign (CmmLocal res_r) aa] - - res_t <- CmmLocal <$> newTemp (cmmExprType dflags aa) - let g3 = catAGraphs [mkAssign res_t aa, - mkAssign (CmmLocal res_r) (neg (CmmReg res_t))] - - g4 <- mkCmmIfThenElse (gt aa zero) g2 g3 - - emit =<< mkCmmIfThenElse (eq aa zero) g1 g4 - -genericFabsOp _ _ _ = panic "genericFabsOp" - --- These PrimOps are NOPs in Cmm - -nopOp :: PrimOp -> Bool -nopOp Int2WordOp = True -nopOp Word2IntOp = True -nopOp Int2AddrOp = True -nopOp Addr2IntOp = True -nopOp ChrOp = True -- Int# and Char# are rep'd the same -nopOp OrdOp = True -nopOp _ = False - --- These PrimOps turn into double casts - -narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width) -narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8) -narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16) -narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32) -narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8) -narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16) -narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32) -narrowOp _ = Nothing - --- Native word signless ops - -translateOp :: DynFlags -> PrimOp -> Maybe MachOp -translateOp dflags IntAddOp = Just (mo_wordAdd dflags) -translateOp dflags IntSubOp = Just (mo_wordSub dflags) -translateOp dflags WordAddOp = Just (mo_wordAdd dflags) -translateOp dflags WordSubOp = Just (mo_wordSub dflags) -translateOp dflags AddrAddOp = Just (mo_wordAdd dflags) -translateOp dflags AddrSubOp = Just (mo_wordSub dflags) - -translateOp dflags IntEqOp = Just (mo_wordEq dflags) -translateOp dflags IntNeOp = Just (mo_wordNe dflags) -translateOp dflags WordEqOp = Just (mo_wordEq dflags) -translateOp dflags WordNeOp = Just (mo_wordNe dflags) -translateOp dflags AddrEqOp = Just (mo_wordEq dflags) -translateOp dflags AddrNeOp = Just (mo_wordNe dflags) - -translateOp dflags AndOp = Just (mo_wordAnd dflags) -translateOp dflags OrOp = Just (mo_wordOr dflags) -translateOp dflags XorOp = Just (mo_wordXor dflags) -translateOp dflags NotOp = Just (mo_wordNot dflags) -translateOp dflags SllOp = Just (mo_wordShl dflags) -translateOp dflags SrlOp = Just (mo_wordUShr dflags) - -translateOp dflags AddrRemOp = Just (mo_wordURem dflags) - --- Native word signed ops - -translateOp dflags IntMulOp = Just (mo_wordMul dflags) -translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags)) -translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags) -translateOp dflags IntRemOp = Just (mo_wordSRem dflags) -translateOp dflags IntNegOp = Just (mo_wordSNeg dflags) - - -translateOp dflags IntGeOp = Just (mo_wordSGe dflags) -translateOp dflags IntLeOp = Just (mo_wordSLe dflags) -translateOp dflags IntGtOp = Just (mo_wordSGt dflags) -translateOp dflags IntLtOp = Just (mo_wordSLt dflags) - -translateOp dflags AndIOp = Just (mo_wordAnd dflags) -translateOp dflags OrIOp = Just (mo_wordOr dflags) -translateOp dflags XorIOp = Just (mo_wordXor dflags) -translateOp dflags NotIOp = Just (mo_wordNot dflags) -translateOp dflags ISllOp = Just (mo_wordShl dflags) -translateOp dflags ISraOp = Just (mo_wordSShr dflags) -translateOp dflags ISrlOp = Just (mo_wordUShr dflags) - --- Native word unsigned ops - -translateOp dflags WordGeOp = Just (mo_wordUGe dflags) -translateOp dflags WordLeOp = Just (mo_wordULe dflags) -translateOp dflags WordGtOp = Just (mo_wordUGt dflags) -translateOp dflags WordLtOp = Just (mo_wordULt dflags) - -translateOp dflags WordMulOp = Just (mo_wordMul dflags) -translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags) -translateOp dflags WordRemOp = Just (mo_wordURem dflags) - -translateOp dflags AddrGeOp = Just (mo_wordUGe dflags) -translateOp dflags AddrLeOp = Just (mo_wordULe dflags) -translateOp dflags AddrGtOp = Just (mo_wordUGt dflags) -translateOp dflags AddrLtOp = Just (mo_wordULt dflags) - --- Int8# signed ops - -translateOp dflags Int8Extend = Just (MO_SS_Conv W8 (wordWidth dflags)) -translateOp dflags Int8Narrow = Just (MO_SS_Conv (wordWidth dflags) W8) -translateOp _ Int8NegOp = Just (MO_S_Neg W8) -translateOp _ Int8AddOp = Just (MO_Add W8) -translateOp _ Int8SubOp = Just (MO_Sub W8) -translateOp _ Int8MulOp = Just (MO_Mul W8) -translateOp _ Int8QuotOp = Just (MO_S_Quot W8) -translateOp _ Int8RemOp = Just (MO_S_Rem W8) - -translateOp _ Int8EqOp = Just (MO_Eq W8) -translateOp _ Int8GeOp = Just (MO_S_Ge W8) -translateOp _ Int8GtOp = Just (MO_S_Gt W8) -translateOp _ Int8LeOp = Just (MO_S_Le W8) -translateOp _ Int8LtOp = Just (MO_S_Lt W8) -translateOp _ Int8NeOp = Just (MO_Ne W8) - --- Word8# unsigned ops - -translateOp dflags Word8Extend = Just (MO_UU_Conv W8 (wordWidth dflags)) -translateOp dflags Word8Narrow = Just (MO_UU_Conv (wordWidth dflags) W8) -translateOp _ Word8NotOp = Just (MO_Not W8) -translateOp _ Word8AddOp = Just (MO_Add W8) -translateOp _ Word8SubOp = Just (MO_Sub W8) -translateOp _ Word8MulOp = Just (MO_Mul W8) -translateOp _ Word8QuotOp = Just (MO_U_Quot W8) -translateOp _ Word8RemOp = Just (MO_U_Rem W8) - -translateOp _ Word8EqOp = Just (MO_Eq W8) -translateOp _ Word8GeOp = Just (MO_U_Ge W8) -translateOp _ Word8GtOp = Just (MO_U_Gt W8) -translateOp _ Word8LeOp = Just (MO_U_Le W8) -translateOp _ Word8LtOp = Just (MO_U_Lt W8) -translateOp _ Word8NeOp = Just (MO_Ne W8) - --- Int16# signed ops - -translateOp dflags Int16Extend = Just (MO_SS_Conv W16 (wordWidth dflags)) -translateOp dflags Int16Narrow = Just (MO_SS_Conv (wordWidth dflags) W16) -translateOp _ Int16NegOp = Just (MO_S_Neg W16) -translateOp _ Int16AddOp = Just (MO_Add W16) -translateOp _ Int16SubOp = Just (MO_Sub W16) -translateOp _ Int16MulOp = Just (MO_Mul W16) -translateOp _ Int16QuotOp = Just (MO_S_Quot W16) -translateOp _ Int16RemOp = Just (MO_S_Rem W16) - -translateOp _ Int16EqOp = Just (MO_Eq W16) -translateOp _ Int16GeOp = Just (MO_S_Ge W16) -translateOp _ Int16GtOp = Just (MO_S_Gt W16) -translateOp _ Int16LeOp = Just (MO_S_Le W16) -translateOp _ Int16LtOp = Just (MO_S_Lt W16) -translateOp _ Int16NeOp = Just (MO_Ne W16) - --- Word16# unsigned ops - -translateOp dflags Word16Extend = Just (MO_UU_Conv W16 (wordWidth dflags)) -translateOp dflags Word16Narrow = Just (MO_UU_Conv (wordWidth dflags) W16) -translateOp _ Word16NotOp = Just (MO_Not W16) -translateOp _ Word16AddOp = Just (MO_Add W16) -translateOp _ Word16SubOp = Just (MO_Sub W16) -translateOp _ Word16MulOp = Just (MO_Mul W16) -translateOp _ Word16QuotOp = Just (MO_U_Quot W16) -translateOp _ Word16RemOp = Just (MO_U_Rem W16) - -translateOp _ Word16EqOp = Just (MO_Eq W16) -translateOp _ Word16GeOp = Just (MO_U_Ge W16) -translateOp _ Word16GtOp = Just (MO_U_Gt W16) -translateOp _ Word16LeOp = Just (MO_U_Le W16) -translateOp _ Word16LtOp = Just (MO_U_Lt W16) -translateOp _ Word16NeOp = Just (MO_Ne W16) - --- Char# ops - -translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags)) -translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags)) -translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags)) -translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags)) -translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags)) -translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags)) - --- Double ops - -translateOp _ DoubleEqOp = Just (MO_F_Eq W64) -translateOp _ DoubleNeOp = Just (MO_F_Ne W64) -translateOp _ DoubleGeOp = Just (MO_F_Ge W64) -translateOp _ DoubleLeOp = Just (MO_F_Le W64) -translateOp _ DoubleGtOp = Just (MO_F_Gt W64) -translateOp _ DoubleLtOp = Just (MO_F_Lt W64) - -translateOp _ DoubleAddOp = Just (MO_F_Add W64) -translateOp _ DoubleSubOp = Just (MO_F_Sub W64) -translateOp _ DoubleMulOp = Just (MO_F_Mul W64) -translateOp _ DoubleDivOp = Just (MO_F_Quot W64) -translateOp _ DoubleNegOp = Just (MO_F_Neg W64) - --- Float ops - -translateOp _ FloatEqOp = Just (MO_F_Eq W32) -translateOp _ FloatNeOp = Just (MO_F_Ne W32) -translateOp _ FloatGeOp = Just (MO_F_Ge W32) -translateOp _ FloatLeOp = Just (MO_F_Le W32) -translateOp _ FloatGtOp = Just (MO_F_Gt W32) -translateOp _ FloatLtOp = Just (MO_F_Lt W32) - -translateOp _ FloatAddOp = Just (MO_F_Add W32) -translateOp _ FloatSubOp = Just (MO_F_Sub W32) -translateOp _ FloatMulOp = Just (MO_F_Mul W32) -translateOp _ FloatDivOp = Just (MO_F_Quot W32) -translateOp _ FloatNegOp = Just (MO_F_Neg W32) - --- Vector ops - -translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w) -translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w) -translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w) -translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w) -translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w) - -translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w) -translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w) -translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w) -translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w) -translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w) -translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w) - -translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w) -translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w) -translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w) -translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w) -translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w) - --- Conversions - -translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64) -translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags)) - -translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32) -translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags)) - -translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64) -translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32) - --- Word comparisons masquerading as more exotic things. - -translateOp dflags SameMutVarOp = Just (mo_wordEq dflags) -translateOp dflags SameMVarOp = Just (mo_wordEq dflags) -translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags) -translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags) -translateOp dflags SameMutableArrayArrayOp= Just (mo_wordEq dflags) -translateOp dflags SameSmallMutableArrayOp= Just (mo_wordEq dflags) -translateOp dflags SameTVarOp = Just (mo_wordEq dflags) -translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags) --- See Note [Comparing stable names] -translateOp dflags EqStableNameOp = Just (mo_wordEq dflags) - -translateOp _ _ = Nothing - --- Note [Comparing stable names] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- --- A StableName# is actually a pointer to a stable name object (SNO) --- containing an index into the stable name table (SNT). We --- used to compare StableName#s by following the pointers to the --- SNOs and checking whether they held the same SNT indices. However, --- this is not necessary: there is a one-to-one correspondence --- between SNOs and entries in the SNT, so simple pointer equality --- does the trick. - --- These primops are implemented by CallishMachOps, because they sometimes --- turn into foreign calls depending on the backend. - -callishOp :: PrimOp -> Maybe CallishMachOp -callishOp DoublePowerOp = Just MO_F64_Pwr -callishOp DoubleSinOp = Just MO_F64_Sin -callishOp DoubleCosOp = Just MO_F64_Cos -callishOp DoubleTanOp = Just MO_F64_Tan -callishOp DoubleSinhOp = Just MO_F64_Sinh -callishOp DoubleCoshOp = Just MO_F64_Cosh -callishOp DoubleTanhOp = Just MO_F64_Tanh -callishOp DoubleAsinOp = Just MO_F64_Asin -callishOp DoubleAcosOp = Just MO_F64_Acos -callishOp DoubleAtanOp = Just MO_F64_Atan -callishOp DoubleAsinhOp = Just MO_F64_Asinh -callishOp DoubleAcoshOp = Just MO_F64_Acosh -callishOp DoubleAtanhOp = Just MO_F64_Atanh -callishOp DoubleLogOp = Just MO_F64_Log -callishOp DoubleLog1POp = Just MO_F64_Log1P -callishOp DoubleExpOp = Just MO_F64_Exp -callishOp DoubleExpM1Op = Just MO_F64_ExpM1 -callishOp DoubleSqrtOp = Just MO_F64_Sqrt - -callishOp FloatPowerOp = Just MO_F32_Pwr -callishOp FloatSinOp = Just MO_F32_Sin -callishOp FloatCosOp = Just MO_F32_Cos -callishOp FloatTanOp = Just MO_F32_Tan -callishOp FloatSinhOp = Just MO_F32_Sinh -callishOp FloatCoshOp = Just MO_F32_Cosh -callishOp FloatTanhOp = Just MO_F32_Tanh -callishOp FloatAsinOp = Just MO_F32_Asin -callishOp FloatAcosOp = Just MO_F32_Acos -callishOp FloatAtanOp = Just MO_F32_Atan -callishOp FloatAsinhOp = Just MO_F32_Asinh -callishOp FloatAcoshOp = Just MO_F32_Acosh -callishOp FloatAtanhOp = Just MO_F32_Atanh -callishOp FloatLogOp = Just MO_F32_Log -callishOp FloatLog1POp = Just MO_F32_Log1P -callishOp FloatExpOp = Just MO_F32_Exp -callishOp FloatExpM1Op = Just MO_F32_ExpM1 -callishOp FloatSqrtOp = Just MO_F32_Sqrt - -callishOp _ = Nothing - ------------------------------------------------------------------------------- --- Helpers for translating various minor variants of array indexing. - -doIndexOffAddrOp :: Maybe MachOp - -> CmmType - -> [LocalReg] - -> [CmmExpr] - -> FCode () -doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx] - = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx -doIndexOffAddrOp _ _ _ _ - = panic "StgCmmPrim: doIndexOffAddrOp" - -doIndexOffAddrOpAs :: Maybe MachOp - -> CmmType - -> CmmType - -> [LocalReg] - -> [CmmExpr] - -> FCode () -doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx] - = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx -doIndexOffAddrOpAs _ _ _ _ _ - = panic "StgCmmPrim: doIndexOffAddrOpAs" - -doIndexByteArrayOp :: Maybe MachOp - -> CmmType - -> [LocalReg] - -> [CmmExpr] - -> FCode () -doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx] - = do dflags <- getDynFlags - mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx -doIndexByteArrayOp _ _ _ _ - = panic "StgCmmPrim: doIndexByteArrayOp" - -doIndexByteArrayOpAs :: Maybe MachOp - -> CmmType - -> CmmType - -> [LocalReg] - -> [CmmExpr] - -> FCode () -doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx] - = do dflags <- getDynFlags - mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx -doIndexByteArrayOpAs _ _ _ _ _ - = panic "StgCmmPrim: doIndexByteArrayOpAs" - -doReadPtrArrayOp :: LocalReg - -> CmmExpr - -> CmmExpr - -> FCode () -doReadPtrArrayOp res addr idx - = do dflags <- getDynFlags - mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx - -doWriteOffAddrOp :: Maybe MachOp - -> CmmType - -> [LocalReg] - -> [CmmExpr] - -> FCode () -doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val] - = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val -doWriteOffAddrOp _ _ _ _ - = panic "StgCmmPrim: doWriteOffAddrOp" - -doWriteByteArrayOp :: Maybe MachOp - -> CmmType - -> [LocalReg] - -> [CmmExpr] - -> FCode () -doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val] - = do dflags <- getDynFlags - mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val -doWriteByteArrayOp _ _ _ _ - = panic "StgCmmPrim: doWriteByteArrayOp" - -doWritePtrArrayOp :: CmmExpr - -> CmmExpr - -> CmmExpr - -> FCode () -doWritePtrArrayOp addr idx val - = do dflags <- getDynFlags - let ty = cmmExprType dflags val - -- This write barrier is to ensure that the heap writes to the object - -- referred to by val have happened before we write val into the array. - -- See #12469 for details. - emitPrimCall [] MO_WriteBarrier [] - mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val - emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel))) - -- the write barrier. We must write a byte into the mark table: - -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N] - emit $ mkStore ( - cmmOffsetExpr dflags - (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags)) - (loadArrPtrsSize dflags addr)) - (CmmMachOp (mo_wordUShr dflags) [idx, - mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)]) - ) (CmmLit (CmmInt 1 W8)) - -loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr -loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags) - where off = fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags - -mkBasicIndexedRead :: ByteOff -- Initial offset in bytes - -> Maybe MachOp -- Optional result cast - -> CmmType -- Type of element we are accessing - -> LocalReg -- Destination - -> CmmExpr -- Base address - -> CmmType -- Type of element by which we are indexing - -> CmmExpr -- Index - -> FCode () -mkBasicIndexedRead off Nothing ty res base idx_ty idx - = do dflags <- getDynFlags - emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx) -mkBasicIndexedRead off (Just cast) ty res base idx_ty idx - = do dflags <- getDynFlags - emitAssign (CmmLocal res) (CmmMachOp cast [ - cmmLoadIndexOffExpr dflags off ty base idx_ty idx]) - -mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes - -> Maybe MachOp -- Optional value cast - -> CmmExpr -- Base address - -> CmmType -- Type of element by which we are indexing - -> CmmExpr -- Index - -> CmmExpr -- Value to write - -> FCode () -mkBasicIndexedWrite off Nothing base idx_ty idx val - = do dflags <- getDynFlags - emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val -mkBasicIndexedWrite off (Just cast) base idx_ty idx val - = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val]) - --- ---------------------------------------------------------------------------- --- Misc utils - -cmmIndexOffExpr :: DynFlags - -> ByteOff -- Initial offset in bytes - -> Width -- Width of element by which we are indexing - -> CmmExpr -- Base address - -> CmmExpr -- Index - -> CmmExpr -cmmIndexOffExpr dflags off width base idx - = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx - -cmmLoadIndexOffExpr :: DynFlags - -> ByteOff -- Initial offset in bytes - -> CmmType -- Type of element we are accessing - -> CmmExpr -- Base address - -> CmmType -- Type of element by which we are indexing - -> CmmExpr -- Index - -> CmmExpr -cmmLoadIndexOffExpr dflags off ty base idx_ty idx - = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty - -setInfo :: CmmExpr -> CmmExpr -> CmmAGraph -setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr - ------------------------------------------------------------------------------- --- Helpers for translating vector primops. - -vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType -vecVmmType pocat n w = vec n (vecCmmCat pocat w) - -vecCmmCat :: PrimOpVecCat -> Width -> CmmType -vecCmmCat IntVec = cmmBits -vecCmmCat WordVec = cmmBits -vecCmmCat FloatVec = cmmFloat - -vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp -vecElemInjectCast _ FloatVec _ = Nothing -vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags) -vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags) -vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags) -vecElemInjectCast _ IntVec W64 = Nothing -vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags) -vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags) -vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags) -vecElemInjectCast _ WordVec W64 = Nothing -vecElemInjectCast _ _ _ = Nothing - -vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp -vecElemProjectCast _ FloatVec _ = Nothing -vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags) -vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags) -vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags) -vecElemProjectCast _ IntVec W64 = Nothing -vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags) -vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags) -vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags) -vecElemProjectCast _ WordVec W64 = Nothing -vecElemProjectCast _ _ _ = Nothing - - --- NOTE [SIMD Design for the future] --- Check to make sure that we can generate code for the specified vector type --- given the current set of dynamic flags. --- Currently these checks are specific to x86 and x86_64 architecture. --- This should be fixed! --- In particular, --- 1) Add better support for other architectures! (this may require a redesign) --- 2) Decouple design choices from LLVM's pseudo SIMD model! --- The high level LLVM naive rep makes per CPU family SIMD generation is own --- optimization problem, and hides important differences in eg ARM vs x86_64 simd --- 3) Depending on the architecture, the SIMD registers may also support general --- computations on Float/Double/Word/Int scalars, but currently on --- for example x86_64, we always put Word/Int (or sized) in GPR --- (general purpose) registers. Would relaxing that allow for --- useful optimization opportunities? --- Phrased differently, it is worth experimenting with supporting --- different register mapping strategies than we currently have, especially if --- someday we want SIMD to be a first class denizen in GHC along with scalar --- values! --- The current design with respect to register mapping of scalars could --- very well be the best,but exploring the design space and doing careful --- measurments is the only only way to validate that. --- In some next generation CPU ISAs, notably RISC V, the SIMD extension --- includes support for a sort of run time CPU dependent vectorization parameter, --- where a loop may act upon a single scalar each iteration OR some 2,4,8 ... --- element chunk! Time will tell if that direction sees wide adoption, --- but it is from that context that unifying our handling of simd and scalars --- may benefit. It is not likely to benefit current architectures, though --- it may very well be a design perspective that helps guide improving the NCG. - - -checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode () -checkVecCompatibility dflags vcat l w = do - when (hscTarget dflags /= HscLlvm) $ do - sorry $ unlines ["SIMD vector instructions require the LLVM back-end." - ,"Please use -fllvm."] - check vecWidth vcat l w - where - check :: Width -> PrimOpVecCat -> Length -> Width -> FCode () - check W128 FloatVec 4 W32 | not (isSseEnabled dflags) = - sorry $ "128-bit wide single-precision floating point " ++ - "SIMD vector instructions require at least -msse." - check W128 _ _ _ | not (isSse2Enabled dflags) = - sorry $ "128-bit wide integer and double precision " ++ - "SIMD vector instructions require at least -msse2." - check W256 FloatVec _ _ | not (isAvxEnabled dflags) = - sorry $ "256-bit wide floating point " ++ - "SIMD vector instructions require at least -mavx." - check W256 _ _ _ | not (isAvx2Enabled dflags) = - sorry $ "256-bit wide integer " ++ - "SIMD vector instructions require at least -mavx2." - check W512 _ _ _ | not (isAvx512fEnabled dflags) = - sorry $ "512-bit wide " ++ - "SIMD vector instructions require -mavx512f." - check _ _ _ _ = return () - - vecWidth = typeWidth (vecVmmType vcat l w) - ------------------------------------------------------------------------------- --- Helpers for translating vector packing and unpacking. - -doVecPackOp :: Maybe MachOp -- Cast from element to vector component - -> CmmType -- Type of vector - -> CmmExpr -- Initial vector - -> [CmmExpr] -- Elements - -> CmmFormal -- Destination for result - -> FCode () -doVecPackOp maybe_pre_write_cast ty z es res = do - dst <- newTemp ty - emitAssign (CmmLocal dst) z - vecPack dst es 0 - where - vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode () - vecPack src [] _ = - emitAssign (CmmLocal res) (CmmReg (CmmLocal src)) - - vecPack src (e : es) i = do - dst <- newTemp ty - if isFloatType (vecElemType ty) - then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid) - [CmmReg (CmmLocal src), cast e, iLit]) - else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid) - [CmmReg (CmmLocal src), cast e, iLit]) - vecPack dst es (i + 1) - where - -- vector indices are always 32-bits - iLit = CmmLit (CmmInt (toInteger i) W32) - - cast :: CmmExpr -> CmmExpr - cast val = case maybe_pre_write_cast of - Nothing -> val - Just cast -> CmmMachOp cast [val] - - len :: Length - len = vecLength ty - - wid :: Width - wid = typeWidth (vecElemType ty) - -doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result - -> CmmType -- Type of vector - -> CmmExpr -- Vector - -> [CmmFormal] -- Element results - -> FCode () -doVecUnpackOp maybe_post_read_cast ty e res = - vecUnpack res 0 - where - vecUnpack :: [CmmFormal] -> Int -> FCode () - vecUnpack [] _ = - return () - - vecUnpack (r : rs) i = do - if isFloatType (vecElemType ty) - then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid) - [e, iLit])) - else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid) - [e, iLit])) - vecUnpack rs (i + 1) - where - -- vector indices are always 32-bits - iLit = CmmLit (CmmInt (toInteger i) W32) - - cast :: CmmExpr -> CmmExpr - cast val = case maybe_post_read_cast of - Nothing -> val - Just cast -> CmmMachOp cast [val] - - len :: Length - len = vecLength ty - - wid :: Width - wid = typeWidth (vecElemType ty) - -doVecInsertOp :: Maybe MachOp -- Cast from element to vector component - -> CmmType -- Vector type - -> CmmExpr -- Source vector - -> CmmExpr -- Element - -> CmmExpr -- Index at which to insert element - -> CmmFormal -- Destination for result - -> FCode () -doVecInsertOp maybe_pre_write_cast ty src e idx res = do - dflags <- getDynFlags - -- vector indices are always 32-bits - let idx' :: CmmExpr - idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx] - if isFloatType (vecElemType ty) - then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx']) - else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx']) - where - cast :: CmmExpr -> CmmExpr - cast val = case maybe_pre_write_cast of - Nothing -> val - Just cast -> CmmMachOp cast [val] - - len :: Length - len = vecLength ty - - wid :: Width - wid = typeWidth (vecElemType ty) - ------------------------------------------------------------------------------- --- Helpers for translating prefetching. - - --- | Translate byte array prefetch operations into proper primcalls. -doPrefetchByteArrayOp :: Int - -> [CmmExpr] - -> FCode () -doPrefetchByteArrayOp locality [addr,idx] - = do dflags <- getDynFlags - mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx -doPrefetchByteArrayOp _ _ - = panic "StgCmmPrim: doPrefetchByteArrayOp" - --- | Translate mutable byte array prefetch operations into proper primcalls. -doPrefetchMutableByteArrayOp :: Int - -> [CmmExpr] - -> FCode () -doPrefetchMutableByteArrayOp locality [addr,idx] - = do dflags <- getDynFlags - mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx -doPrefetchMutableByteArrayOp _ _ - = panic "StgCmmPrim: doPrefetchByteArrayOp" - --- | Translate address prefetch operations into proper primcalls. -doPrefetchAddrOp ::Int - -> [CmmExpr] - -> FCode () -doPrefetchAddrOp locality [addr,idx] - = mkBasicPrefetch locality 0 addr idx -doPrefetchAddrOp _ _ - = panic "StgCmmPrim: doPrefetchAddrOp" - --- | Translate value prefetch operations into proper primcalls. -doPrefetchValueOp :: Int - -> [CmmExpr] - -> FCode () -doPrefetchValueOp locality [addr] - = do dflags <- getDynFlags - mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags))) -doPrefetchValueOp _ _ - = panic "StgCmmPrim: doPrefetchValueOp" - --- | helper to generate prefetch primcalls -mkBasicPrefetch :: Int -- Locality level 0-3 - -> ByteOff -- Initial offset in bytes - -> CmmExpr -- Base address - -> CmmExpr -- Index - -> FCode () -mkBasicPrefetch locality off base idx - = do dflags <- getDynFlags - emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx] - return () - --- ---------------------------------------------------------------------------- --- Allocating byte arrays - --- | Takes a register to return the newly allocated array in and the --- size of the new array in bytes. Allocates a new --- 'MutableByteArray#'. -doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode () -doNewByteArrayOp res_r n = do - dflags <- getDynFlags - - let info_ptr = mkLblExpr mkArrWords_infoLabel - rep = arrWordsRep dflags n - - tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags)) - (mkIntExpr dflags (nonHdrSize dflags rep)) - (zeroExpr dflags) - - let hdr_size = fixedHdrSize dflags - - base <- allocHeapClosure rep info_ptr cccsExpr - [ (mkIntExpr dflags n, - hdr_size + oFFSET_StgArrBytes_bytes dflags) - ] - - emit $ mkAssign (CmmLocal res_r) base - --- ---------------------------------------------------------------------------- --- Comparing byte arrays - -doCompareByteArraysOp :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr - -> FCode () -doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n = do - dflags <- getDynFlags - ba1_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba1 (arrWordsHdrSize dflags)) ba1_off - ba2_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba2 (arrWordsHdrSize dflags)) ba2_off - - -- short-cut in case of equal pointers avoiding a costly - -- subroutine call to the memcmp(3) routine; the Cmm logic below - -- results in assembly code being generated for - -- - -- cmpPrefix10 :: ByteArray# -> ByteArray# -> Int# - -- cmpPrefix10 ba1 ba2 = compareByteArrays# ba1 0# ba2 0# 10# - -- - -- that looks like - -- - -- leaq 16(%r14),%rax - -- leaq 16(%rsi),%rbx - -- xorl %ecx,%ecx - -- cmpq %rbx,%rax - -- je l_ptr_eq - -- - -- ; NB: the common case (unequal pointers) falls-through - -- ; the conditional jump, and therefore matches the - -- ; usual static branch prediction convention of modern cpus - -- - -- subq $8,%rsp - -- movq %rbx,%rsi - -- movq %rax,%rdi - -- movl $10,%edx - -- xorl %eax,%eax - -- call memcmp - -- addq $8,%rsp - -- movslq %eax,%rax - -- movq %rax,%rcx - -- l_ptr_eq: - -- movq %rcx,%rbx - -- jmp *(%rbp) - - l_ptr_eq <- newBlockId - l_ptr_ne <- newBlockId - - emit (mkAssign (CmmLocal res) (zeroExpr dflags)) - emit (mkCbranch (cmmEqWord dflags ba1_p ba2_p) - l_ptr_eq l_ptr_ne (Just False)) - - emitLabel l_ptr_ne - emitMemcmpCall res ba1_p ba2_p n 1 - - emitLabel l_ptr_eq - --- ---------------------------------------------------------------------------- --- Copying byte arrays - --- | Takes a source 'ByteArray#', an offset in the source array, a --- destination 'MutableByteArray#', an offset into the destination --- array, and the number of bytes to copy. Copies the given number of --- bytes from the source array to the destination array. -doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr - -> FCode () -doCopyByteArrayOp = emitCopyByteArray copy - where - -- Copy data (we assume the arrays aren't overlapping since - -- they're of different types) - copy _src _dst dst_p src_p bytes align = - emitMemcpyCall dst_p src_p bytes align - --- | Takes a source 'MutableByteArray#', an offset in the source --- array, a destination 'MutableByteArray#', an offset into the --- destination array, and the number of bytes to copy. Copies the --- given number of bytes from the source array to the destination --- array. -doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr - -> FCode () -doCopyMutableByteArrayOp = emitCopyByteArray copy - where - -- The only time the memory might overlap is when the two arrays - -- we were provided are the same array! - -- TODO: Optimize branch for common case of no aliasing. - copy src dst dst_p src_p bytes align = do - dflags <- getDynFlags - (moveCall, cpyCall) <- forkAltPair - (getCode $ emitMemmoveCall dst_p src_p bytes align) - (getCode $ emitMemcpyCall dst_p src_p bytes align) - emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall - -emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr - -> Alignment -> FCode ()) - -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr - -> FCode () -emitCopyByteArray copy src src_off dst dst_off n = do - dflags <- getDynFlags - let byteArrayAlignment = wordAlignment dflags - srcOffAlignment = cmmExprAlignment src_off - dstOffAlignment = cmmExprAlignment dst_off - align = minimum [byteArrayAlignment, srcOffAlignment, dstOffAlignment] - dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off - src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off - copy src dst dst_p src_p n align - --- | Takes a source 'ByteArray#', an offset in the source array, a --- destination 'Addr#', and the number of bytes to copy. Copies the given --- number of bytes from the source array to the destination memory region. -doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode () -doCopyByteArrayToAddrOp src src_off dst_p bytes = do - -- Use memcpy (we are allowed to assume the arrays aren't overlapping) - dflags <- getDynFlags - src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off - emitMemcpyCall dst_p src_p bytes (mkAlignment 1) - --- | Takes a source 'MutableByteArray#', an offset in the source array, a --- destination 'Addr#', and the number of bytes to copy. Copies the given --- number of bytes from the source array to the destination memory region. -doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr - -> FCode () -doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp - --- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into --- the destination array, and the number of bytes to copy. Copies the given --- number of bytes from the source memory region to the destination array. -doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode () -doCopyAddrToByteArrayOp src_p dst dst_off bytes = do - -- Use memcpy (we are allowed to assume the arrays aren't overlapping) - dflags <- getDynFlags - dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off - emitMemcpyCall dst_p src_p bytes (mkAlignment 1) - - --- ---------------------------------------------------------------------------- --- Setting byte arrays - --- | Takes a 'MutableByteArray#', an offset into the array, a length, --- and a byte, and sets each of the selected bytes in the array to the --- character. -doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr - -> FCode () -doSetByteArrayOp ba off len c = do - dflags <- getDynFlags - - let byteArrayAlignment = wordAlignment dflags -- known since BA is allocated on heap - offsetAlignment = cmmExprAlignment off - align = min byteArrayAlignment offsetAlignment - - p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off - emitMemsetCall p c len align - --- ---------------------------------------------------------------------------- --- Allocating arrays - --- | Allocate a new array. -doNewArrayOp :: CmmFormal -- ^ return register - -> SMRep -- ^ representation of the array - -> CLabel -- ^ info pointer - -> [(CmmExpr, ByteOff)] -- ^ header payload - -> WordOff -- ^ array size - -> CmmExpr -- ^ initial element - -> FCode () -doNewArrayOp res_r rep info payload n init = do - dflags <- getDynFlags - - let info_ptr = mkLblExpr info - - tickyAllocPrim (mkIntExpr dflags (hdrSize dflags rep)) - (mkIntExpr dflags (nonHdrSize dflags rep)) - (zeroExpr dflags) - - base <- allocHeapClosure rep info_ptr cccsExpr payload - - arr <- CmmLocal `fmap` newTemp (bWord dflags) - emit $ mkAssign arr base - - -- Initialise all elements of the array - let mkOff off = cmmOffsetW dflags (CmmReg arr) (hdrSizeW dflags rep + off) - initialization = [ mkStore (mkOff off) init | off <- [0.. n - 1] ] - emit (catAGraphs initialization) - - emit $ mkAssign (CmmLocal res_r) (CmmReg arr) - --- ---------------------------------------------------------------------------- --- Copying pointer arrays - --- EZY: This code has an unusually high amount of assignTemp calls, seen --- nowhere else in the code generator. This is mostly because these --- "primitive" ops result in a surprisingly large amount of code. It --- will likely be worthwhile to optimize what is emitted here, so that --- our optimization passes don't waste time repeatedly optimizing the --- same bits of code. - --- More closely imitates 'assignTemp' from the old code generator, which --- returns a CmmExpr rather than a LocalReg. -assignTempE :: CmmExpr -> FCode CmmExpr -assignTempE e = do - t <- assignTemp e - return (CmmReg (CmmLocal t)) - --- | Takes a source 'Array#', an offset in the source array, a --- destination 'MutableArray#', an offset into the destination array, --- and the number of elements to copy. Copies the given number of --- elements from the source array to the destination array. -doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff - -> FCode () -doCopyArrayOp = emitCopyArray copy - where - -- Copy data (we assume the arrays aren't overlapping since - -- they're of different types) - copy _src _dst dst_p src_p bytes = - do dflags <- getDynFlags - emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes) - (wordAlignment dflags) - - --- | Takes a source 'MutableArray#', an offset in the source array, a --- destination 'MutableArray#', an offset into the destination array, --- and the number of elements to copy. Copies the given number of --- elements from the source array to the destination array. -doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff - -> FCode () -doCopyMutableArrayOp = emitCopyArray copy - where - -- The only time the memory might overlap is when the two arrays - -- we were provided are the same array! - -- TODO: Optimize branch for common case of no aliasing. - copy src dst dst_p src_p bytes = do - dflags <- getDynFlags - (moveCall, cpyCall) <- forkAltPair - (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes) - (wordAlignment dflags)) - (getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes) - (wordAlignment dflags)) - emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall - -emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff - -> FCode ()) -- ^ copy function - -> CmmExpr -- ^ source array - -> CmmExpr -- ^ offset in source array - -> CmmExpr -- ^ destination array - -> CmmExpr -- ^ offset in destination array - -> WordOff -- ^ number of elements to copy - -> FCode () -emitCopyArray copy src0 src_off dst0 dst_off0 n = - when (n /= 0) $ do - dflags <- getDynFlags - - -- Passed as arguments (be careful) - src <- assignTempE src0 - dst <- assignTempE dst0 - dst_off <- assignTempE dst_off0 - - -- Set the dirty bit in the header. - emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel))) - - dst_elems_p <- assignTempE $ cmmOffsetB dflags dst - (arrPtrsHdrSize dflags) - dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off - src_p <- assignTempE $ cmmOffsetExprW dflags - (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off - let bytes = wordsToBytes dflags n - - copy src dst dst_p src_p bytes - - -- The base address of the destination card table - dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p - (loadArrPtrsSize dflags dst) - - emitSetCards dst_off dst_cards_p n - -doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff - -> FCode () -doCopySmallArrayOp = emitCopySmallArray copy - where - -- Copy data (we assume the arrays aren't overlapping since - -- they're of different types) - copy _src _dst dst_p src_p bytes = - do dflags <- getDynFlags - emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes) - (wordAlignment dflags) - - -doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff - -> FCode () -doCopySmallMutableArrayOp = emitCopySmallArray copy - where - -- The only time the memory might overlap is when the two arrays - -- we were provided are the same array! - -- TODO: Optimize branch for common case of no aliasing. - copy src dst dst_p src_p bytes = do - dflags <- getDynFlags - (moveCall, cpyCall) <- forkAltPair - (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes) - (wordAlignment dflags)) - (getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes) - (wordAlignment dflags)) - emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall - -emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff - -> FCode ()) -- ^ copy function - -> CmmExpr -- ^ source array - -> CmmExpr -- ^ offset in source array - -> CmmExpr -- ^ destination array - -> CmmExpr -- ^ offset in destination array - -> WordOff -- ^ number of elements to copy - -> FCode () -emitCopySmallArray copy src0 src_off dst0 dst_off n = - when (n /= 0) $ do - dflags <- getDynFlags - - -- Passed as arguments (be careful) - src <- assignTempE src0 - dst <- assignTempE dst0 - - -- Set the dirty bit in the header. - emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel))) - - dst_p <- assignTempE $ cmmOffsetExprW dflags - (cmmOffsetB dflags dst (smallArrPtrsHdrSize dflags)) dst_off - src_p <- assignTempE $ cmmOffsetExprW dflags - (cmmOffsetB dflags src (smallArrPtrsHdrSize dflags)) src_off - let bytes = wordsToBytes dflags n - - copy src dst dst_p src_p bytes - --- | Takes an info table label, a register to return the newly --- allocated array in, a source array, an offset in the source array, --- and the number of elements to copy. Allocates a new array and --- initializes it from the source array. -emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff - -> FCode () -emitCloneArray info_p res_r src src_off n = do - dflags <- getDynFlags - - let info_ptr = mkLblExpr info_p - rep = arrPtrsRep dflags n - - tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags)) - (mkIntExpr dflags (nonHdrSize dflags rep)) - (zeroExpr dflags) - - let hdr_size = fixedHdrSize dflags - - base <- allocHeapClosure rep info_ptr cccsExpr - [ (mkIntExpr dflags n, - hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags) - , (mkIntExpr dflags (nonHdrSizeW rep), - hdr_size + oFFSET_StgMutArrPtrs_size dflags) - ] - - arr <- CmmLocal `fmap` newTemp (bWord dflags) - emit $ mkAssign arr base - - dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr) - (arrPtrsHdrSize dflags) - src_p <- assignTempE $ cmmOffsetExprW dflags src - (cmmAddWord dflags - (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off) - - emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n)) - (wordAlignment dflags) - - emit $ mkAssign (CmmLocal res_r) (CmmReg arr) - --- | Takes an info table label, a register to return the newly --- allocated array in, a source array, an offset in the source array, --- and the number of elements to copy. Allocates a new array and --- initializes it from the source array. -emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff - -> FCode () -emitCloneSmallArray info_p res_r src src_off n = do - dflags <- getDynFlags - - let info_ptr = mkLblExpr info_p - rep = smallArrPtrsRep n - - tickyAllocPrim (mkIntExpr dflags (smallArrPtrsHdrSize dflags)) - (mkIntExpr dflags (nonHdrSize dflags rep)) - (zeroExpr dflags) - - let hdr_size = fixedHdrSize dflags - - base <- allocHeapClosure rep info_ptr cccsExpr - [ (mkIntExpr dflags n, - hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags) - ] - - arr <- CmmLocal `fmap` newTemp (bWord dflags) - emit $ mkAssign arr base - - dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr) - (smallArrPtrsHdrSize dflags) - src_p <- assignTempE $ cmmOffsetExprW dflags src - (cmmAddWord dflags - (mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off) - - emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n)) - (wordAlignment dflags) - - emit $ mkAssign (CmmLocal res_r) (CmmReg arr) - --- | Takes and offset in the destination array, the base address of --- the card table, and the number of elements affected (*not* the --- number of cards). The number of elements may not be zero. --- Marks the relevant cards as dirty. -emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode () -emitSetCards dst_start dst_cards_start n = do - dflags <- getDynFlags - start_card <- assignTempE $ cardCmm dflags dst_start - let end_card = cardCmm dflags - (cmmSubWord dflags - (cmmAddWord dflags dst_start (mkIntExpr dflags n)) - (mkIntExpr dflags 1)) - emitMemsetCall (cmmAddWord dflags dst_cards_start start_card) - (mkIntExpr dflags 1) - (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1)) - (mkAlignment 1) -- no alignment (1 byte) - --- Convert an element index to a card index -cardCmm :: DynFlags -> CmmExpr -> CmmExpr -cardCmm dflags i = - cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)) - ------------------------------------------------------------------------------- --- SmallArray PrimOp implementations - -doReadSmallPtrArrayOp :: LocalReg - -> CmmExpr - -> CmmExpr - -> FCode () -doReadSmallPtrArrayOp res addr idx = do - dflags <- getDynFlags - mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr - (gcWord dflags) idx - -doWriteSmallPtrArrayOp :: CmmExpr - -> CmmExpr - -> CmmExpr - -> FCode () -doWriteSmallPtrArrayOp addr idx val = do - dflags <- getDynFlags - let ty = cmmExprType dflags val - emitPrimCall [] MO_WriteBarrier [] -- #12469 - mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val - emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel))) - ------------------------------------------------------------------------------- --- Atomic read-modify-write - --- | Emit an atomic modification to a byte array element. The result --- reg contains that previous value of the element. Implies a full --- memory barrier. -doAtomicRMW :: LocalReg -- ^ Result reg - -> AtomicMachOp -- ^ Atomic op (e.g. add) - -> CmmExpr -- ^ MutableByteArray# - -> CmmExpr -- ^ Index - -> CmmType -- ^ Type of element by which we are indexing - -> CmmExpr -- ^ Op argument (e.g. amount to add) - -> FCode () -doAtomicRMW res amop mba idx idx_ty n = do - dflags <- getDynFlags - let width = typeWidth idx_ty - addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags) - width mba idx - emitPrimCall - [ res ] - (MO_AtomicRMW width amop) - [ addr, n ] - --- | Emit an atomic read to a byte array that acts as a memory barrier. -doAtomicReadByteArray - :: LocalReg -- ^ Result reg - -> CmmExpr -- ^ MutableByteArray# - -> CmmExpr -- ^ Index - -> CmmType -- ^ Type of element by which we are indexing - -> FCode () -doAtomicReadByteArray res mba idx idx_ty = do - dflags <- getDynFlags - let width = typeWidth idx_ty - addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags) - width mba idx - emitPrimCall - [ res ] - (MO_AtomicRead width) - [ addr ] - --- | Emit an atomic write to a byte array that acts as a memory barrier. -doAtomicWriteByteArray - :: CmmExpr -- ^ MutableByteArray# - -> CmmExpr -- ^ Index - -> CmmType -- ^ Type of element by which we are indexing - -> CmmExpr -- ^ Value to write - -> FCode () -doAtomicWriteByteArray mba idx idx_ty val = do - dflags <- getDynFlags - let width = typeWidth idx_ty - addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags) - width mba idx - emitPrimCall - [ {- no results -} ] - (MO_AtomicWrite width) - [ addr, val ] - -doCasByteArray - :: LocalReg -- ^ Result reg - -> CmmExpr -- ^ MutableByteArray# - -> CmmExpr -- ^ Index - -> CmmType -- ^ Type of element by which we are indexing - -> CmmExpr -- ^ Old value - -> CmmExpr -- ^ New value - -> FCode () -doCasByteArray res mba idx idx_ty old new = do - dflags <- getDynFlags - let width = (typeWidth idx_ty) - addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags) - width mba idx - emitPrimCall - [ res ] - (MO_Cmpxchg width) - [ addr, old, new ] - ------------------------------------------------------------------------------- --- Helpers for emitting function calls - --- | Emit a call to @memcpy@. -emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode () -emitMemcpyCall dst src n align = do - emitPrimCall - [ {-no results-} ] - (MO_Memcpy (alignmentBytes align)) - [ dst, src, n ] - --- | Emit a call to @memmove@. -emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode () -emitMemmoveCall dst src n align = do - emitPrimCall - [ {- no results -} ] - (MO_Memmove (alignmentBytes align)) - [ dst, src, n ] - --- | Emit a call to @memset@. The second argument must fit inside an --- unsigned char. -emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode () -emitMemsetCall dst c n align = do - emitPrimCall - [ {- no results -} ] - (MO_Memset (alignmentBytes align)) - [ dst, c, n ] - -emitMemcmpCall :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode () -emitMemcmpCall res ptr1 ptr2 n align = do - -- 'MO_Memcmp' is assumed to return an 32bit 'CInt' because all - -- code-gens currently call out to the @memcmp(3)@ C function. - -- This was easier than moving the sign-extensions into - -- all the code-gens. - dflags <- getDynFlags - let is32Bit = typeWidth (localRegType res) == W32 - - cres <- if is32Bit - then return res - else newTemp b32 - - emitPrimCall - [ cres ] - (MO_Memcmp align) - [ ptr1, ptr2, n ] - - unless is32Bit $ do - emit $ mkAssign (CmmLocal res) - (CmmMachOp - (mo_s_32ToWord dflags) - [(CmmReg (CmmLocal cres))]) - -emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode () -emitBSwapCall res x width = do - emitPrimCall - [ res ] - (MO_BSwap width) - [ x ] - -emitBRevCall :: LocalReg -> CmmExpr -> Width -> FCode () -emitBRevCall res x width = do - emitPrimCall - [ res ] - (MO_BRev width) - [ x ] - -emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode () -emitPopCntCall res x width = do - emitPrimCall - [ res ] - (MO_PopCnt width) - [ x ] - -emitPdepCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode () -emitPdepCall res x y width = do - emitPrimCall - [ res ] - (MO_Pdep width) - [ x, y ] - -emitPextCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode () -emitPextCall res x y width = do - emitPrimCall - [ res ] - (MO_Pext width) - [ x, y ] - -emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode () -emitClzCall res x width = do - emitPrimCall - [ res ] - (MO_Clz width) - [ x ] - -emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode () -emitCtzCall res x width = do - emitPrimCall - [ res ] - (MO_Ctz width) - [ x ] diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs deleted file mode 100644 index 172b77c8f9..0000000000 --- a/compiler/codeGen/StgCmmProf.hs +++ /dev/null @@ -1,360 +0,0 @@ ------------------------------------------------------------------------------ --- --- Code generation for profiling --- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ - -module StgCmmProf ( - initCostCentres, ccType, ccsType, - mkCCostCentre, mkCCostCentreStack, - - -- Cost-centre Profiling - dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, - enterCostCentreThunk, enterCostCentreFun, - costCentreFrom, - storeCurCCS, - emitSetCCC, - - saveCurrentCostCentre, restoreCurrentCostCentre, - - -- Lag/drag/void stuff - ldvEnter, ldvEnterClosure, ldvRecordCreate - ) where - -import GhcPrelude - -import StgCmmClosure -import StgCmmUtils -import StgCmmMonad -import SMRep - -import MkGraph -import Cmm -import CmmUtils -import CLabel - -import CostCentre -import DynFlags -import FastString -import Module -import Outputable - -import Control.Monad -import Data.Char (ord) - ------------------------------------------------------------------------------ --- --- Cost-centre-stack Profiling --- ------------------------------------------------------------------------------ - --- Expression representing the current cost centre stack -ccsType :: DynFlags -> CmmType -- Type of a cost-centre stack -ccsType = bWord - -ccType :: DynFlags -> CmmType -- Type of a cost centre -ccType = bWord - -storeCurCCS :: CmmExpr -> CmmAGraph -storeCurCCS e = mkAssign cccsReg e - -mkCCostCentre :: CostCentre -> CmmLit -mkCCostCentre cc = CmmLabel (mkCCLabel cc) - -mkCCostCentreStack :: CostCentreStack -> CmmLit -mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs) - -costCentreFrom :: DynFlags - -> CmmExpr -- A closure pointer - -> CmmExpr -- The cost centre from that closure -costCentreFrom dflags cl = CmmLoad (cmmOffsetB dflags cl (oFFSET_StgHeader_ccs dflags)) (ccsType dflags) - --- | The profiling header words in a static closure -staticProfHdr :: DynFlags -> CostCentreStack -> [CmmLit] -staticProfHdr dflags ccs - = ifProfilingL dflags [mkCCostCentreStack ccs, staticLdvInit dflags] - --- | Profiling header words in a dynamic closure -dynProfHdr :: DynFlags -> CmmExpr -> [CmmExpr] -dynProfHdr dflags ccs = ifProfilingL dflags [ccs, dynLdvInit dflags] - --- | Initialise the profiling field of an update frame -initUpdFrameProf :: CmmExpr -> FCode () -initUpdFrameProf frame - = ifProfiling $ -- frame->header.prof.ccs = CCCS - do dflags <- getDynFlags - emitStore (cmmOffset dflags frame (oFFSET_StgHeader_ccs dflags)) cccsExpr - -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0) - -- is unnecessary because it is not used anyhow. - ---------------------------------------------------------------------------- --- Saving and restoring the current cost centre ---------------------------------------------------------------------------- - -{- Note [Saving the current cost centre] - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The current cost centre is like a global register. Like other -global registers, it's a caller-saves one. But consider - case (f x) of (p,q) -> rhs -Since 'f' may set the cost centre, we must restore it -before resuming rhs. So we want code like this: - local_cc = CCC -- save - r = f( x ) - CCC = local_cc -- restore -That is, we explicitly "save" the current cost centre in -a LocalReg, local_cc; and restore it after the call. The -C-- infrastructure will arrange to save local_cc across the -call. - -The same goes for join points; - let j x = join-stuff - in blah-blah -We want this kind of code: - local_cc = CCC -- save - blah-blah - J: - CCC = local_cc -- restore --} - -saveCurrentCostCentre :: FCode (Maybe LocalReg) - -- Returns Nothing if profiling is off -saveCurrentCostCentre - = do dflags <- getDynFlags - if not (gopt Opt_SccProfilingOn dflags) - then return Nothing - else do local_cc <- newTemp (ccType dflags) - emitAssign (CmmLocal local_cc) cccsExpr - return (Just local_cc) - -restoreCurrentCostCentre :: Maybe LocalReg -> FCode () -restoreCurrentCostCentre Nothing - = return () -restoreCurrentCostCentre (Just local_cc) - = emit (storeCurCCS (CmmReg (CmmLocal local_cc))) - - -------------------------------------------------------------------------------- --- Recording allocation in a cost centre -------------------------------------------------------------------------------- - --- | Record the allocation of a closure. The CmmExpr is the cost --- centre stack to which to attribute the allocation. -profDynAlloc :: SMRep -> CmmExpr -> FCode () -profDynAlloc rep ccs - = ifProfiling $ - do dflags <- getDynFlags - profAlloc (mkIntExpr dflags (heapClosureSizeW dflags rep)) ccs - --- | Record the allocation of a closure (size is given by a CmmExpr) --- The size must be in words, because the allocation counter in a CCS counts --- in words. -profAlloc :: CmmExpr -> CmmExpr -> FCode () -profAlloc words ccs - = ifProfiling $ - do dflags <- getDynFlags - let alloc_rep = rEP_CostCentreStack_mem_alloc dflags - emit (addToMemE alloc_rep - (cmmOffsetB dflags ccs (oFFSET_CostCentreStack_mem_alloc dflags)) - (CmmMachOp (MO_UU_Conv (wordWidth dflags) (typeWidth alloc_rep)) $ - [CmmMachOp (mo_wordSub dflags) [words, - mkIntExpr dflags (profHdrSize dflags)]])) - -- subtract the "profiling overhead", which is the - -- profiling header in a closure. - --- ----------------------------------------------------------------------- --- Setting the current cost centre on entry to a closure - -enterCostCentreThunk :: CmmExpr -> FCode () -enterCostCentreThunk closure = - ifProfiling $ do - dflags <- getDynFlags - emit $ storeCurCCS (costCentreFrom dflags closure) - -enterCostCentreFun :: CostCentreStack -> CmmExpr -> FCode () -enterCostCentreFun ccs closure = - ifProfiling $ do - if isCurrentCCS ccs - then do dflags <- getDynFlags - emitRtsCall rtsUnitId (fsLit "enterFunCCS") - [(baseExpr, AddrHint), - (costCentreFrom dflags closure, AddrHint)] False - else return () -- top-level function, nothing to do - -ifProfiling :: FCode () -> FCode () -ifProfiling code - = do dflags <- getDynFlags - if gopt Opt_SccProfilingOn dflags - then code - else return () - -ifProfilingL :: DynFlags -> [a] -> [a] -ifProfilingL dflags xs - | gopt Opt_SccProfilingOn dflags = xs - | otherwise = [] - - ---------------------------------------------------------------- --- Initialising Cost Centres & CCSs ---------------------------------------------------------------- - -initCostCentres :: CollectedCCs -> FCode () --- Emit the declarations -initCostCentres (local_CCs, singleton_CCSs) - = do dflags <- getDynFlags - when (gopt Opt_SccProfilingOn dflags) $ - do mapM_ emitCostCentreDecl local_CCs - mapM_ emitCostCentreStackDecl singleton_CCSs - - -emitCostCentreDecl :: CostCentre -> FCode () -emitCostCentreDecl cc = do - { dflags <- getDynFlags - ; let is_caf | isCafCC cc = mkIntCLit dflags (ord 'c') -- 'c' == is a CAF - | otherwise = zero dflags - -- NB. bytesFS: we want the UTF-8 bytes here (#5559) - ; label <- newByteStringCLit (bytesFS $ costCentreUserNameFS cc) - ; modl <- newByteStringCLit (bytesFS $ Module.moduleNameFS - $ Module.moduleName - $ cc_mod cc) - ; loc <- newByteStringCLit $ bytesFS $ mkFastString $ - showPpr dflags (costCentreSrcSpan cc) - -- XXX going via FastString to get UTF-8 encoding is silly - ; let - lits = [ zero dflags, -- StgInt ccID, - label, -- char *label, - modl, -- char *module, - loc, -- char *srcloc, - zero64, -- StgWord64 mem_alloc - zero dflags, -- StgWord time_ticks - is_caf, -- StgInt is_caf - zero dflags -- struct _CostCentre *link - ] - ; emitDataLits (mkCCLabel cc) lits - } - -emitCostCentreStackDecl :: CostCentreStack -> FCode () -emitCostCentreStackDecl ccs - = case maybeSingletonCCS ccs of - Just cc -> - do dflags <- getDynFlags - let mk_lits cc = zero dflags : - mkCCostCentre cc : - replicate (sizeof_ccs_words dflags - 2) (zero dflags) - -- Note: to avoid making any assumptions about how the - -- C compiler (that compiles the RTS, in particular) does - -- layouts of structs containing long-longs, simply - -- pad out the struct with zero words until we hit the - -- size of the overall struct (which we get via DerivedConstants.h) - emitDataLits (mkCCSLabel ccs) (mk_lits cc) - Nothing -> pprPanic "emitCostCentreStackDecl" (ppr ccs) - -zero :: DynFlags -> CmmLit -zero dflags = mkIntCLit dflags 0 -zero64 :: CmmLit -zero64 = CmmInt 0 W64 - -sizeof_ccs_words :: DynFlags -> Int -sizeof_ccs_words dflags - -- round up to the next word. - | ms == 0 = ws - | otherwise = ws + 1 - where - (ws,ms) = sIZEOF_CostCentreStack dflags `divMod` wORD_SIZE dflags - --- --------------------------------------------------------------------------- --- Set the current cost centre stack - -emitSetCCC :: CostCentre -> Bool -> Bool -> FCode () -emitSetCCC cc tick push - = do dflags <- getDynFlags - if not (gopt Opt_SccProfilingOn dflags) - then return () - else do tmp <- newTemp (ccsType dflags) - pushCostCentre tmp cccsExpr cc - when tick $ emit (bumpSccCount dflags (CmmReg (CmmLocal tmp))) - when push $ emit (storeCurCCS (CmmReg (CmmLocal tmp))) - -pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode () -pushCostCentre result ccs cc - = emitRtsCallWithResult result AddrHint - rtsUnitId - (fsLit "pushCostCentre") [(ccs,AddrHint), - (CmmLit (mkCCostCentre cc), AddrHint)] - False - -bumpSccCount :: DynFlags -> CmmExpr -> CmmAGraph -bumpSccCount dflags ccs - = addToMem (rEP_CostCentreStack_scc_count dflags) - (cmmOffsetB dflags ccs (oFFSET_CostCentreStack_scc_count dflags)) 1 - ------------------------------------------------------------------------------ --- --- Lag/drag/void stuff --- ------------------------------------------------------------------------------ - --- --- Initial value for the LDV field in a static closure --- -staticLdvInit :: DynFlags -> CmmLit -staticLdvInit = zeroCLit - --- --- Initial value of the LDV field in a dynamic closure --- -dynLdvInit :: DynFlags -> CmmExpr -dynLdvInit dflags = -- (era << LDV_SHIFT) | LDV_STATE_CREATE - CmmMachOp (mo_wordOr dflags) [ - CmmMachOp (mo_wordShl dflags) [loadEra dflags, mkIntExpr dflags (lDV_SHIFT dflags)], - CmmLit (mkWordCLit dflags (iLDV_STATE_CREATE dflags)) - ] - --- --- Initialise the LDV word of a new closure --- -ldvRecordCreate :: CmmExpr -> FCode () -ldvRecordCreate closure = do - dflags <- getDynFlags - emit $ mkStore (ldvWord dflags closure) (dynLdvInit dflags) - --- --- | Called when a closure is entered, marks the closure as having --- been "used". The closure is not an "inherently used" one. The --- closure is not @IND@ because that is not considered for LDV profiling. --- -ldvEnterClosure :: ClosureInfo -> CmmReg -> FCode () -ldvEnterClosure closure_info node_reg = do - dflags <- getDynFlags - let tag = funTag dflags closure_info - -- don't forget to substract node's tag - ldvEnter (cmmOffsetB dflags (CmmReg node_reg) (-tag)) - -ldvEnter :: CmmExpr -> FCode () --- Argument is a closure pointer -ldvEnter cl_ptr = do - dflags <- getDynFlags - let -- don't forget to substract node's tag - ldv_wd = ldvWord dflags cl_ptr - new_ldv_wd = cmmOrWord dflags (cmmAndWord dflags (CmmLoad ldv_wd (bWord dflags)) - (CmmLit (mkWordCLit dflags (iLDV_CREATE_MASK dflags)))) - (cmmOrWord dflags (loadEra dflags) (CmmLit (mkWordCLit dflags (iLDV_STATE_USE dflags)))) - ifProfiling $ - -- if (era > 0) { - -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | - -- era | LDV_STATE_USE } - emit =<< mkCmmIfThenElse (CmmMachOp (mo_wordUGt dflags) [loadEra dflags, CmmLit (zeroCLit dflags)]) - (mkStore ldv_wd new_ldv_wd) - mkNop - -loadEra :: DynFlags -> CmmExpr -loadEra dflags = CmmMachOp (MO_UU_Conv (cIntWidth dflags) (wordWidth dflags)) - [CmmLoad (mkLblExpr (mkCmmDataLabel rtsUnitId (fsLit "era"))) - (cInt dflags)] - -ldvWord :: DynFlags -> CmmExpr -> CmmExpr --- Takes the address of a closure, and returns --- the address of the LDV word in the closure -ldvWord dflags closure_ptr - = cmmOffsetB dflags closure_ptr (oFFSET_StgHeader_ldvw dflags) diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs deleted file mode 100644 index 868b52f402..0000000000 --- a/compiler/codeGen/StgCmmTicky.hs +++ /dev/null @@ -1,682 +0,0 @@ -{-# LANGUAGE BangPatterns #-} - ------------------------------------------------------------------------------ --- --- Code generation for ticky-ticky profiling --- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ - -{- OVERVIEW: ticky ticky profiling - -Please see -https://gitlab.haskell.org/ghc/ghc/wikis/debugging/ticky-ticky and also -edit it and the rest of this comment to keep them up-to-date if you -change ticky-ticky. Thanks! - - *** All allocation ticky numbers are in bytes. *** - -Some of the relevant source files: - - ***not necessarily an exhaustive list*** - - * some codeGen/ modules import this one - - * this module imports cmm/CLabel.hs to manage labels - - * cmm/CmmParse.y expands some macros using generators defined in - this module - - * includes/stg/Ticky.h declares all of the global counters - - * includes/rts/Ticky.h declares the C data type for an - STG-declaration's counters - - * some macros defined in includes/Cmm.h (and used within the RTS's - CMM code) update the global ticky counters - - * at the end of execution rts/Ticky.c generates the final report - +RTS -r<report-file> -RTS - -The rts/Ticky.c function that generates the report includes an -STG-declaration's ticky counters if - - * that declaration was entered, or - - * it was allocated (if -ticky-allocd) - -On either of those events, the counter is "registered" by adding it to -a linked list; cf the CMM generated by registerTickyCtr. - -Ticky-ticky profiling has evolved over many years. Many of the -counters from its most sophisticated days are no longer -active/accurate. As the RTS has changed, sometimes the ticky code for -relevant counters was not accordingly updated. Unfortunately, neither -were the comments. - -As of March 2013, there still exist deprecated code and comments in -the code generator as well as the RTS because: - - * I don't know what is out-of-date versus merely commented out for - momentary convenience, and - - * someone else might know how to repair it! - --} - -module StgCmmTicky ( - withNewTickyCounterFun, - withNewTickyCounterLNE, - withNewTickyCounterThunk, - withNewTickyCounterStdThunk, - withNewTickyCounterCon, - - tickyDynAlloc, - tickyAllocHeap, - - tickyAllocPrim, - tickyAllocThunk, - tickyAllocPAP, - tickyHeapCheck, - tickyStackCheck, - - tickyUnknownCall, tickyDirectCall, - - tickyPushUpdateFrame, - tickyUpdateFrameOmitted, - - tickyEnterDynCon, - tickyEnterStaticCon, - tickyEnterViaNode, - - tickyEnterFun, - tickyEnterThunk, tickyEnterStdThunk, -- dynamic non-value - -- thunks only - tickyEnterLNE, - - tickyUpdateBhCaf, - tickyBlackHole, - tickyUnboxedTupleReturn, - tickyReturnOldCon, tickyReturnNewCon, - - tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs, - tickySlowCall, tickySlowCallPat, - ) where - -import GhcPrelude - -import StgCmmArgRep ( slowCallPattern , toArgRep , argRepString ) -import StgCmmClosure -import StgCmmUtils -import StgCmmMonad - -import StgSyn -import CmmExpr -import MkGraph -import CmmUtils -import CLabel -import SMRep - -import Module -import Name -import Id -import BasicTypes -import FastString -import Outputable -import Util - -import DynFlags - --- Turgid imports for showTypeCategory -import PrelNames -import TcType -import Type -import TyCon - -import Data.Maybe -import qualified Data.Char -import Control.Monad ( when ) - ------------------------------------------------------------------------------ --- --- Ticky-ticky profiling --- ------------------------------------------------------------------------------ - -data TickyClosureType - = TickyFun - Bool -- True <-> single entry - | TickyCon - | TickyThunk - Bool -- True <-> updateable - Bool -- True <-> standard thunk (AP or selector), has no entry counter - | TickyLNE - -withNewTickyCounterFun :: Bool -> Name -> [NonVoid Id] -> FCode a -> FCode a -withNewTickyCounterFun single_entry = withNewTickyCounter (TickyFun single_entry) - -withNewTickyCounterLNE :: Name -> [NonVoid Id] -> FCode a -> FCode a -withNewTickyCounterLNE nm args code = do - b <- tickyLNEIsOn - if not b then code else withNewTickyCounter TickyLNE nm args code - -thunkHasCounter :: Bool -> FCode Bool -thunkHasCounter isStatic = do - b <- tickyDynThunkIsOn - pure (not isStatic && b) - -withNewTickyCounterThunk - :: Bool -- ^ static - -> Bool -- ^ updateable - -> Name - -> FCode a - -> FCode a -withNewTickyCounterThunk isStatic isUpdatable name code = do - has_ctr <- thunkHasCounter isStatic - if not has_ctr - then code - else withNewTickyCounter (TickyThunk isUpdatable False) name [] code - -withNewTickyCounterStdThunk - :: Bool -- ^ updateable - -> Name - -> FCode a - -> FCode a -withNewTickyCounterStdThunk isUpdatable name code = do - has_ctr <- thunkHasCounter False - if not has_ctr - then code - else withNewTickyCounter (TickyThunk isUpdatable True) name [] code - -withNewTickyCounterCon - :: Name - -> FCode a - -> FCode a -withNewTickyCounterCon name code = do - has_ctr <- thunkHasCounter False - if not has_ctr - then code - else withNewTickyCounter TickyCon name [] code - --- args does not include the void arguments -withNewTickyCounter :: TickyClosureType -> Name -> [NonVoid Id] -> FCode a -> FCode a -withNewTickyCounter cloType name args m = do - lbl <- emitTickyCounter cloType name args - setTickyCtrLabel lbl m - -emitTickyCounter :: TickyClosureType -> Name -> [NonVoid Id] -> FCode CLabel -emitTickyCounter cloType name args - = let ctr_lbl = mkRednCountsLabel name in - (>> return ctr_lbl) $ - ifTicky $ do - { dflags <- getDynFlags - ; parent <- getTickyCtrLabel - ; mod_name <- getModuleName - - -- When printing the name of a thing in a ticky file, we - -- want to give the module name even for *local* things. We - -- print just "x (M)" rather that "M.x" to distinguish them - -- from the global kind. - ; let ppr_for_ticky_name :: SDoc - ppr_for_ticky_name = - let n = ppr name - ext = case cloType of - TickyFun single_entry -> parens $ hcat $ punctuate comma $ - [text "fun"] ++ [text "se"|single_entry] - TickyCon -> parens (text "con") - TickyThunk upd std -> parens $ hcat $ punctuate comma $ - [text "thk"] ++ [text "se"|not upd] ++ [text "std"|std] - TickyLNE | isInternalName name -> parens (text "LNE") - | otherwise -> panic "emitTickyCounter: how is this an external LNE?" - p = case hasHaskellName parent of - -- NB the default "top" ticky ctr does not - -- have a Haskell name - Just pname -> text "in" <+> ppr (nameUnique pname) - _ -> empty - in if isInternalName name - then n <+> parens (ppr mod_name) <+> ext <+> p - else n <+> ext <+> p - - ; fun_descr_lit <- newStringCLit $ showSDocDebug dflags ppr_for_ticky_name - ; arg_descr_lit <- newStringCLit $ map (showTypeCategory . idType . fromNonVoid) args - ; emitDataLits ctr_lbl - -- Must match layout of includes/rts/Ticky.h's StgEntCounter - -- - -- krc: note that all the fields are I32 now; some were I16 - -- before, but the code generator wasn't handling that - -- properly and it led to chaos, panic and disorder. - [ mkIntCLit dflags 0, -- registered? - mkIntCLit dflags (length args), -- Arity - mkIntCLit dflags 0, -- Heap allocated for this thing - fun_descr_lit, - arg_descr_lit, - zeroCLit dflags, -- Entries into this thing - zeroCLit dflags, -- Heap allocated by this thing - zeroCLit dflags -- Link to next StgEntCounter - ] - } - --- ----------------------------------------------------------------------------- --- Ticky stack frames - -tickyPushUpdateFrame, tickyUpdateFrameOmitted :: FCode () -tickyPushUpdateFrame = ifTicky $ bumpTickyCounter (fsLit "UPDF_PUSHED_ctr") -tickyUpdateFrameOmitted = ifTicky $ bumpTickyCounter (fsLit "UPDF_OMITTED_ctr") - --- ----------------------------------------------------------------------------- --- Ticky entries - --- NB the name-specific entries are only available for names that have --- dedicated Cmm code. As far as I know, this just rules out --- constructor thunks. For them, there is no CMM code block to put the --- bump of name-specific ticky counter into. On the other hand, we can --- still track allocation their allocation. - -tickyEnterDynCon, tickyEnterStaticCon, tickyEnterViaNode :: FCode () -tickyEnterDynCon = ifTicky $ bumpTickyCounter (fsLit "ENT_DYN_CON_ctr") -tickyEnterStaticCon = ifTicky $ bumpTickyCounter (fsLit "ENT_STATIC_CON_ctr") -tickyEnterViaNode = ifTicky $ bumpTickyCounter (fsLit "ENT_VIA_NODE_ctr") - -tickyEnterThunk :: ClosureInfo -> FCode () -tickyEnterThunk cl_info - = ifTicky $ do - { bumpTickyCounter ctr - ; has_ctr <- thunkHasCounter static - ; when has_ctr $ do - ticky_ctr_lbl <- getTickyCtrLabel - registerTickyCtrAtEntryDyn ticky_ctr_lbl - bumpTickyEntryCount ticky_ctr_lbl } - where - updatable = closureSingleEntry cl_info - static = isStaticClosure cl_info - - ctr | static = if updatable then fsLit "ENT_STATIC_THK_SINGLE_ctr" - else fsLit "ENT_STATIC_THK_MANY_ctr" - | otherwise = if updatable then fsLit "ENT_DYN_THK_SINGLE_ctr" - else fsLit "ENT_DYN_THK_MANY_ctr" - -tickyEnterStdThunk :: ClosureInfo -> FCode () -tickyEnterStdThunk = tickyEnterThunk - -tickyBlackHole :: Bool{-updatable-} -> FCode () -tickyBlackHole updatable - = ifTicky (bumpTickyCounter ctr) - where - ctr | updatable = (fsLit "UPD_BH_SINGLE_ENTRY_ctr") - | otherwise = (fsLit "UPD_BH_UPDATABLE_ctr") - -tickyUpdateBhCaf :: ClosureInfo -> FCode () -tickyUpdateBhCaf cl_info - = ifTicky (bumpTickyCounter ctr) - where - ctr | closureUpdReqd cl_info = (fsLit "UPD_CAF_BH_SINGLE_ENTRY_ctr") - | otherwise = (fsLit "UPD_CAF_BH_UPDATABLE_ctr") - -tickyEnterFun :: ClosureInfo -> FCode () -tickyEnterFun cl_info = ifTicky $ do - ctr_lbl <- getTickyCtrLabel - - if isStaticClosure cl_info - then do bumpTickyCounter (fsLit "ENT_STATIC_FUN_DIRECT_ctr") - registerTickyCtr ctr_lbl - else do bumpTickyCounter (fsLit "ENT_DYN_FUN_DIRECT_ctr") - registerTickyCtrAtEntryDyn ctr_lbl - - bumpTickyEntryCount ctr_lbl - -tickyEnterLNE :: FCode () -tickyEnterLNE = ifTicky $ do - bumpTickyCounter (fsLit "ENT_LNE_ctr") - ifTickyLNE $ do - ctr_lbl <- getTickyCtrLabel - registerTickyCtr ctr_lbl - bumpTickyEntryCount ctr_lbl - --- needn't register a counter upon entry if --- --- 1) it's for a dynamic closure, and --- --- 2) -ticky-allocd is on --- --- since the counter was registered already upon being alloc'd -registerTickyCtrAtEntryDyn :: CLabel -> FCode () -registerTickyCtrAtEntryDyn ctr_lbl = do - already_registered <- tickyAllocdIsOn - when (not already_registered) $ registerTickyCtr ctr_lbl - -registerTickyCtr :: CLabel -> FCode () --- Register a ticky counter --- if ( ! f_ct.registeredp ) { --- f_ct.link = ticky_entry_ctrs; /* hook this one onto the front of the list */ --- ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */ --- f_ct.registeredp = 1 } -registerTickyCtr ctr_lbl = do - dflags <- getDynFlags - let - -- krc: code generator doesn't handle Not, so we test for Eq 0 instead - test = CmmMachOp (MO_Eq (wordWidth dflags)) - [CmmLoad (CmmLit (cmmLabelOffB ctr_lbl - (oFFSET_StgEntCounter_registeredp dflags))) (bWord dflags), - zeroExpr dflags] - register_stmts - = [ mkStore (CmmLit (cmmLabelOffB ctr_lbl (oFFSET_StgEntCounter_link dflags))) - (CmmLoad ticky_entry_ctrs (bWord dflags)) - , mkStore ticky_entry_ctrs (mkLblExpr ctr_lbl) - , mkStore (CmmLit (cmmLabelOffB ctr_lbl - (oFFSET_StgEntCounter_registeredp dflags))) - (mkIntExpr dflags 1) ] - ticky_entry_ctrs = mkLblExpr (mkCmmDataLabel rtsUnitId (fsLit "ticky_entry_ctrs")) - emit =<< mkCmmIfThen test (catAGraphs register_stmts) - -tickyReturnOldCon, tickyReturnNewCon :: RepArity -> FCode () -tickyReturnOldCon arity - = ifTicky $ do { bumpTickyCounter (fsLit "RET_OLD_ctr") - ; bumpHistogram (fsLit "RET_OLD_hst") arity } -tickyReturnNewCon arity - = ifTicky $ do { bumpTickyCounter (fsLit "RET_NEW_ctr") - ; bumpHistogram (fsLit "RET_NEW_hst") arity } - -tickyUnboxedTupleReturn :: RepArity -> FCode () -tickyUnboxedTupleReturn arity - = ifTicky $ do { bumpTickyCounter (fsLit "RET_UNBOXED_TUP_ctr") - ; bumpHistogram (fsLit "RET_UNBOXED_TUP_hst") arity } - --- ----------------------------------------------------------------------------- --- Ticky calls - --- Ticks at a *call site*: -tickyDirectCall :: RepArity -> [StgArg] -> FCode () -tickyDirectCall arity args - | args `lengthIs` arity = tickyKnownCallExact - | otherwise = do tickyKnownCallExtraArgs - tickySlowCallPat (map argPrimRep (drop arity args)) - -tickyKnownCallTooFewArgs :: FCode () -tickyKnownCallTooFewArgs = ifTicky $ bumpTickyCounter (fsLit "KNOWN_CALL_TOO_FEW_ARGS_ctr") - -tickyKnownCallExact :: FCode () -tickyKnownCallExact = ifTicky $ bumpTickyCounter (fsLit "KNOWN_CALL_ctr") - -tickyKnownCallExtraArgs :: FCode () -tickyKnownCallExtraArgs = ifTicky $ bumpTickyCounter (fsLit "KNOWN_CALL_EXTRA_ARGS_ctr") - -tickyUnknownCall :: FCode () -tickyUnknownCall = ifTicky $ bumpTickyCounter (fsLit "UNKNOWN_CALL_ctr") - --- Tick for the call pattern at slow call site (i.e. in addition to --- tickyUnknownCall, tickyKnownCallExtraArgs, etc.) -tickySlowCall :: LambdaFormInfo -> [StgArg] -> FCode () -tickySlowCall _ [] = return () -tickySlowCall lf_info args = do - -- see Note [Ticky for slow calls] - if isKnownFun lf_info - then tickyKnownCallTooFewArgs - else tickyUnknownCall - tickySlowCallPat (map argPrimRep args) - -tickySlowCallPat :: [PrimRep] -> FCode () -tickySlowCallPat args = ifTicky $ - let argReps = map toArgRep args - (_, n_matched) = slowCallPattern argReps - in if n_matched > 0 && args `lengthIs` n_matched - then bumpTickyLbl $ mkRtsSlowFastTickyCtrLabel $ concatMap (map Data.Char.toLower . argRepString) argReps - else bumpTickyCounter $ fsLit "VERY_SLOW_CALL_ctr" - -{- - -Note [Ticky for slow calls] -~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Terminology is unfortunately a bit mixed up for these calls. codeGen -uses "slow call" to refer to unknown calls and under-saturated known -calls. - -Nowadays, though (ie as of the eval/apply paper), the significantly -slower calls are actually just a subset of these: the ones with no -built-in argument pattern (cf StgCmmArgRep.slowCallPattern) - -So for ticky profiling, we split slow calls into -"SLOW_CALL_fast_<pattern>_ctr" (those matching a built-in pattern) and -VERY_SLOW_CALL_ctr (those without a built-in pattern; these are very -bad for both space and time). - --} - --- ----------------------------------------------------------------------------- --- Ticky allocation - -tickyDynAlloc :: Maybe Id -> SMRep -> LambdaFormInfo -> FCode () --- Called when doing a dynamic heap allocation; the LambdaFormInfo --- used to distinguish between closure types --- --- TODO what else to count while we're here? -tickyDynAlloc mb_id rep lf = ifTicky $ getDynFlags >>= \dflags -> - let bytes = wORD_SIZE dflags * heapClosureSizeW dflags rep - - countGlobal tot ctr = do - bumpTickyCounterBy tot bytes - bumpTickyCounter ctr - countSpecific = ifTickyAllocd $ case mb_id of - Nothing -> return () - Just id -> do - let ctr_lbl = mkRednCountsLabel (idName id) - registerTickyCtr ctr_lbl - bumpTickyAllocd ctr_lbl bytes - - -- TODO are we still tracking "good stuff" (_gds) versus - -- administrative (_adm) versus slop (_slp)? I'm going with all _gds - -- for now, since I don't currently know neither if we do nor how to - -- distinguish. NSF Mar 2013 - - in case () of - _ | isConRep rep -> - ifTickyDynThunk countSpecific >> - countGlobal (fsLit "ALLOC_CON_gds") (fsLit "ALLOC_CON_ctr") - | isThunkRep rep -> - ifTickyDynThunk countSpecific >> - if lfUpdatable lf - then countGlobal (fsLit "ALLOC_THK_gds") (fsLit "ALLOC_UP_THK_ctr") - else countGlobal (fsLit "ALLOC_THK_gds") (fsLit "ALLOC_SE_THK_ctr") - | isFunRep rep -> - countSpecific >> - countGlobal (fsLit "ALLOC_FUN_gds") (fsLit "ALLOC_FUN_ctr") - | otherwise -> panic "How is this heap object not a con, thunk, or fun?" - - - -tickyAllocHeap :: - Bool -> -- is this a genuine allocation? As opposed to - -- StgCmmLayout.adjustHpBackwards - VirtualHpOffset -> FCode () --- Called when doing a heap check [TICK_ALLOC_HEAP] --- Must be lazy in the amount of allocation! -tickyAllocHeap genuine hp - = ifTicky $ - do { dflags <- getDynFlags - ; ticky_ctr <- getTickyCtrLabel - ; emit $ catAGraphs $ - -- only test hp from within the emit so that the monadic - -- computation itself is not strict in hp (cf knot in - -- StgCmmMonad.getHeapUsage) - if hp == 0 then [] - else let !bytes = wORD_SIZE dflags * hp in [ - -- Bump the allocation total in the closure's StgEntCounter - addToMem (rEP_StgEntCounter_allocs dflags) - (CmmLit (cmmLabelOffB ticky_ctr (oFFSET_StgEntCounter_allocs dflags))) - bytes, - -- Bump the global allocation total ALLOC_HEAP_tot - addToMemLbl (bWord dflags) - (mkCmmDataLabel rtsUnitId (fsLit "ALLOC_HEAP_tot")) - bytes, - -- Bump the global allocation counter ALLOC_HEAP_ctr - if not genuine then mkNop - else addToMemLbl (bWord dflags) - (mkCmmDataLabel rtsUnitId (fsLit "ALLOC_HEAP_ctr")) - 1 - ]} - - --------------------------------------------------------------------------------- --- these three are only called from CmmParse.y (ie ultimately from the RTS) - --- the units are bytes - -tickyAllocPrim :: CmmExpr -- ^ size of the full header, in bytes - -> CmmExpr -- ^ size of the payload, in bytes - -> CmmExpr -> FCode () -tickyAllocPrim _hdr _goods _slop = ifTicky $ do - bumpTickyCounter (fsLit "ALLOC_PRIM_ctr") - bumpTickyCounterByE (fsLit "ALLOC_PRIM_adm") _hdr - bumpTickyCounterByE (fsLit "ALLOC_PRIM_gds") _goods - bumpTickyCounterByE (fsLit "ALLOC_PRIM_slp") _slop - -tickyAllocThunk :: CmmExpr -> CmmExpr -> FCode () -tickyAllocThunk _goods _slop = ifTicky $ do - -- TODO is it ever called with a Single-Entry thunk? - bumpTickyCounter (fsLit "ALLOC_UP_THK_ctr") - bumpTickyCounterByE (fsLit "ALLOC_THK_gds") _goods - bumpTickyCounterByE (fsLit "ALLOC_THK_slp") _slop - -tickyAllocPAP :: CmmExpr -> CmmExpr -> FCode () -tickyAllocPAP _goods _slop = ifTicky $ do - bumpTickyCounter (fsLit "ALLOC_PAP_ctr") - bumpTickyCounterByE (fsLit "ALLOC_PAP_gds") _goods - bumpTickyCounterByE (fsLit "ALLOC_PAP_slp") _slop - -tickyHeapCheck :: FCode () -tickyHeapCheck = ifTicky $ bumpTickyCounter (fsLit "HEAP_CHK_ctr") - -tickyStackCheck :: FCode () -tickyStackCheck = ifTicky $ bumpTickyCounter (fsLit "STK_CHK_ctr") - --- ----------------------------------------------------------------------------- --- Ticky utils - -ifTicky :: FCode () -> FCode () -ifTicky code = - getDynFlags >>= \dflags -> when (gopt Opt_Ticky dflags) code - -tickyAllocdIsOn :: FCode Bool -tickyAllocdIsOn = gopt Opt_Ticky_Allocd `fmap` getDynFlags - -tickyLNEIsOn :: FCode Bool -tickyLNEIsOn = gopt Opt_Ticky_LNE `fmap` getDynFlags - -tickyDynThunkIsOn :: FCode Bool -tickyDynThunkIsOn = gopt Opt_Ticky_Dyn_Thunk `fmap` getDynFlags - -ifTickyAllocd :: FCode () -> FCode () -ifTickyAllocd code = tickyAllocdIsOn >>= \b -> when b code - -ifTickyLNE :: FCode () -> FCode () -ifTickyLNE code = tickyLNEIsOn >>= \b -> when b code - -ifTickyDynThunk :: FCode () -> FCode () -ifTickyDynThunk code = tickyDynThunkIsOn >>= \b -> when b code - -bumpTickyCounter :: FastString -> FCode () -bumpTickyCounter lbl = bumpTickyLbl (mkCmmDataLabel rtsUnitId lbl) - -bumpTickyCounterBy :: FastString -> Int -> FCode () -bumpTickyCounterBy lbl = bumpTickyLblBy (mkCmmDataLabel rtsUnitId lbl) - -bumpTickyCounterByE :: FastString -> CmmExpr -> FCode () -bumpTickyCounterByE lbl = bumpTickyLblByE (mkCmmDataLabel rtsUnitId lbl) - -bumpTickyEntryCount :: CLabel -> FCode () -bumpTickyEntryCount lbl = do - dflags <- getDynFlags - bumpTickyLit (cmmLabelOffB lbl (oFFSET_StgEntCounter_entry_count dflags)) - -bumpTickyAllocd :: CLabel -> Int -> FCode () -bumpTickyAllocd lbl bytes = do - dflags <- getDynFlags - bumpTickyLitBy (cmmLabelOffB lbl (oFFSET_StgEntCounter_allocd dflags)) bytes - -bumpTickyLbl :: CLabel -> FCode () -bumpTickyLbl lhs = bumpTickyLitBy (cmmLabelOffB lhs 0) 1 - -bumpTickyLblBy :: CLabel -> Int -> FCode () -bumpTickyLblBy lhs = bumpTickyLitBy (cmmLabelOffB lhs 0) - -bumpTickyLblByE :: CLabel -> CmmExpr -> FCode () -bumpTickyLblByE lhs = bumpTickyLitByE (cmmLabelOffB lhs 0) - -bumpTickyLit :: CmmLit -> FCode () -bumpTickyLit lhs = bumpTickyLitBy lhs 1 - -bumpTickyLitBy :: CmmLit -> Int -> FCode () -bumpTickyLitBy lhs n = do - dflags <- getDynFlags - emit (addToMem (bWord dflags) (CmmLit lhs) n) - -bumpTickyLitByE :: CmmLit -> CmmExpr -> FCode () -bumpTickyLitByE lhs e = do - dflags <- getDynFlags - emit (addToMemE (bWord dflags) (CmmLit lhs) e) - -bumpHistogram :: FastString -> Int -> FCode () -bumpHistogram lbl n = do - dflags <- getDynFlags - let offset = n `min` (tICKY_BIN_COUNT dflags - 1) - emit (addToMem (bWord dflags) - (cmmIndexExpr dflags - (wordWidth dflags) - (CmmLit (CmmLabel (mkCmmDataLabel rtsUnitId lbl))) - (CmmLit (CmmInt (fromIntegral offset) (wordWidth dflags)))) - 1) - ------------------------------------------------------------------- --- Showing the "type category" for ticky-ticky profiling - -showTypeCategory :: Type -> Char - {- - + dictionary - - > function - - {C,I,F,D,W} char, int, float, double, word - {c,i,f,d,w} unboxed ditto - - T tuple - - P other primitive type - p unboxed ditto - - L list - E enumeration type - S other single-constructor type - M other multi-constructor data-con type - - . other type - - - reserved for others to mark as "uninteresting" - - Accurate as of Mar 2013, but I eliminated the Array category instead - of updating it, for simplicity. It's in P/p, I think --NSF - - -} -showTypeCategory ty - | isDictTy ty = '+' - | otherwise = case tcSplitTyConApp_maybe ty of - Nothing -> '.' - Just (tycon, _) -> - (if isUnliftedTyCon tycon then Data.Char.toLower else id) $ - let anyOf us = getUnique tycon `elem` us in - case () of - _ | anyOf [funTyConKey] -> '>' - | anyOf [charPrimTyConKey, charTyConKey] -> 'C' - | anyOf [doublePrimTyConKey, doubleTyConKey] -> 'D' - | anyOf [floatPrimTyConKey, floatTyConKey] -> 'F' - | anyOf [intPrimTyConKey, int32PrimTyConKey, int64PrimTyConKey, - intTyConKey, int8TyConKey, int16TyConKey, int32TyConKey, int64TyConKey - ] -> 'I' - | anyOf [wordPrimTyConKey, word32PrimTyConKey, word64PrimTyConKey, wordTyConKey, - word8TyConKey, word16TyConKey, word32TyConKey, word64TyConKey - ] -> 'W' - | anyOf [listTyConKey] -> 'L' - | isTupleTyCon tycon -> 'T' - | isPrimTyCon tycon -> 'P' - | isEnumerationTyCon tycon -> 'E' - | isJust (tyConSingleDataCon_maybe tycon) -> 'S' - | otherwise -> 'M' -- oh, well... diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs deleted file mode 100644 index 766584e2c9..0000000000 --- a/compiler/codeGen/StgCmmUtils.hs +++ /dev/null @@ -1,578 +0,0 @@ -{-# LANGUAGE CPP #-} - ------------------------------------------------------------------------------ --- --- Code generator utilities; mostly monadic --- --- (c) The University of Glasgow 2004-2006 --- ------------------------------------------------------------------------------ - -module StgCmmUtils ( - cgLit, mkSimpleLit, - emitDataLits, mkDataLits, - emitRODataLits, mkRODataLits, - emitRtsCall, emitRtsCallWithResult, emitRtsCallGen, - assignTemp, newTemp, - - newUnboxedTupleRegs, - - emitMultiAssign, emitCmmLitSwitch, emitSwitch, - - tagToClosure, mkTaggedObjectLoad, - - callerSaves, callerSaveVolatileRegs, get_GlobalReg_addr, - - cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord, - cmmUGtWord, cmmSubWord, cmmMulWord, cmmAddWord, cmmUShrWord, - cmmOffsetExprW, cmmOffsetExprB, - cmmRegOffW, cmmRegOffB, - cmmLabelOffW, cmmLabelOffB, - cmmOffsetW, cmmOffsetB, - cmmOffsetLitW, cmmOffsetLitB, - cmmLoadIndexW, - cmmConstrTag1, - - cmmUntag, cmmIsTagged, - - addToMem, addToMemE, addToMemLblE, addToMemLbl, - mkWordCLit, - newStringCLit, newByteStringCLit, - blankWord, - ) where - -#include "HsVersions.h" - -import GhcPrelude - -import StgCmmMonad -import StgCmmClosure -import Cmm -import BlockId -import MkGraph -import CodeGen.Platform -import CLabel -import CmmUtils -import CmmSwitch -import CgUtils - -import ForeignCall -import IdInfo -import Type -import TyCon -import SMRep -import Module -import Literal -import Digraph -import Util -import Unique -import UniqSupply (MonadUnique(..)) -import DynFlags -import FastString -import Outputable -import RepType - -import Data.ByteString (ByteString) -import qualified Data.ByteString.Char8 as BS8 -import qualified Data.Map as M -import Data.Char -import Data.List -import Data.Ord - - -------------------------------------------------------------------------- --- --- Literals --- -------------------------------------------------------------------------- - -cgLit :: Literal -> FCode CmmLit -cgLit (LitString s) = newByteStringCLit s - -- not unpackFS; we want the UTF-8 byte stream. -cgLit other_lit = do dflags <- getDynFlags - return (mkSimpleLit dflags other_lit) - -mkSimpleLit :: DynFlags -> Literal -> CmmLit -mkSimpleLit dflags (LitChar c) = CmmInt (fromIntegral (ord c)) - (wordWidth dflags) -mkSimpleLit dflags LitNullAddr = zeroCLit dflags -mkSimpleLit dflags (LitNumber LitNumInt i _) = CmmInt i (wordWidth dflags) -mkSimpleLit _ (LitNumber LitNumInt64 i _) = CmmInt i W64 -mkSimpleLit dflags (LitNumber LitNumWord i _) = CmmInt i (wordWidth dflags) -mkSimpleLit _ (LitNumber LitNumWord64 i _) = CmmInt i W64 -mkSimpleLit _ (LitFloat r) = CmmFloat r W32 -mkSimpleLit _ (LitDouble r) = CmmFloat r W64 -mkSimpleLit _ (LitLabel fs ms fod) - = let -- TODO: Literal labels might not actually be in the current package... - labelSrc = ForeignLabelInThisPackage - in CmmLabel (mkForeignLabel fs ms labelSrc fod) --- NB: LitRubbish should have been lowered in "CoreToStg" -mkSimpleLit _ other = pprPanic "mkSimpleLit" (ppr other) - --------------------------------------------------------------------------- --- --- Incrementing a memory location --- --------------------------------------------------------------------------- - -addToMemLbl :: CmmType -> CLabel -> Int -> CmmAGraph -addToMemLbl rep lbl n = addToMem rep (CmmLit (CmmLabel lbl)) n - -addToMemLblE :: CmmType -> CLabel -> CmmExpr -> CmmAGraph -addToMemLblE rep lbl = addToMemE rep (CmmLit (CmmLabel lbl)) - -addToMem :: CmmType -- rep of the counter - -> CmmExpr -- Address - -> Int -- What to add (a word) - -> CmmAGraph -addToMem rep ptr n = addToMemE rep ptr (CmmLit (CmmInt (toInteger n) (typeWidth rep))) - -addToMemE :: CmmType -- rep of the counter - -> CmmExpr -- Address - -> CmmExpr -- What to add (a word-typed expression) - -> CmmAGraph -addToMemE rep ptr n - = mkStore ptr (CmmMachOp (MO_Add (typeWidth rep)) [CmmLoad ptr rep, n]) - - -------------------------------------------------------------------------- --- --- Loading a field from an object, --- where the object pointer is itself tagged --- -------------------------------------------------------------------------- - -mkTaggedObjectLoad - :: DynFlags -> LocalReg -> LocalReg -> ByteOff -> DynTag -> CmmAGraph --- (loadTaggedObjectField reg base off tag) generates assignment --- reg = bitsK[ base + off - tag ] --- where K is fixed by 'reg' -mkTaggedObjectLoad dflags reg base offset tag - = mkAssign (CmmLocal reg) - (CmmLoad (cmmOffsetB dflags - (CmmReg (CmmLocal base)) - (offset - tag)) - (localRegType reg)) - -------------------------------------------------------------------------- --- --- Converting a closure tag to a closure for enumeration types --- (this is the implementation of tagToEnum#). --- -------------------------------------------------------------------------- - -tagToClosure :: DynFlags -> TyCon -> CmmExpr -> CmmExpr -tagToClosure dflags tycon tag - = CmmLoad (cmmOffsetExprW dflags closure_tbl tag) (bWord dflags) - where closure_tbl = CmmLit (CmmLabel lbl) - lbl = mkClosureTableLabel (tyConName tycon) NoCafRefs - -------------------------------------------------------------------------- --- --- Conditionals and rts calls --- -------------------------------------------------------------------------- - -emitRtsCall :: UnitId -> FastString -> [(CmmExpr,ForeignHint)] -> Bool -> FCode () -emitRtsCall pkg fun args safe = emitRtsCallGen [] (mkCmmCodeLabel pkg fun) args safe - -emitRtsCallWithResult :: LocalReg -> ForeignHint -> UnitId -> FastString - -> [(CmmExpr,ForeignHint)] -> Bool -> FCode () -emitRtsCallWithResult res hint pkg fun args safe - = emitRtsCallGen [(res,hint)] (mkCmmCodeLabel pkg fun) args safe - --- Make a call to an RTS C procedure -emitRtsCallGen - :: [(LocalReg,ForeignHint)] - -> CLabel - -> [(CmmExpr,ForeignHint)] - -> Bool -- True <=> CmmSafe call - -> FCode () -emitRtsCallGen res lbl args safe - = do { dflags <- getDynFlags - ; updfr_off <- getUpdFrameOff - ; let (caller_save, caller_load) = callerSaveVolatileRegs dflags - ; emit caller_save - ; call updfr_off - ; emit caller_load } - where - call updfr_off = - if safe then - emit =<< mkCmmCall fun_expr res' args' updfr_off - else do - let conv = ForeignConvention CCallConv arg_hints res_hints CmmMayReturn - emit $ mkUnsafeCall (ForeignTarget fun_expr conv) res' args' - (args', arg_hints) = unzip args - (res', res_hints) = unzip res - fun_expr = mkLblExpr lbl - - ------------------------------------------------------------------------------ --- --- Caller-Save Registers --- ------------------------------------------------------------------------------ - --- Here we generate the sequence of saves/restores required around a --- foreign call instruction. - --- TODO: reconcile with includes/Regs.h --- * Regs.h claims that BaseReg should be saved last and loaded first --- * This might not have been tickled before since BaseReg is callee save --- * Regs.h saves SparkHd, ParkT1, SparkBase and SparkLim --- --- This code isn't actually used right now, because callerSaves --- only ever returns true in the current universe for registers NOT in --- system_regs (just do a grep for CALLER_SAVES in --- includes/stg/MachRegs.h). It's all one giant no-op, and for --- good reason: having to save system registers on every foreign call --- would be very expensive, so we avoid assigning them to those --- registers when we add support for an architecture. --- --- Note that the old code generator actually does more work here: it --- also saves other global registers. We can't (nor want) to do that --- here, as we don't have liveness information. And really, we --- shouldn't be doing the workaround at this point in the pipeline, see --- Note [Register parameter passing] and the ToDo on CmmCall in --- cmm/CmmNode.hs. Right now the workaround is to avoid inlining across --- unsafe foreign calls in rewriteAssignments, but this is strictly --- temporary. -callerSaveVolatileRegs :: DynFlags -> (CmmAGraph, CmmAGraph) -callerSaveVolatileRegs dflags = (caller_save, caller_load) - where - platform = targetPlatform dflags - - caller_save = catAGraphs (map callerSaveGlobalReg regs_to_save) - caller_load = catAGraphs (map callerRestoreGlobalReg regs_to_save) - - system_regs = [ Sp,SpLim,Hp,HpLim,CCCS,CurrentTSO,CurrentNursery - {- ,SparkHd,SparkTl,SparkBase,SparkLim -} - , BaseReg ] - - regs_to_save = filter (callerSaves platform) system_regs - - callerSaveGlobalReg reg - = mkStore (get_GlobalReg_addr dflags reg) (CmmReg (CmmGlobal reg)) - - callerRestoreGlobalReg reg - = mkAssign (CmmGlobal reg) - (CmmLoad (get_GlobalReg_addr dflags reg) (globalRegType dflags reg)) - - -------------------------------------------------------------------------- --- --- Strings generate a top-level data block --- -------------------------------------------------------------------------- - -emitDataLits :: CLabel -> [CmmLit] -> FCode () --- Emit a data-segment data block -emitDataLits lbl lits = emitDecl (mkDataLits (Section Data lbl) lbl lits) - -emitRODataLits :: CLabel -> [CmmLit] -> FCode () --- Emit a read-only data block -emitRODataLits lbl lits = emitDecl (mkRODataLits lbl lits) - -newStringCLit :: String -> FCode CmmLit --- Make a global definition for the string, --- and return its label -newStringCLit str = newByteStringCLit (BS8.pack str) - -newByteStringCLit :: ByteString -> FCode CmmLit -newByteStringCLit bytes - = do { uniq <- newUnique - ; let (lit, decl) = mkByteStringCLit (mkStringLitLabel uniq) bytes - ; emitDecl decl - ; return lit } - -------------------------------------------------------------------------- --- --- Assigning expressions to temporaries --- -------------------------------------------------------------------------- - -assignTemp :: CmmExpr -> FCode LocalReg --- Make sure the argument is in a local register. --- We don't bother being particularly aggressive with avoiding --- unnecessary local registers, since we can rely on a later --- optimization pass to inline as necessary (and skipping out --- on things like global registers can be a little dangerous --- due to them being trashed on foreign calls--though it means --- the optimization pass doesn't have to do as much work) -assignTemp (CmmReg (CmmLocal reg)) = return reg -assignTemp e = do { dflags <- getDynFlags - ; uniq <- newUnique - ; let reg = LocalReg uniq (cmmExprType dflags e) - ; emitAssign (CmmLocal reg) e - ; return reg } - -newTemp :: MonadUnique m => CmmType -> m LocalReg -newTemp rep = do { uniq <- getUniqueM - ; return (LocalReg uniq rep) } - -newUnboxedTupleRegs :: Type -> FCode ([LocalReg], [ForeignHint]) --- Choose suitable local regs to use for the components --- of an unboxed tuple that we are about to return to --- the Sequel. If the Sequel is a join point, using the --- regs it wants will save later assignments. -newUnboxedTupleRegs res_ty - = ASSERT( isUnboxedTupleType res_ty ) - do { dflags <- getDynFlags - ; sequel <- getSequel - ; regs <- choose_regs dflags sequel - ; ASSERT( regs `equalLength` reps ) - return (regs, map primRepForeignHint reps) } - where - reps = typePrimRep res_ty - choose_regs _ (AssignTo regs _) = return regs - choose_regs dflags _ = mapM (newTemp . primRepCmmType dflags) reps - - - -------------------------------------------------------------------------- --- emitMultiAssign -------------------------------------------------------------------------- - -emitMultiAssign :: [LocalReg] -> [CmmExpr] -> FCode () --- Emit code to perform the assignments in the --- input simultaneously, using temporary variables when necessary. - -type Key = Int -type Vrtx = (Key, Stmt) -- Give each vertex a unique number, - -- for fast comparison -type Stmt = (LocalReg, CmmExpr) -- r := e - --- We use the strongly-connected component algorithm, in which --- * the vertices are the statements --- * an edge goes from s1 to s2 iff --- s1 assigns to something s2 uses --- that is, if s1 should *follow* s2 in the final order - -emitMultiAssign [] [] = return () -emitMultiAssign [reg] [rhs] = emitAssign (CmmLocal reg) rhs -emitMultiAssign regs rhss = do - dflags <- getDynFlags - ASSERT2( equalLength regs rhss, ppr regs $$ ppr rhss ) - unscramble dflags ([1..] `zip` (regs `zip` rhss)) - -unscramble :: DynFlags -> [Vrtx] -> FCode () -unscramble dflags vertices = mapM_ do_component components - where - edges :: [ Node Key Vrtx ] - edges = [ DigraphNode vertex key1 (edges_from stmt1) - | vertex@(key1, stmt1) <- vertices ] - - edges_from :: Stmt -> [Key] - edges_from stmt1 = [ key2 | (key2, stmt2) <- vertices, - stmt1 `mustFollow` stmt2 ] - - components :: [SCC Vrtx] - components = stronglyConnCompFromEdgedVerticesUniq edges - - -- do_components deal with one strongly-connected component - -- Not cyclic, or singleton? Just do it - do_component :: SCC Vrtx -> FCode () - do_component (AcyclicSCC (_,stmt)) = mk_graph stmt - do_component (CyclicSCC []) = panic "do_component" - do_component (CyclicSCC [(_,stmt)]) = mk_graph stmt - - -- Cyclic? Then go via temporaries. Pick one to - -- break the loop and try again with the rest. - do_component (CyclicSCC ((_,first_stmt) : rest)) = do - dflags <- getDynFlags - u <- newUnique - let (to_tmp, from_tmp) = split dflags u first_stmt - mk_graph to_tmp - unscramble dflags rest - mk_graph from_tmp - - split :: DynFlags -> Unique -> Stmt -> (Stmt, Stmt) - split dflags uniq (reg, rhs) - = ((tmp, rhs), (reg, CmmReg (CmmLocal tmp))) - where - rep = cmmExprType dflags rhs - tmp = LocalReg uniq rep - - mk_graph :: Stmt -> FCode () - mk_graph (reg, rhs) = emitAssign (CmmLocal reg) rhs - - mustFollow :: Stmt -> Stmt -> Bool - (reg, _) `mustFollow` (_, rhs) = regUsedIn dflags (CmmLocal reg) rhs - -------------------------------------------------------------------------- --- mkSwitch -------------------------------------------------------------------------- - - -emitSwitch :: CmmExpr -- Tag to switch on - -> [(ConTagZ, CmmAGraphScoped)] -- Tagged branches - -> Maybe CmmAGraphScoped -- Default branch (if any) - -> ConTagZ -> ConTagZ -- Min and Max possible values; - -- behaviour outside this range is - -- undefined - -> FCode () - --- First, two rather common cases in which there is no work to do -emitSwitch _ [] (Just code) _ _ = emit (fst code) -emitSwitch _ [(_,code)] Nothing _ _ = emit (fst code) - --- Right, off we go -emitSwitch tag_expr branches mb_deflt lo_tag hi_tag = do - join_lbl <- newBlockId - mb_deflt_lbl <- label_default join_lbl mb_deflt - branches_lbls <- label_branches join_lbl branches - tag_expr' <- assignTemp' tag_expr - - -- Sort the branches before calling mk_discrete_switch - let branches_lbls' = [ (fromIntegral i, l) | (i,l) <- sortBy (comparing fst) branches_lbls ] - let range = (fromIntegral lo_tag, fromIntegral hi_tag) - - emit $ mk_discrete_switch False tag_expr' branches_lbls' mb_deflt_lbl range - - emitLabel join_lbl - -mk_discrete_switch :: Bool -- ^ Use signed comparisons - -> CmmExpr - -> [(Integer, BlockId)] - -> Maybe BlockId - -> (Integer, Integer) - -> CmmAGraph - --- SINGLETON TAG RANGE: no case analysis to do -mk_discrete_switch _ _tag_expr [(tag, lbl)] _ (lo_tag, hi_tag) - | lo_tag == hi_tag - = ASSERT( tag == lo_tag ) - mkBranch lbl - --- SINGLETON BRANCH, NO DEFAULT: no case analysis to do -mk_discrete_switch _ _tag_expr [(_tag,lbl)] Nothing _ - = mkBranch lbl - -- The simplifier might have eliminated a case - -- so we may have e.g. case xs of - -- [] -> e - -- In that situation we can be sure the (:) case - -- can't happen, so no need to test - --- SOMETHING MORE COMPLICATED: defer to CmmImplementSwitchPlans --- See Note [Cmm Switches, the general plan] in CmmSwitch -mk_discrete_switch signed tag_expr branches mb_deflt range - = mkSwitch tag_expr $ mkSwitchTargets signed range mb_deflt (M.fromList branches) - -divideBranches :: Ord a => [(a,b)] -> ([(a,b)], a, [(a,b)]) -divideBranches branches = (lo_branches, mid, hi_branches) - where - -- 2 branches => n_branches `div` 2 = 1 - -- => branches !! 1 give the *second* tag - -- There are always at least 2 branches here - (mid,_) = branches !! (length branches `div` 2) - (lo_branches, hi_branches) = span is_lo branches - is_lo (t,_) = t < mid - --------------- -emitCmmLitSwitch :: CmmExpr -- Tag to switch on - -> [(Literal, CmmAGraphScoped)] -- Tagged branches - -> CmmAGraphScoped -- Default branch (always) - -> FCode () -- Emit the code -emitCmmLitSwitch _scrut [] deflt = emit $ fst deflt -emitCmmLitSwitch scrut branches deflt = do - scrut' <- assignTemp' scrut - join_lbl <- newBlockId - deflt_lbl <- label_code join_lbl deflt - branches_lbls <- label_branches join_lbl branches - - dflags <- getDynFlags - let cmm_ty = cmmExprType dflags scrut - rep = typeWidth cmm_ty - - -- We find the necessary type information in the literals in the branches - let signed = case head branches of - (LitNumber nt _ _, _) -> litNumIsSigned nt - _ -> False - - let range | signed = (tARGET_MIN_INT dflags, tARGET_MAX_INT dflags) - | otherwise = (0, tARGET_MAX_WORD dflags) - - if isFloatType cmm_ty - then emit =<< mk_float_switch rep scrut' deflt_lbl noBound branches_lbls - else emit $ mk_discrete_switch - signed - scrut' - [(litValue lit,l) | (lit,l) <- branches_lbls] - (Just deflt_lbl) - range - emitLabel join_lbl - --- | lower bound (inclusive), upper bound (exclusive) -type LitBound = (Maybe Literal, Maybe Literal) - -noBound :: LitBound -noBound = (Nothing, Nothing) - -mk_float_switch :: Width -> CmmExpr -> BlockId - -> LitBound - -> [(Literal,BlockId)] - -> FCode CmmAGraph -mk_float_switch rep scrut deflt _bounds [(lit,blk)] - = do dflags <- getDynFlags - return $ mkCbranch (cond dflags) deflt blk Nothing - where - cond dflags = CmmMachOp ne [scrut, CmmLit cmm_lit] - where - cmm_lit = mkSimpleLit dflags lit - ne = MO_F_Ne rep - -mk_float_switch rep scrut deflt_blk_id (lo_bound, hi_bound) branches - = do dflags <- getDynFlags - lo_blk <- mk_float_switch rep scrut deflt_blk_id bounds_lo lo_branches - hi_blk <- mk_float_switch rep scrut deflt_blk_id bounds_hi hi_branches - mkCmmIfThenElse (cond dflags) lo_blk hi_blk - where - (lo_branches, mid_lit, hi_branches) = divideBranches branches - - bounds_lo = (lo_bound, Just mid_lit) - bounds_hi = (Just mid_lit, hi_bound) - - cond dflags = CmmMachOp lt [scrut, CmmLit cmm_lit] - where - cmm_lit = mkSimpleLit dflags mid_lit - lt = MO_F_Lt rep - - --------------- -label_default :: BlockId -> Maybe CmmAGraphScoped -> FCode (Maybe BlockId) -label_default _ Nothing - = return Nothing -label_default join_lbl (Just code) - = do lbl <- label_code join_lbl code - return (Just lbl) - --------------- -label_branches :: BlockId -> [(a,CmmAGraphScoped)] -> FCode [(a,BlockId)] -label_branches _join_lbl [] - = return [] -label_branches join_lbl ((tag,code):branches) - = do lbl <- label_code join_lbl code - branches' <- label_branches join_lbl branches - return ((tag,lbl):branches') - --------------- -label_code :: BlockId -> CmmAGraphScoped -> FCode BlockId --- label_code J code --- generates --- [L: code; goto J] --- and returns L -label_code join_lbl (code,tsc) = do - lbl <- newBlockId - emitOutOfLine lbl (code MkGraph.<*> mkBranch join_lbl, tsc) - return lbl - --------------- -assignTemp' :: CmmExpr -> FCode CmmExpr -assignTemp' e - | isTrivialCmmExpr e = return e - | otherwise = do - dflags <- getDynFlags - lreg <- newTemp (cmmExprType dflags e) - let reg = CmmLocal lreg - emitAssign reg e - return (CmmReg reg) |