path: root/compiler/GHC
author    Sylvain Henry <sylvain@haskus.fr>  2019-08-13 17:26:32 +0200
committer Sylvain Henry <sylvain@haskus.fr>  2019-09-10 00:04:50 +0200
commit    447864a94a1679b5b079e08bb7208a0005381cef (patch)
tree      baa469c52620ce7ae02def3e5e6a6f109cc89f40 /compiler/GHC
parent    270fbe8512f04b6107755fa22bdec62205c0a567 (diff)
download  haskell-447864a94a1679b5b079e08bb7208a0005381cef.tar.gz
Module hierarchy: StgToCmm (#13009)
Add StgToCmm module hierarchy. Platform modules that are used in several other places (NCG, LLVM codegen, Cmm transformations) are put into GHC.Platform.
Diffstat (limited to 'compiler/GHC')
-rw-r--r--  compiler/GHC/Platform/ARM.hs          10
-rw-r--r--  compiler/GHC/Platform/ARM64.hs        10
-rw-r--r--  compiler/GHC/Platform/NoRegs.hs        9
-rw-r--r--  compiler/GHC/Platform/PPC.hs          10
-rw-r--r--  compiler/GHC/Platform/Regs.hs        107
-rw-r--r--  compiler/GHC/Platform/SPARC.hs        10
-rw-r--r--  compiler/GHC/Platform/X86.hs          10
-rw-r--r--  compiler/GHC/Platform/X86_64.hs       10
-rw-r--r--  compiler/GHC/StgToCmm.hs             223
-rw-r--r--  compiler/GHC/StgToCmm/ArgRep.hs      160
-rw-r--r--  compiler/GHC/StgToCmm/Bind.hs        753
-rw-r--r--  compiler/GHC/StgToCmm/Bind.hs-boot     6
-rw-r--r--  compiler/GHC/StgToCmm/CgUtils.hs     186
-rw-r--r--  compiler/GHC/StgToCmm/Closure.hs    1008
-rw-r--r--  compiler/GHC/StgToCmm/Con.hs         285
-rw-r--r--  compiler/GHC/StgToCmm/Env.hs         208
-rw-r--r--  compiler/GHC/StgToCmm/Expr.hs        992
-rw-r--r--  compiler/GHC/StgToCmm/ExtCode.hs     252
-rw-r--r--  compiler/GHC/StgToCmm/Foreign.hs     627
-rw-r--r--  compiler/GHC/StgToCmm/Heap.hs        680
-rw-r--r--  compiler/GHC/StgToCmm/Hpc.hs          48
-rw-r--r--  compiler/GHC/StgToCmm/Layout.hs      623
-rw-r--r--  compiler/GHC/StgToCmm/Monad.hs       861
-rw-r--r--  compiler/GHC/StgToCmm/Prim.hs       2622
-rw-r--r--  compiler/GHC/StgToCmm/Prof.hs        360
-rw-r--r--  compiler/GHC/StgToCmm/Ticky.hs       682
-rw-r--r--  compiler/GHC/StgToCmm/Utils.hs       578
27 files changed, 11330 insertions, 0 deletions
diff --git a/compiler/GHC/Platform/ARM.hs b/compiler/GHC/Platform/ARM.hs
new file mode 100644
index 0000000000..d0c7e5811a
--- /dev/null
+++ b/compiler/GHC/Platform/ARM.hs
@@ -0,0 +1,10 @@
+{-# LANGUAGE CPP #-}
+
+module GHC.Platform.ARM where
+
+import GhcPrelude
+
+#define MACHREGS_NO_REGS 0
+#define MACHREGS_arm 1
+#include "../../../includes/CodeGen.Platform.hs"
+
diff --git a/compiler/GHC/Platform/ARM64.hs b/compiler/GHC/Platform/ARM64.hs
new file mode 100644
index 0000000000..ebd66b92c5
--- /dev/null
+++ b/compiler/GHC/Platform/ARM64.hs
@@ -0,0 +1,10 @@
+{-# LANGUAGE CPP #-}
+
+module GHC.Platform.ARM64 where
+
+import GhcPrelude
+
+#define MACHREGS_NO_REGS 0
+#define MACHREGS_aarch64 1
+#include "../../../includes/CodeGen.Platform.hs"
+
diff --git a/compiler/GHC/Platform/NoRegs.hs b/compiler/GHC/Platform/NoRegs.hs
new file mode 100644
index 0000000000..e8abf44253
--- /dev/null
+++ b/compiler/GHC/Platform/NoRegs.hs
@@ -0,0 +1,9 @@
+{-# LANGUAGE CPP #-}
+
+module GHC.Platform.NoRegs where
+
+import GhcPrelude
+
+#define MACHREGS_NO_REGS 1
+#include "../../../includes/CodeGen.Platform.hs"
+
diff --git a/compiler/GHC/Platform/PPC.hs b/compiler/GHC/Platform/PPC.hs
new file mode 100644
index 0000000000..f405f95438
--- /dev/null
+++ b/compiler/GHC/Platform/PPC.hs
@@ -0,0 +1,10 @@
+{-# LANGUAGE CPP #-}
+
+module GHC.Platform.PPC where
+
+import GhcPrelude
+
+#define MACHREGS_NO_REGS 0
+#define MACHREGS_powerpc 1
+#include "../../../includes/CodeGen.Platform.hs"
+
diff --git a/compiler/GHC/Platform/Regs.hs b/compiler/GHC/Platform/Regs.hs
new file mode 100644
index 0000000000..e7887fbe72
--- /dev/null
+++ b/compiler/GHC/Platform/Regs.hs
@@ -0,0 +1,107 @@
+
+module GHC.Platform.Regs
+ (callerSaves, activeStgRegs, haveRegBase, globalRegMaybe, freeReg)
+ where
+
+import GhcPrelude
+
+import CmmExpr
+import GHC.Platform
+import Reg
+
+import qualified GHC.Platform.ARM as ARM
+import qualified GHC.Platform.ARM64 as ARM64
+import qualified GHC.Platform.PPC as PPC
+import qualified GHC.Platform.SPARC as SPARC
+import qualified GHC.Platform.X86 as X86
+import qualified GHC.Platform.X86_64 as X86_64
+import qualified GHC.Platform.NoRegs as NoRegs
+
+-- | Returns 'True' if this global register is stored in a caller-saves
+-- machine register.
+
+callerSaves :: Platform -> GlobalReg -> Bool
+callerSaves platform
+ | platformUnregisterised platform = NoRegs.callerSaves
+ | otherwise
+ = case platformArch platform of
+ ArchX86 -> X86.callerSaves
+ ArchX86_64 -> X86_64.callerSaves
+ ArchSPARC -> SPARC.callerSaves
+ ArchARM {} -> ARM.callerSaves
+ ArchARM64 -> ARM64.callerSaves
+ arch
+ | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
+ PPC.callerSaves
+
+ | otherwise -> NoRegs.callerSaves
+
+-- | Here is where the STG register map is defined for each target arch.
+-- The order matters (for the LLVM backend anyway)! We must keep the
+-- order here in sync with the order used in the LLVM calling convention.
+-- Note also that this isn't all registers, just the ones that are currently
+-- possibly mapped to real registers.
+activeStgRegs :: Platform -> [GlobalReg]
+activeStgRegs platform
+ | platformUnregisterised platform = NoRegs.activeStgRegs
+ | otherwise
+ = case platformArch platform of
+ ArchX86 -> X86.activeStgRegs
+ ArchX86_64 -> X86_64.activeStgRegs
+ ArchSPARC -> SPARC.activeStgRegs
+ ArchARM {} -> ARM.activeStgRegs
+ ArchARM64 -> ARM64.activeStgRegs
+ arch
+ | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
+ PPC.activeStgRegs
+
+ | otherwise -> NoRegs.activeStgRegs
+
+haveRegBase :: Platform -> Bool
+haveRegBase platform
+ | platformUnregisterised platform = NoRegs.haveRegBase
+ | otherwise
+ = case platformArch platform of
+ ArchX86 -> X86.haveRegBase
+ ArchX86_64 -> X86_64.haveRegBase
+ ArchSPARC -> SPARC.haveRegBase
+ ArchARM {} -> ARM.haveRegBase
+ ArchARM64 -> ARM64.haveRegBase
+ arch
+ | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
+ PPC.haveRegBase
+
+ | otherwise -> NoRegs.haveRegBase
+
+globalRegMaybe :: Platform -> GlobalReg -> Maybe RealReg
+globalRegMaybe platform
+ | platformUnregisterised platform = NoRegs.globalRegMaybe
+ | otherwise
+ = case platformArch platform of
+ ArchX86 -> X86.globalRegMaybe
+ ArchX86_64 -> X86_64.globalRegMaybe
+ ArchSPARC -> SPARC.globalRegMaybe
+ ArchARM {} -> ARM.globalRegMaybe
+ ArchARM64 -> ARM64.globalRegMaybe
+ arch
+ | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
+ PPC.globalRegMaybe
+
+ | otherwise -> NoRegs.globalRegMaybe
+
+freeReg :: Platform -> RegNo -> Bool
+freeReg platform
+ | platformUnregisterised platform = NoRegs.freeReg
+ | otherwise
+ = case platformArch platform of
+ ArchX86 -> X86.freeReg
+ ArchX86_64 -> X86_64.freeReg
+ ArchSPARC -> SPARC.freeReg
+ ArchARM {} -> ARM.freeReg
+ ArchARM64 -> ARM64.freeReg
+ arch
+ | arch `elem` [ArchPPC, ArchPPC_64 ELF_V1, ArchPPC_64 ELF_V2] ->
+ PPC.freeReg
+
+ | otherwise -> NoRegs.freeReg
+
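A minimal usage sketch (hypothetical, not part of this patch): clients go
through this dispatch and never import the per-architecture modules directly.
It assumes GHC.Platform.Regs, CmmExpr and DynFlags are in scope.

    -- Is STG register R1 caller-saved on this target, and which STG
    -- registers are pinned to real machine registers at all?
    exampleQueries :: DynFlags -> (Bool, [GlobalReg])
    exampleQueries dflags =
      let platform = targetPlatform dflags
      in ( callerSaves platform (VanillaReg 1 VGcPtr)
         , activeStgRegs platform )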
diff --git a/compiler/GHC/Platform/SPARC.hs b/compiler/GHC/Platform/SPARC.hs
new file mode 100644
index 0000000000..b0cdb27f44
--- /dev/null
+++ b/compiler/GHC/Platform/SPARC.hs
@@ -0,0 +1,10 @@
+{-# LANGUAGE CPP #-}
+
+module GHC.Platform.SPARC where
+
+import GhcPrelude
+
+#define MACHREGS_NO_REGS 0
+#define MACHREGS_sparc 1
+#include "../../../includes/CodeGen.Platform.hs"
+
diff --git a/compiler/GHC/Platform/X86.hs b/compiler/GHC/Platform/X86.hs
new file mode 100644
index 0000000000..1570ba9fa0
--- /dev/null
+++ b/compiler/GHC/Platform/X86.hs
@@ -0,0 +1,10 @@
+{-# LANGUAGE CPP #-}
+
+module GHC.Platform.X86 where
+
+import GhcPrelude
+
+#define MACHREGS_NO_REGS 0
+#define MACHREGS_i386 1
+#include "../../../includes/CodeGen.Platform.hs"
+
diff --git a/compiler/GHC/Platform/X86_64.hs b/compiler/GHC/Platform/X86_64.hs
new file mode 100644
index 0000000000..d2d1b15c71
--- /dev/null
+++ b/compiler/GHC/Platform/X86_64.hs
@@ -0,0 +1,10 @@
+{-# LANGUAGE CPP #-}
+
+module GHC.Platform.X86_64 where
+
+import GhcPrelude
+
+#define MACHREGS_NO_REGS 0
+#define MACHREGS_x86_64 1
+#include "../../../includes/CodeGen.Platform.hs"
+
diff --git a/compiler/GHC/StgToCmm.hs b/compiler/GHC/StgToCmm.hs
new file mode 100644
index 0000000000..c7ee604692
--- /dev/null
+++ b/compiler/GHC/StgToCmm.hs
@@ -0,0 +1,223 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE DataKinds #-}
+
+-----------------------------------------------------------------------------
+--
+-- Stg to C-- code generation
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+module GHC.StgToCmm ( codeGen ) where
+
+#include "HsVersions.h"
+
+import GhcPrelude as Prelude
+
+import GHC.StgToCmm.Prof (initCostCentres, ldvEnter)
+import GHC.StgToCmm.Monad
+import GHC.StgToCmm.Env
+import GHC.StgToCmm.Bind
+import GHC.StgToCmm.Con
+import GHC.StgToCmm.Layout
+import GHC.StgToCmm.Utils
+import GHC.StgToCmm.Closure
+import GHC.StgToCmm.Hpc
+import GHC.StgToCmm.Ticky
+
+import Cmm
+import CmmUtils
+import CLabel
+
+import StgSyn
+import DynFlags
+import ErrUtils
+
+import HscTypes
+import CostCentre
+import Id
+import IdInfo
+import RepType
+import DataCon
+import TyCon
+import Module
+import Outputable
+import Stream
+import BasicTypes
+import VarSet ( isEmptyDVarSet )
+
+import OrdList
+import MkGraph
+
+import Data.IORef
+import Control.Monad (when,void)
+import Util
+
+codeGen :: DynFlags
+ -> Module
+ -> [TyCon]
+ -> CollectedCCs -- (Local/global) cost-centres needing declaring/registering.
+ -> [CgStgTopBinding] -- Bindings to convert
+ -> HpcInfo
+ -> Stream IO CmmGroup () -- Output as a stream, so codegen can
+ -- be interleaved with output
+
+codeGen dflags this_mod data_tycons
+ cost_centre_info stg_binds hpc_info
+ = do { -- cg: run the code generator, and yield the resulting CmmGroup
+ -- Using an IORef to store the state is a bit crude, but otherwise
+ -- we would need to add a state monad layer.
+ ; cgref <- liftIO $ newIORef =<< initC
+ ; let cg :: FCode () -> Stream IO CmmGroup ()
+ cg fcode = do
+ cmm <- liftIO . withTiming (return dflags) (text "STG -> Cmm") (`seq` ()) $ do
+ st <- readIORef cgref
+ let (a,st') = runC dflags this_mod st (getCmm fcode)
+
+ -- NB. stub-out cgs_tops and cgs_stmts. This fixes
+ -- a big space leak. DO NOT REMOVE!
+ writeIORef cgref $! st'{ cgs_tops = nilOL,
+ cgs_stmts = mkNop }
+ return a
+ yield cmm
+
+          -- Note [codegen-split-init]: the cmm_init block must come
+ -- FIRST. This is because when -split-objs is on we need to
+ -- combine this block with its initialisation routines; see
+ -- Note [pipeline-split-init].
+ ; cg (mkModuleInit cost_centre_info this_mod hpc_info)
+
+ ; mapM_ (cg . cgTopBinding dflags) stg_binds
+
+        -- Put datatype_stuff after code_stuff, because the
+        -- datatype closure table (for enumeration types) refers to
+        -- (say) PrelBase_True_closure, which is defined in
+        -- code_stuff
+ ; let do_tycon tycon = do
+ -- Generate a table of static closures for an
+              -- enumeration type. Note that the closure pointers are
+ -- tagged.
+ when (isEnumerationTyCon tycon) $ cg (cgEnumerationTyCon tycon)
+ mapM_ (cg . cgDataCon) (tyConDataCons tycon)
+
+ ; mapM_ do_tycon data_tycons
+ }
+
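Since the result is a Stream, code generation can be interleaved with the
consumer; a hypothetical sketch (not part of this patch) of instead forcing
the whole stream at once, using collect from the Stream module imported above:

    -- Run the code generator to completion and gather every CmmGroup.
    allGroups :: DynFlags -> Module -> [TyCon] -> CollectedCCs
              -> [CgStgTopBinding] -> HpcInfo -> IO [CmmGroup]
    allGroups dflags this_mod tycons ccs binds hpc =
      Stream.collect (codeGen dflags this_mod tycons ccs binds hpc)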
+---------------------------------------------------------------
+-- Top-level bindings
+---------------------------------------------------------------
+
+{- 'cgTopBinding' is only used for top-level bindings, since they need
+to be allocated statically (not in the heap) and need to be labelled.
+No unboxed bindings can happen at top level.
+
+In the code below, the static bindings are accumulated in the
+@MkCgState@, and transferred into the ``statics'' slot by @forkStatics@.
+This is so that we can write the top level processing in a compositional
+style, with the increasing static environment being plumbed as a state
+variable. -}
+
+cgTopBinding :: DynFlags -> CgStgTopBinding -> FCode ()
+cgTopBinding dflags (StgTopLifted (StgNonRec id rhs))
+ = do { let (info, fcode) = cgTopRhs dflags NonRecursive id rhs
+ ; fcode
+ ; addBindC info
+ }
+
+cgTopBinding dflags (StgTopLifted (StgRec pairs))
+ = do { let (bndrs, rhss) = unzip pairs
+ ; let pairs' = zip bndrs rhss
+ r = unzipWith (cgTopRhs dflags Recursive) pairs'
+ (infos, fcodes) = unzip r
+ ; addBindsC infos
+ ; sequence_ fcodes
+ }
+
+cgTopBinding dflags (StgTopStringLit id str)
+ = do { let label = mkBytesLabel (idName id)
+ ; let (lit, decl) = mkByteStringCLit label str
+ ; emitDecl decl
+ ; addBindC (litIdInfo dflags id mkLFStringLit lit)
+ }
+
+cgTopRhs :: DynFlags -> RecFlag -> Id -> CgStgRhs -> (CgIdInfo, FCode ())
+ -- The Id is passed along for setting up a binding...
+
+cgTopRhs dflags _rec bndr (StgRhsCon _cc con args)
+ = cgTopRhsCon dflags bndr con (assertNonVoidStgArgs args)
+ -- con args are always non-void,
+ -- see Note [Post-unarisation invariants] in UnariseStg
+
+cgTopRhs dflags rec bndr (StgRhsClosure fvs cc upd_flag args body)
+ = ASSERT(isEmptyDVarSet fvs) -- There should be no free variables
+ cgTopRhsClosure dflags rec bndr cc upd_flag args body
+
+
+---------------------------------------------------------------
+-- Module initialisation code
+---------------------------------------------------------------
+
+mkModuleInit
+ :: CollectedCCs -- cost centre info
+ -> Module
+ -> HpcInfo
+ -> FCode ()
+
+mkModuleInit cost_centre_info this_mod hpc_info
+ = do { initHpc this_mod hpc_info
+ ; initCostCentres cost_centre_info
+ }
+
+
+---------------------------------------------------------------
+-- Generating static stuff for algebraic data types
+---------------------------------------------------------------
+
+
+cgEnumerationTyCon :: TyCon -> FCode ()
+cgEnumerationTyCon tycon
+ = do dflags <- getDynFlags
+ emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs)
+ [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs)
+ (tagForCon dflags con)
+ | con <- tyConDataCons tycon]
+
+
+cgDataCon :: DataCon -> FCode ()
+-- Generate the entry code, info tables, and (for niladic constructors)
+-- the static closure, for a constructor.
+cgDataCon data_con
+ = do { dflags <- getDynFlags
+ ; let
+ (tot_wds, -- #ptr_wds + #nonptr_wds
+ ptr_wds) -- #ptr_wds
+ = mkVirtConstrSizes dflags arg_reps
+
+ nonptr_wds = tot_wds - ptr_wds
+
+ dyn_info_tbl =
+ mkDataConInfoTable dflags data_con False ptr_wds nonptr_wds
+
+          -- We're generating info tables, so we don't know or care about
+          -- what the actual arguments are. Using () here as the placeholder.
+ arg_reps :: [NonVoid PrimRep]
+ arg_reps = [ NonVoid rep_ty
+ | ty <- dataConRepArgTys data_con
+ , rep_ty <- typePrimRep ty
+ , not (isVoidRep rep_ty) ]
+
+ ; emitClosureAndInfoTable dyn_info_tbl NativeDirectCall [] $
+ -- NB: the closure pointer is assumed *untagged* on
+ -- entry to a constructor. If the pointer is tagged,
+ -- then we should not be entering it. This assumption
+ -- is used in ldvEnter and when tagging the pointer to
+ -- return it.
+ -- NB 2: We don't set CC when entering data (WDP 94/06)
+ do { tickyEnterDynCon
+ ; ldvEnter (CmmReg nodeReg)
+ ; tickyReturnOldCon (length arg_reps)
+ ; void $ emitReturn [cmmOffsetB dflags (CmmReg nodeReg) (tagForCon dflags data_con)]
+ }
+ -- The case continuation code expects a tagged pointer
+ }
diff --git a/compiler/GHC/StgToCmm/ArgRep.hs b/compiler/GHC/StgToCmm/ArgRep.hs
new file mode 100644
index 0000000000..cc2fe8306a
--- /dev/null
+++ b/compiler/GHC/StgToCmm/ArgRep.hs
@@ -0,0 +1,160 @@
+-----------------------------------------------------------------------------
+--
+-- Argument representations used in GHC.StgToCmm.Layout.
+--
+-- (c) The University of Glasgow 2013
+--
+-----------------------------------------------------------------------------
+
+module GHC.StgToCmm.ArgRep (
+ ArgRep(..), toArgRep, argRepSizeW,
+
+ argRepString, isNonV, idArgRep,
+
+ slowCallPattern,
+
+ ) where
+
+import GhcPrelude
+
+import GHC.StgToCmm.Closure ( idPrimRep )
+
+import SMRep ( WordOff )
+import Id ( Id )
+import TyCon ( PrimRep(..), primElemRepSizeB )
+import BasicTypes ( RepArity )
+import Constants ( wORD64_SIZE )
+import DynFlags
+
+import Outputable
+import FastString
+
+-- I extricated this code into this new module in order to avoid a
+-- cyclic dependency between GHC.StgToCmm.Layout and GHC.StgToCmm.Ticky.
+--
+-- NSF 18 Feb 2013
+
+-------------------------------------------------------------------------
+-- Classifying arguments: ArgRep
+-------------------------------------------------------------------------
+
+-- ArgRep is re-exported by GHC.StgToCmm.Layout, but only for use in the
+-- byte-code generator which also needs to know about the
+-- classification of arguments.
+
+data ArgRep = P -- GC Ptr
+ | N -- Word-sized non-ptr
+ | L -- 64-bit non-ptr (long)
+ | V -- Void
+ | F -- Float
+ | D -- Double
+ | V16 -- 16-byte (128-bit) vectors of Float/Double/Int8/Word32/etc.
+ | V32 -- 32-byte (256-bit) vectors of Float/Double/Int8/Word32/etc.
+ | V64 -- 64-byte (512-bit) vectors of Float/Double/Int8/Word32/etc.
+instance Outputable ArgRep where ppr = text . argRepString
+
+argRepString :: ArgRep -> String
+argRepString P = "P"
+argRepString N = "N"
+argRepString L = "L"
+argRepString V = "V"
+argRepString F = "F"
+argRepString D = "D"
+argRepString V16 = "V16"
+argRepString V32 = "V32"
+argRepString V64 = "V64"
+
+toArgRep :: PrimRep -> ArgRep
+toArgRep VoidRep = V
+toArgRep LiftedRep = P
+toArgRep UnliftedRep = P
+toArgRep IntRep = N
+toArgRep WordRep = N
+toArgRep Int8Rep = N -- Gets widened to native word width for calls
+toArgRep Word8Rep = N -- Gets widened to native word width for calls
+toArgRep Int16Rep = N -- Gets widened to native word width for calls
+toArgRep Word16Rep = N -- Gets widened to native word width for calls
+toArgRep Int32Rep = N -- Gets widened to native word width for calls
+toArgRep Word32Rep = N -- Gets widened to native word width for calls
+toArgRep AddrRep = N
+toArgRep Int64Rep = L
+toArgRep Word64Rep = L
+toArgRep FloatRep = F
+toArgRep DoubleRep = D
+toArgRep (VecRep len elem) = case len*primElemRepSizeB elem of
+ 16 -> V16
+ 32 -> V32
+ 64 -> V64
+ _ -> error "toArgRep: bad vector primrep"
+
+isNonV :: ArgRep -> Bool
+isNonV V = False
+isNonV _ = True
+
+argRepSizeW :: DynFlags -> ArgRep -> WordOff -- Size in words
+argRepSizeW _ N = 1
+argRepSizeW _ P = 1
+argRepSizeW _ F = 1
+argRepSizeW dflags L = wORD64_SIZE `quot` wORD_SIZE dflags
+argRepSizeW dflags D = dOUBLE_SIZE dflags `quot` wORD_SIZE dflags
+argRepSizeW _ V = 0
+argRepSizeW dflags V16 = 16 `quot` wORD_SIZE dflags
+argRepSizeW dflags V32 = 32 `quot` wORD_SIZE dflags
+argRepSizeW dflags V64 = 64 `quot` wORD_SIZE dflags
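For intuition, the sizes work out as follows (a hedged worked example;
dflags64 is a hypothetical 64-bit DynFlags with wORD_SIZE = 8 and
dOUBLE_SIZE = 8):

    --   argRepSizeW dflags64 N   == 1             -- one machine word
    --   argRepSizeW dflags64 L   == 8 `quot` 8    == 1
    --   argRepSizeW dflags64 V16 == 16 `quot` 8   == 2
    --   argRepSizeW dflags64 V64 == 64 `quot` 8   == 8
    -- On a 32-bit target (wORD_SIZE = 4), L and D each take 2 words instead.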
+
+idArgRep :: Id -> ArgRep
+idArgRep = toArgRep . idPrimRep
+
+-- This list of argument patterns should be kept in sync with at least
+-- the following:
+--
+-- * GHC.StgToCmm.Layout.stdPattern maybe to some degree?
+--
+-- * the RTS_RET(stg_ap_*) and RTS_FUN_DECL(stg_ap_*_fast)
+-- declarations in includes/stg/MiscClosures.h
+--
+-- * the SLOW_CALL_*_ctr declarations in includes/stg/Ticky.h,
+--
+-- * the TICK_SLOW_CALL_*() #defines in includes/Cmm.h,
+--
+-- * the PR_CTR(SLOW_CALL_*_ctr) calls in rts/Ticky.c,
+--
+-- * and the SymI_HasProto(stg_ap_*_{ret,info,fast}) calls and
+-- SymI_HasProto(SLOW_CALL_*_ctr) calls in rts/Linker.c
+--
+-- There may be more places that I haven't found; I merely igrep'd for
+-- pppppp and excluded things that seemed ghci-specific.
+--
+-- Also, it seems at the moment that ticky counters with void
+-- arguments will never be bumped, but I'm still declaring those
+-- counters, defensively.
+--
+-- NSF 6 Mar 2013
+
+slowCallPattern :: [ArgRep] -> (FastString, RepArity)
+-- Returns the generic apply function and arity
+--
+-- The first batch of cases match (some) specialised entries
+-- The last group deals exhaustively with the cases for the first argument
+-- (and the zero-argument case)
+--
+-- In 99% of cases this function will match *all* the arguments in one batch
+
+slowCallPattern (P: P: P: P: P: P: _) = (fsLit "stg_ap_pppppp", 6)
+slowCallPattern (P: P: P: P: P: _) = (fsLit "stg_ap_ppppp", 5)
+slowCallPattern (P: P: P: P: _) = (fsLit "stg_ap_pppp", 4)
+slowCallPattern (P: P: P: V: _) = (fsLit "stg_ap_pppv", 4)
+slowCallPattern (P: P: P: _) = (fsLit "stg_ap_ppp", 3)
+slowCallPattern (P: P: V: _) = (fsLit "stg_ap_ppv", 3)
+slowCallPattern (P: P: _) = (fsLit "stg_ap_pp", 2)
+slowCallPattern (P: V: _) = (fsLit "stg_ap_pv", 2)
+slowCallPattern (P: _) = (fsLit "stg_ap_p", 1)
+slowCallPattern (V: _) = (fsLit "stg_ap_v", 1)
+slowCallPattern (N: _) = (fsLit "stg_ap_n", 1)
+slowCallPattern (F: _) = (fsLit "stg_ap_f", 1)
+slowCallPattern (D: _) = (fsLit "stg_ap_d", 1)
+slowCallPattern (L: _) = (fsLit "stg_ap_l", 1)
+slowCallPattern (V16: _) = (fsLit "stg_ap_v16", 1)
+slowCallPattern (V32: _) = (fsLit "stg_ap_v32", 1)
+slowCallPattern (V64: _) = (fsLit "stg_ap_v64", 1)
+slowCallPattern [] = (fsLit "stg_ap_0", 0)
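slowCallPattern only consumes a prefix of the reps, so a long unknown call is
handled as a chain of generic-apply chunks; a hypothetical helper (not part of
this patch) makes the decomposition explicit:

    -- Split argument reps into successive stg_ap_* chunks, e.g.
    --   decompose [P,P,P,P,N]  ~>  [("stg_ap_pppp",4), ("stg_ap_n",1)]
    decompose :: [ArgRep] -> [(FastString, RepArity)]
    decompose []   = []
    decompose reps = (fs, arity) : decompose (drop (max 1 arity) reps)
      where (fs, arity) = slowCallPattern reps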
diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs
new file mode 100644
index 0000000000..bfe9255783
--- /dev/null
+++ b/compiler/GHC/StgToCmm/Bind.hs
@@ -0,0 +1,753 @@
+-----------------------------------------------------------------------------
+--
+-- Stg to C-- code generation: bindings
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+module GHC.StgToCmm.Bind (
+ cgTopRhsClosure,
+ cgBind,
+ emitBlackHoleCode,
+ pushUpdateFrame, emitUpdateFrame
+ ) where
+
+import GhcPrelude hiding ((<*>))
+
+import GHC.StgToCmm.Expr
+import GHC.StgToCmm.Monad
+import GHC.StgToCmm.Env
+import GHC.StgToCmm.Con
+import GHC.StgToCmm.Heap
+import GHC.StgToCmm.Prof (ldvEnterClosure, enterCostCentreFun, enterCostCentreThunk,
+ initUpdFrameProf)
+import GHC.StgToCmm.Ticky
+import GHC.StgToCmm.Layout
+import GHC.StgToCmm.Utils
+import GHC.StgToCmm.Closure
+import GHC.StgToCmm.Foreign (emitPrimCall)
+
+import MkGraph
+import CoreSyn ( AltCon(..), tickishIsCode )
+import BlockId
+import SMRep
+import Cmm
+import CmmInfo
+import CmmUtils
+import CLabel
+import StgSyn
+import CostCentre
+import Id
+import IdInfo
+import Name
+import Module
+import ListSetOps
+import Util
+import VarSet
+import BasicTypes
+import Outputable
+import FastString
+import DynFlags
+
+import Control.Monad
+
+------------------------------------------------------------------------
+-- Top-level bindings
+------------------------------------------------------------------------
+
+-- For closures bound at top level, allocate in static space.
+-- They should have no free variables.
+
+cgTopRhsClosure :: DynFlags
+ -> RecFlag -- member of a recursive group?
+ -> Id
+ -> CostCentreStack -- Optional cost centre annotation
+ -> UpdateFlag
+ -> [Id] -- Args
+ -> CgStgExpr
+ -> (CgIdInfo, FCode ())
+
+cgTopRhsClosure dflags rec id ccs upd_flag args body =
+ let closure_label = mkLocalClosureLabel (idName id) (idCafInfo id)
+ cg_id_info = litIdInfo dflags id lf_info (CmmLabel closure_label)
+ lf_info = mkClosureLFInfo dflags id TopLevel [] upd_flag args
+ in (cg_id_info, gen_code dflags lf_info closure_label)
+ where
+  -- special case for an indirection (f = g). We create an IND_STATIC
+  -- closure pointing directly to the indirectee. This is exactly
+  -- what the CAF will eventually evaluate to anyway; we're just
+  -- shortcutting the whole process, and generating a lot less code
+  -- (#7308). Eventually the IND_STATIC closure will be eliminated
+  -- by assembly '.equiv' directives, where possible (#15155).
+  -- See Note [emit-time elimination of static indirections] in CLabel.
+ --
+ -- Note: we omit the optimisation when this binding is part of a
+ -- recursive group, because the optimisation would inhibit the black
+ -- hole detection from working in that case. Test
+ -- concurrent/should_run/4030 fails, for instance.
+ --
+ gen_code dflags _ closure_label
+ | StgApp f [] <- body, null args, isNonRec rec
+ = do
+ cg_info <- getCgIdInfo f
+ let closure_rep = mkStaticClosureFields dflags
+ indStaticInfoTable ccs MayHaveCafRefs
+ [unLit (idInfoToAmode cg_info)]
+ emitDataLits closure_label closure_rep
+ return ()
+
+ gen_code dflags lf_info _closure_label
+ = do { let name = idName id
+ ; mod_name <- getModuleName
+ ; let descr = closureDescription dflags mod_name name
+ closure_info = mkClosureInfo dflags True id lf_info 0 0 descr
+
+ -- We don't generate the static closure here, because we might
+ -- want to add references to static closures to it later. The
+ -- static closure is generated by CmmBuildInfoTables.updInfoSRTs,
+ -- See Note [SRTs], specifically the [FUN] optimisation.
+
+ ; let fv_details :: [(NonVoid Id, ByteOff)]
+ header = if isLFThunk lf_info then ThunkHeader else StdHeader
+ (_, _, fv_details) = mkVirtHeapOffsets dflags header []
+ -- Don't drop the non-void args until the closure info has been made
+ ; forkClosureBody (closureCodeBody True id closure_info ccs
+ (nonVoidIds args) (length args) body fv_details)
+
+ ; return () }
+
+ unLit (CmmLit l) = l
+ unLit _ = panic "unLit"
+
+------------------------------------------------------------------------
+-- Non-top-level bindings
+------------------------------------------------------------------------
+
+cgBind :: CgStgBinding -> FCode ()
+cgBind (StgNonRec name rhs)
+ = do { (info, fcode) <- cgRhs name rhs
+ ; addBindC info
+ ; init <- fcode
+ ; emit init }
+ -- init cannot be used in body, so slightly better to sink it eagerly
+
+cgBind (StgRec pairs)
+ = do { r <- sequence $ unzipWith cgRhs pairs
+ ; let (id_infos, fcodes) = unzip r
+ ; addBindsC id_infos
+ ; (inits, body) <- getCodeR $ sequence fcodes
+ ; emit (catAGraphs inits <*> body) }
+
+{- Note [cgBind rec]
+
+ Recursive let-bindings are tricky.
+ Consider the following pseudocode:
+
+ let x = \_ -> ... y ...
+ y = \_ -> ... z ...
+ z = \_ -> ... x ...
+ in ...
+
+ For each binding, we need to allocate a closure, and each closure must
+ capture the address of the other closures.
+ We want to generate the following C-- code:
+ // Initialization Code
+ x = hp - 24; // heap address of x's closure
+      y = hp - 40; // heap address of y's closure
+      z = hp - 64; // heap address of z's closure
+ // allocate and initialize x
+ m[hp-8] = ...
+      m[hp-16] = y;      // the closure for x captures y
+ m[hp-24] = x_info;
+ // allocate and initialize y
+ m[hp-32] = z; // the closure for y captures z
+ m[hp-40] = y_info;
+ // allocate and initialize z
+ ...
+
+ For each closure, we must generate not only the code to allocate and
+  initialize the closure itself, but also some initialization code that
+ sets a variable holding the closure pointer.
+
+ We could generate a pair of the (init code, body code), but since
+ the bindings are recursive we also have to initialise the
+ environment with the CgIdInfo for all the bindings before compiling
+ anything. So we do this in 3 stages:
+
+ 1. collect all the CgIdInfos and initialise the environment
+ 2. compile each binding into (init, body) code
+ 3. emit all the inits, and then all the bodies
+
+ We'd rather not have separate functions to do steps 1 and 2 for
+  each binding, since in practice they share a lot of code. So we
+ have just one function, cgRhs, that returns a pair of the CgIdInfo
+ for step 1, and a monadic computation to generate the code in step
+ 2.
+
+ The alternative to separating things in this way is to use a
+ fixpoint. That's what we used to do, but it introduces a
+ maintenance nightmare because there is a subtle dependency on not
+ being too strict everywhere. Doing things this way means that the
+ FCode monad can be strict, for example.
+ -}
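    -- Mapping the stages onto cgBind (StgRec pairs) above (annotation, not
    -- part of this patch): stage 1 is `sequence (unzipWith cgRhs pairs)`
    -- followed by `addBindsC id_infos`; stage 2 runs the returned FCode
    -- computations under getCodeR, yielding (inits, body); stage 3 is the
    -- final `emit (catAGraphs inits <*> body)`.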
+
+cgRhs :: Id
+ -> CgStgRhs
+ -> FCode (
+ CgIdInfo -- The info for this binding
+ , FCode CmmAGraph -- A computation which will generate the
+ -- code for the binding, and return an
+               -- assignment of the form "x = Hp - n"
+ -- (see above)
+ )
+
+cgRhs id (StgRhsCon cc con args)
+ = withNewTickyCounterCon (idName id) $
+ buildDynCon id True cc con (assertNonVoidStgArgs args)
+ -- con args are always non-void,
+ -- see Note [Post-unarisation invariants] in UnariseStg
+
+{- See Note [GC recovery] in compiler/GHC/StgToCmm/Closure.hs -}
+cgRhs id (StgRhsClosure fvs cc upd_flag args body)
+ = do dflags <- getDynFlags
+ mkRhsClosure dflags id cc (nonVoidIds (dVarSetElems fvs)) upd_flag args body
+
+------------------------------------------------------------------------
+-- Non-constructor right hand sides
+------------------------------------------------------------------------
+
+mkRhsClosure :: DynFlags -> Id -> CostCentreStack
+ -> [NonVoid Id] -- Free vars
+ -> UpdateFlag
+ -> [Id] -- Args
+ -> CgStgExpr
+ -> FCode (CgIdInfo, FCode CmmAGraph)
+
+{- mkRhsClosure looks for two special forms of the right-hand side:
+ a) selector thunks
+ b) AP thunks
+
+If neither happens, it just calls mkClosureLFInfo. You might think
+that mkClosureLFInfo should do all this, but it seems wrong for the
+latter to look at the structure of an expression.
+
+Note [Selectors]
+~~~~~~~~~~~~~~~~
+We look at the body of the closure to see if it's a selector---turgid,
+but nothing deep. We are looking for a closure of {\em exactly} the
+form:
+
+... = [the_fv] \ u [] ->
+ case the_fv of
+ con a_1 ... a_n -> a_i
+
+Note [Ap thunks]
+~~~~~~~~~~~~~~~~
+A more generic AP thunk of the form
+
+ x = [ x_1...x_n ] \.. [] -> x_1 ... x_n
+
+A set of these is compiled statically into the RTS, so we just use
+those. We could extend the idea to thunks where some of the x_i are
+global ids (and hence not free variables), but this would entail
+generating a larger thunk. It might be an option for non-optimising
+compilation, though.
+
+We only generate an Ap thunk if all the free variables are pointers,
+for semi-obvious reasons.
+
+-}
+
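A hypothetical source-level illustration (not part of this patch) of the two
special forms, in the STG notation used above:

    -- Selector thunk:  y = [t] \u [] -> case t of { MkT _ b _ -> b }
    --   e.g. arising from   let y = case t of MkT _ b _ -> b
    -- AP thunk:         z = [f,a,b] \u [] -> f a b
    --   where f, a and b are all locally-bound (free) variables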
+---------- Note [Selectors] ------------------
+mkRhsClosure dflags bndr _cc
+ [NonVoid the_fv] -- Just one free var
+ upd_flag -- Updatable thunk
+ [] -- A thunk
+ expr
+ | let strip = stripStgTicksTopE (not . tickishIsCode)
+ , StgCase (StgApp scrutinee [{-no args-}])
+ _ -- ignore bndr
+ (AlgAlt _)
+ [(DataAlt _, params, sel_expr)] <- strip expr
+ , StgApp selectee [{-no args-}] <- strip sel_expr
+ , the_fv == scrutinee -- Scrutinee is the only free variable
+
+ , let (_, _, params_w_offsets) = mkVirtConstrOffsets dflags (addIdReps (assertNonVoidIds params))
+ -- pattern binders are always non-void,
+ -- see Note [Post-unarisation invariants] in UnariseStg
+ , Just the_offset <- assocMaybe params_w_offsets (NonVoid selectee)
+
+ , let offset_into_int = bytesToWordsRoundUp dflags the_offset
+ - fixedHdrSizeW dflags
+ , offset_into_int <= mAX_SPEC_SELECTEE_SIZE dflags -- Offset is small enough
+ = -- NOT TRUE: ASSERT(is_single_constructor)
+ -- The simplifier may have statically determined that the single alternative
+ -- is the only possible case and eliminated the others, even if there are
+ -- other constructors in the datatype. It's still ok to make a selector
+ -- thunk in this case, because we *know* which constructor the scrutinee
+ -- will evaluate to.
+ --
+ -- srt is discarded; it must be empty
+ let lf_info = mkSelectorLFInfo bndr offset_into_int (isUpdatable upd_flag)
+ in cgRhsStdThunk bndr lf_info [StgVarArg the_fv]
+
+---------- Note [Ap thunks] ------------------
+mkRhsClosure dflags bndr _cc
+ fvs
+ upd_flag
+ [] -- No args; a thunk
+ (StgApp fun_id args)
+
+ -- We are looking for an "ApThunk"; see data con ApThunk in GHC.StgToCmm.Closure
+ -- of form (x1 x2 .... xn), where all the xi are locals (not top-level)
+ -- So the xi will all be free variables
+ | args `lengthIs` (n_fvs-1) -- This happens only if the fun_id and
+ -- args are all distinct local variables
+ -- The "-1" is for fun_id
+ -- Missed opportunity: (f x x) is not detected
+ , all (isGcPtrRep . idPrimRep . fromNonVoid) fvs
+ , isUpdatable upd_flag
+ , n_fvs <= mAX_SPEC_AP_SIZE dflags
+ , not (gopt Opt_SccProfilingOn dflags)
+ -- not when profiling: we don't want to
+ -- lose information about this particular
+ -- thunk (e.g. its type) (#949)
+ , idArity fun_id == unknownArity -- don't spoil a known call
+
+ -- Ha! an Ap thunk
+ = cgRhsStdThunk bndr lf_info payload
+
+ where
+ n_fvs = length fvs
+ lf_info = mkApLFInfo bndr upd_flag n_fvs
+ -- the payload has to be in the correct order, hence we can't
+ -- just use the fvs.
+ payload = StgVarArg fun_id : args
+
+---------- Default case ------------------
+mkRhsClosure dflags bndr cc fvs upd_flag args body
+ = do { let lf_info = mkClosureLFInfo dflags bndr NotTopLevel fvs upd_flag args
+ ; (id_info, reg) <- rhsIdInfo bndr lf_info
+ ; return (id_info, gen_code lf_info reg) }
+ where
+ gen_code lf_info reg
+ = do { -- LAY OUT THE OBJECT
+ -- If the binder is itself a free variable, then don't store
+ -- it in the closure. Instead, just bind it to Node on entry.
+ -- NB we can be sure that Node will point to it, because we
+ -- haven't told mkClosureLFInfo about this; so if the binder
+ -- _was_ a free var of its RHS, mkClosureLFInfo thinks it *is*
+ -- stored in the closure itself, so it will make sure that
+ -- Node points to it...
+ ; let reduced_fvs = filter (NonVoid bndr /=) fvs
+
+ -- MAKE CLOSURE INFO FOR THIS CLOSURE
+ ; mod_name <- getModuleName
+ ; dflags <- getDynFlags
+ ; let name = idName bndr
+ descr = closureDescription dflags mod_name name
+ fv_details :: [(NonVoid Id, ByteOff)]
+ header = if isLFThunk lf_info then ThunkHeader else StdHeader
+ (tot_wds, ptr_wds, fv_details)
+ = mkVirtHeapOffsets dflags header (addIdReps reduced_fvs)
+ closure_info = mkClosureInfo dflags False -- Not static
+ bndr lf_info tot_wds ptr_wds
+ descr
+
+ -- BUILD ITS INFO TABLE AND CODE
+ ; forkClosureBody $
+ -- forkClosureBody: (a) ensure that bindings in here are not seen elsewhere
+ -- (b) ignore Sequel from context; use empty Sequel
+ -- And compile the body
+ closureCodeBody False bndr closure_info cc (nonVoidIds args)
+ (length args) body fv_details
+
+ -- BUILD THE OBJECT
+-- ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body
+ ; let use_cc = cccsExpr; blame_cc = cccsExpr
+ ; emit (mkComment $ mkFastString "calling allocDynClosure")
+ ; let toVarArg (NonVoid a, off) = (NonVoid (StgVarArg a), off)
+ ; let info_tbl = mkCmmInfo closure_info bndr currentCCS
+ ; hp_plus_n <- allocDynClosure (Just bndr) info_tbl lf_info use_cc blame_cc
+ (map toVarArg fv_details)
+
+ -- RETURN
+ ; return (mkRhsInit dflags reg lf_info hp_plus_n) }
+
+-------------------------
+cgRhsStdThunk
+ :: Id
+ -> LambdaFormInfo
+ -> [StgArg] -- payload
+ -> FCode (CgIdInfo, FCode CmmAGraph)
+
+cgRhsStdThunk bndr lf_info payload
+ = do { (id_info, reg) <- rhsIdInfo bndr lf_info
+ ; return (id_info, gen_code reg)
+ }
+ where
+ gen_code reg -- AHA! A STANDARD-FORM THUNK
+ = withNewTickyCounterStdThunk (lfUpdatable lf_info) (idName bndr) $
+ do
+ { -- LAY OUT THE OBJECT
+ mod_name <- getModuleName
+ ; dflags <- getDynFlags
+ ; let header = if isLFThunk lf_info then ThunkHeader else StdHeader
+ (tot_wds, ptr_wds, payload_w_offsets)
+ = mkVirtHeapOffsets dflags header
+ (addArgReps (nonVoidStgArgs payload))
+
+ descr = closureDescription dflags mod_name (idName bndr)
+ closure_info = mkClosureInfo dflags False -- Not static
+ bndr lf_info tot_wds ptr_wds
+ descr
+
+-- ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body
+ ; let use_cc = cccsExpr; blame_cc = cccsExpr
+
+
+ -- BUILD THE OBJECT
+ ; let info_tbl = mkCmmInfo closure_info bndr currentCCS
+ ; hp_plus_n <- allocDynClosure (Just bndr) info_tbl lf_info
+ use_cc blame_cc payload_w_offsets
+
+ -- RETURN
+ ; return (mkRhsInit dflags reg lf_info hp_plus_n) }
+
+
+mkClosureLFInfo :: DynFlags
+ -> Id -- The binder
+                -> TopLevelFlag -- True if top level
+ -> [NonVoid Id] -- Free vars
+ -> UpdateFlag -- Update flag
+ -> [Id] -- Args
+ -> LambdaFormInfo
+mkClosureLFInfo dflags bndr top fvs upd_flag args
+ | null args =
+ mkLFThunk (idType bndr) top (map fromNonVoid fvs) upd_flag
+ | otherwise =
+ mkLFReEntrant top (map fromNonVoid fvs) args (mkArgDescr dflags args)
+
+
+------------------------------------------------------------------------
+-- The code for closures
+------------------------------------------------------------------------
+
+closureCodeBody :: Bool -- whether this is a top-level binding
+ -> Id -- the closure's name
+ -> ClosureInfo -- Lots of information about this closure
+ -> CostCentreStack -- Optional cost centre attached to closure
+ -> [NonVoid Id] -- incoming args to the closure
+ -> Int -- arity, including void args
+ -> CgStgExpr
+ -> [(NonVoid Id, ByteOff)] -- the closure's free vars
+ -> FCode ()
+
+{- There are two main cases for the code for closures.
+
+* If there are *no arguments*, then the closure is a thunk, and not in
+ normal form. So it should set up an update frame (if it is
+ shared). NB: Thunks cannot have a primitive type!
+
+* If there is *at least one* argument, then this closure is in
+ normal form, so there is no need to set up an update frame.
+-}
+
+closureCodeBody top_lvl bndr cl_info cc _args arity body fv_details
+ | arity == 0 -- No args i.e. thunk
+ = withNewTickyCounterThunk
+ (isStaticClosure cl_info)
+ (closureUpdReqd cl_info)
+ (closureName cl_info) $
+ emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl [] $
+ \(_, node, _) -> thunkCode cl_info fv_details cc node arity body
+ where
+ lf_info = closureLFInfo cl_info
+ info_tbl = mkCmmInfo cl_info bndr cc
+
+closureCodeBody top_lvl bndr cl_info cc args arity body fv_details
+ = -- Note: args may be [], if all args are Void
+ withNewTickyCounterFun
+ (closureSingleEntry cl_info)
+ (closureName cl_info)
+ args $ do {
+
+ ; let
+ lf_info = closureLFInfo cl_info
+ info_tbl = mkCmmInfo cl_info bndr cc
+
+ -- Emit the main entry code
+ ; emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args $
+ \(_offset, node, arg_regs) -> do
+ -- Emit slow-entry code (for entering a closure through a PAP)
+ { mkSlowEntryCode bndr cl_info arg_regs
+ ; dflags <- getDynFlags
+ ; let node_points = nodeMustPointToIt dflags lf_info
+ node' = if node_points then Just node else Nothing
+ ; loop_header_id <- newBlockId
+ -- Extend reader monad with information that
+ -- self-recursive tail calls can be optimized into local
+ -- jumps. See Note [Self-recursive tail calls] in GHC.StgToCmm.Expr.
+ ; withSelfLoop (bndr, loop_header_id, arg_regs) $ do
+ {
+ -- Main payload
+ ; entryHeapCheck cl_info node' arity arg_regs $ do
+ { -- emit LDV code when profiling
+ when node_points (ldvEnterClosure cl_info (CmmLocal node))
+ -- ticky after heap check to avoid double counting
+ ; tickyEnterFun cl_info
+ ; enterCostCentreFun cc
+ (CmmMachOp (mo_wordSub dflags)
+ [ CmmReg (CmmLocal node) -- See [NodeReg clobbered with loopification]
+ , mkIntExpr dflags (funTag dflags cl_info) ])
+ ; fv_bindings <- mapM bind_fv fv_details
+ -- Load free vars out of closure *after*
+ -- heap check, to reduce live vars over check
+ ; when node_points $ load_fvs node lf_info fv_bindings
+ ; void $ cgExpr body
+ }}}
+
+ }
+
+-- Note [NodeReg clobbered with loopification]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Previously we used to pass nodeReg (aka R1) here. With profiling, upon
+-- entering a closure, enterFunCCS was called with R1 passed to it. But since R1
+-- may get clobbered inside the body of a closure, and since a self-recursive
+-- tail call does not restore R1, a subsequent call to enterFunCCS received a
+-- possibly bogus value from R1. The solution is to not pass nodeReg (aka R1) to
+-- enterFunCCS. Instead, we pass node, the callee-saved temporary that stores
+-- the original value of R1. This way R1 may get modified but loopification will
+-- not care.
+
+-- A function closure pointer may be tagged, so we
+-- must take it into account when accessing the free variables.
+bind_fv :: (NonVoid Id, ByteOff) -> FCode (LocalReg, ByteOff)
+bind_fv (id, off) = do { reg <- rebindToReg id; return (reg, off) }
+
+load_fvs :: LocalReg -> LambdaFormInfo -> [(LocalReg, ByteOff)] -> FCode ()
+load_fvs node lf_info = mapM_ (\ (reg, off) ->
+ do dflags <- getDynFlags
+ let tag = lfDynTag dflags lf_info
+ emit $ mkTaggedObjectLoad dflags reg node off tag)
+
+-----------------------------------------
+-- The "slow entry" code for a function. This entry point takes its
+-- arguments on the stack. It loads the arguments into registers
+-- according to the calling convention, and jumps to the function's
+-- normal entry point. The function's closure is assumed to be in
+-- R1/node.
+--
+-- The slow entry point is used for unknown calls: e.g. stg_PAP_entry
+
+mkSlowEntryCode :: Id -> ClosureInfo -> [LocalReg] -> FCode ()
+-- If this function doesn't have a specialised ArgDescr, we need
+-- to generate the function's arg bitmap and slow-entry code.
+-- Here, we emit the slow-entry code.
+mkSlowEntryCode bndr cl_info arg_regs -- function closure is already in `Node'
+ | Just (_, ArgGen _) <- closureFunInfo cl_info
+ = do dflags <- getDynFlags
+ let node = idToReg dflags (NonVoid bndr)
+ slow_lbl = closureSlowEntryLabel cl_info
+ fast_lbl = closureLocalEntryLabel dflags cl_info
+ -- mkDirectJump does not clobber `Node' containing function closure
+ jump = mkJump dflags NativeNodeCall
+ (mkLblExpr fast_lbl)
+ (map (CmmReg . CmmLocal) (node : arg_regs))
+ (initUpdFrameOff dflags)
+ tscope <- getTickScope
+ emitProcWithConvention Slow Nothing slow_lbl
+ (node : arg_regs) (jump, tscope)
+ | otherwise = return ()
+
+-----------------------------------------
+thunkCode :: ClosureInfo -> [(NonVoid Id, ByteOff)] -> CostCentreStack
+ -> LocalReg -> Int -> CgStgExpr -> FCode ()
+thunkCode cl_info fv_details _cc node arity body
+ = do { dflags <- getDynFlags
+ ; let node_points = nodeMustPointToIt dflags (closureLFInfo cl_info)
+ node' = if node_points then Just node else Nothing
+ ; ldvEnterClosure cl_info (CmmLocal node) -- NB: Node always points when profiling
+
+ -- Heap overflow check
+ ; entryHeapCheck cl_info node' arity [] $ do
+ { -- Overwrite with black hole if necessary
+ -- but *after* the heap-overflow check
+ ; tickyEnterThunk cl_info
+ ; when (blackHoleOnEntry cl_info && node_points)
+ (blackHoleIt node)
+
+ -- Push update frame
+ ; setupUpdate cl_info node $
+           -- We only enter cc after setting up update so
+           -- that cc of enclosing scope will be recorded
+           -- in update frame. CAF/DICT functions will be
+           -- subsumed by this enclosing cc.
+ do { enterCostCentreThunk (CmmReg nodeReg)
+ ; let lf_info = closureLFInfo cl_info
+ ; fv_bindings <- mapM bind_fv fv_details
+ ; load_fvs node lf_info fv_bindings
+ ; void $ cgExpr body }}}
+
+
+------------------------------------------------------------------------
+-- Update and black-hole wrappers
+------------------------------------------------------------------------
+
+blackHoleIt :: LocalReg -> FCode ()
+-- Only called for closures with no args
+-- Node points to the closure
+blackHoleIt node_reg
+ = emitBlackHoleCode (CmmReg (CmmLocal node_reg))
+
+emitBlackHoleCode :: CmmExpr -> FCode ()
+emitBlackHoleCode node = do
+ dflags <- getDynFlags
+
+ -- Eager blackholing is normally disabled, but can be turned on with
+ -- -feager-blackholing. When it is on, we replace the info pointer
+ -- of the thunk with stg_EAGER_BLACKHOLE_info on entry.
+
+ -- If we wanted to do eager blackholing with slop filling, we'd need
+ -- to do it at the *end* of a basic block, otherwise we overwrite
+ -- the free variables in the thunk that we still need. We have a
+ -- patch for this from Andy Cheadle, but not incorporated yet. --SDM
+ -- [6/2004]
+ --
+ -- Previously, eager blackholing was enabled when ticky-ticky was
+ -- on. But it didn't work, and it wasn't strictly necessary to bring
+ -- back minimal ticky-ticky, so now EAGER_BLACKHOLING is
+ -- unconditionally disabled. -- krc 1/2007
+
+ -- Note the eager-blackholing check is here rather than in blackHoleOnEntry,
+ -- because emitBlackHoleCode is called from CmmParse.
+
+ let eager_blackholing = not (gopt Opt_SccProfilingOn dflags)
+ && gopt Opt_EagerBlackHoling dflags
+ -- Profiling needs slop filling (to support LDV
+ -- profiling), so currently eager blackholing doesn't
+ -- work with profiling.
+
+ when eager_blackholing $ do
+ emitStore (cmmOffsetW dflags node (fixedHdrSizeW dflags)) currentTSOExpr
+ -- See Note [Heap memory barriers] in SMP.h.
+ emitPrimCall [] MO_WriteBarrier []
+ emitStore node (CmmReg (CmmGlobal EagerBlackholeInfo))
+
+setupUpdate :: ClosureInfo -> LocalReg -> FCode () -> FCode ()
+ -- Nota Bene: this function does not change Node (even if it's a CAF),
+ -- so that the cost centre in the original closure can still be
+ -- extracted by a subsequent enterCostCentre
+setupUpdate closure_info node body
+ | not (lfUpdatable (closureLFInfo closure_info))
+ = body
+
+ | not (isStaticClosure closure_info)
+ = if not (closureUpdReqd closure_info)
+ then do tickyUpdateFrameOmitted; body
+ else do
+ tickyPushUpdateFrame
+ dflags <- getDynFlags
+ let
+ bh = blackHoleOnEntry closure_info &&
+ not (gopt Opt_SccProfilingOn dflags) &&
+ gopt Opt_EagerBlackHoling dflags
+
+ lbl | bh = mkBHUpdInfoLabel
+ | otherwise = mkUpdInfoLabel
+
+ pushUpdateFrame lbl (CmmReg (CmmLocal node)) body
+
+ | otherwise -- A static closure
+ = do { tickyUpdateBhCaf closure_info
+
+ ; if closureUpdReqd closure_info
+ then do -- Blackhole the (updatable) CAF:
+ { upd_closure <- link_caf node
+ ; pushUpdateFrame mkBHUpdInfoLabel upd_closure body }
+ else do {tickyUpdateFrameOmitted; body}
+ }
+
+-----------------------------------------------------------------------------
+-- Setting up update frames
+
+-- Push the update frame on the stack in the Entry area,
+-- leaving room for the return address that is already
+-- at the old end of the area.
+--
+pushUpdateFrame :: CLabel -> CmmExpr -> FCode () -> FCode ()
+pushUpdateFrame lbl updatee body
+ = do
+ updfr <- getUpdFrameOff
+ dflags <- getDynFlags
+ let
+ hdr = fixedHdrSize dflags
+ frame = updfr + hdr + sIZEOF_StgUpdateFrame_NoHdr dflags
+ --
+ emitUpdateFrame dflags (CmmStackSlot Old frame) lbl updatee
+ withUpdFrameOff frame body
+
+emitUpdateFrame :: DynFlags -> CmmExpr -> CLabel -> CmmExpr -> FCode ()
+emitUpdateFrame dflags frame lbl updatee = do
+ let
+ hdr = fixedHdrSize dflags
+ off_updatee = hdr + oFFSET_StgUpdateFrame_updatee dflags
+ --
+ emitStore frame (mkLblExpr lbl)
+ emitStore (cmmOffset dflags frame off_updatee) updatee
+ initUpdFrameProf frame
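    -- A hedged worked example: on a 64-bit non-profiling target,
    -- fixedHdrSize is 8 bytes, and assuming the updatee field immediately
    -- follows the header (oFFSET_StgUpdateFrame_updatee = 0), the frame's
    -- info pointer is stored at `frame` and the updatee at `frame + 8`.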
+
+-----------------------------------------------------------------------------
+-- Entering a CAF
+--
+-- See Note [CAF management] in rts/sm/Storage.c
+
+link_caf :: LocalReg -- pointer to the closure
+ -> FCode CmmExpr -- Returns amode for closure to be updated
+-- This function returns the address of the black hole, so it can be
+-- updated with the new value when available.
+link_caf node = do
+ { dflags <- getDynFlags
+ -- Call the RTS function newCAF, returning the newly-allocated
+ -- blackhole indirection closure
+ ; let newCAF_lbl = mkForeignLabel (fsLit "newCAF") Nothing
+ ForeignLabelInExternalPackage IsFunction
+ ; bh <- newTemp (bWord dflags)
+ ; emitRtsCallGen [(bh,AddrHint)] newCAF_lbl
+ [ (baseExpr, AddrHint),
+ (CmmReg (CmmLocal node), AddrHint) ]
+ False
+
+ -- see Note [atomic CAF entry] in rts/sm/Storage.c
+ ; updfr <- getUpdFrameOff
+ ; let target = entryCode dflags (closureInfoPtr dflags (CmmReg (CmmLocal node)))
+ ; emit =<< mkCmmIfThen
+ (cmmEqWord dflags (CmmReg (CmmLocal bh)) (zeroExpr dflags))
+ -- re-enter the CAF
+ (mkJump dflags NativeNodeCall target [] updfr)
+
+ ; return (CmmReg (CmmLocal bh)) }
+
+------------------------------------------------------------------------
+-- Profiling
+------------------------------------------------------------------------
+
+-- For "global" data constructors the description is simply the occurrence
+-- name of the data constructor itself. Otherwise it is determined by
+-- @closureDescription@ from the let binding information.
+
+closureDescription :: DynFlags
+ -> Module -- Module
+ -> Name -- Id of closure binding
+ -> String
+ -- Not called for StgRhsCon which have global info tables built in
+ -- CgConTbls.hs with a description generated from the data constructor
+closureDescription dflags mod_name name
+ = showSDocDump dflags (char '<' <>
+ (if isExternalName name
+ then ppr name -- ppr will include the module name prefix
+ else pprModule mod_name <> char '.' <> ppr name) <>
+ char '>')
+ -- showSDocDump, because we want to see the unique on the Name.
diff --git a/compiler/GHC/StgToCmm/Bind.hs-boot b/compiler/GHC/StgToCmm/Bind.hs-boot
new file mode 100644
index 0000000000..d16c34ebd3
--- /dev/null
+++ b/compiler/GHC/StgToCmm/Bind.hs-boot
@@ -0,0 +1,6 @@
+module GHC.StgToCmm.Bind where
+
+import GHC.StgToCmm.Monad( FCode )
+import StgSyn( CgStgBinding )
+
+cgBind :: CgStgBinding -> FCode ()
diff --git a/compiler/GHC/StgToCmm/CgUtils.hs b/compiler/GHC/StgToCmm/CgUtils.hs
new file mode 100644
index 0000000000..f3dccd9745
--- /dev/null
+++ b/compiler/GHC/StgToCmm/CgUtils.hs
@@ -0,0 +1,186 @@
+{-# LANGUAGE GADTs #-}
+
+-----------------------------------------------------------------------------
+--
+-- Code generator utilities; mostly monadic
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+module GHC.StgToCmm.CgUtils (
+ fixStgRegisters,
+ baseRegOffset,
+ get_Regtable_addr_from_offset,
+ regTableOffset,
+ get_GlobalReg_addr,
+ ) where
+
+import GhcPrelude
+
+import GHC.Platform.Regs
+import Cmm
+import Hoopl.Block
+import Hoopl.Graph
+import CmmUtils
+import CLabel
+import DynFlags
+import Outputable
+
+-- -----------------------------------------------------------------------------
+-- Information about global registers
+
+baseRegOffset :: DynFlags -> GlobalReg -> Int
+
+baseRegOffset dflags (VanillaReg 1 _) = oFFSET_StgRegTable_rR1 dflags
+baseRegOffset dflags (VanillaReg 2 _) = oFFSET_StgRegTable_rR2 dflags
+baseRegOffset dflags (VanillaReg 3 _) = oFFSET_StgRegTable_rR3 dflags
+baseRegOffset dflags (VanillaReg 4 _) = oFFSET_StgRegTable_rR4 dflags
+baseRegOffset dflags (VanillaReg 5 _) = oFFSET_StgRegTable_rR5 dflags
+baseRegOffset dflags (VanillaReg 6 _) = oFFSET_StgRegTable_rR6 dflags
+baseRegOffset dflags (VanillaReg 7 _) = oFFSET_StgRegTable_rR7 dflags
+baseRegOffset dflags (VanillaReg 8 _) = oFFSET_StgRegTable_rR8 dflags
+baseRegOffset dflags (VanillaReg 9 _) = oFFSET_StgRegTable_rR9 dflags
+baseRegOffset dflags (VanillaReg 10 _) = oFFSET_StgRegTable_rR10 dflags
+baseRegOffset _ (VanillaReg n _) = panic ("Registers above R10 are not supported (tried to use R" ++ show n ++ ")")
+baseRegOffset dflags (FloatReg 1) = oFFSET_StgRegTable_rF1 dflags
+baseRegOffset dflags (FloatReg 2) = oFFSET_StgRegTable_rF2 dflags
+baseRegOffset dflags (FloatReg 3) = oFFSET_StgRegTable_rF3 dflags
+baseRegOffset dflags (FloatReg 4) = oFFSET_StgRegTable_rF4 dflags
+baseRegOffset dflags (FloatReg 5) = oFFSET_StgRegTable_rF5 dflags
+baseRegOffset dflags (FloatReg 6) = oFFSET_StgRegTable_rF6 dflags
+baseRegOffset _ (FloatReg n) = panic ("Registers above F6 are not supported (tried to use F" ++ show n ++ ")")
+baseRegOffset dflags (DoubleReg 1) = oFFSET_StgRegTable_rD1 dflags
+baseRegOffset dflags (DoubleReg 2) = oFFSET_StgRegTable_rD2 dflags
+baseRegOffset dflags (DoubleReg 3) = oFFSET_StgRegTable_rD3 dflags
+baseRegOffset dflags (DoubleReg 4) = oFFSET_StgRegTable_rD4 dflags
+baseRegOffset dflags (DoubleReg 5) = oFFSET_StgRegTable_rD5 dflags
+baseRegOffset dflags (DoubleReg 6) = oFFSET_StgRegTable_rD6 dflags
+baseRegOffset _ (DoubleReg n) = panic ("Registers above D6 are not supported (tried to use D" ++ show n ++ ")")
+baseRegOffset dflags (XmmReg 1) = oFFSET_StgRegTable_rXMM1 dflags
+baseRegOffset dflags (XmmReg 2) = oFFSET_StgRegTable_rXMM2 dflags
+baseRegOffset dflags (XmmReg 3) = oFFSET_StgRegTable_rXMM3 dflags
+baseRegOffset dflags (XmmReg 4) = oFFSET_StgRegTable_rXMM4 dflags
+baseRegOffset dflags (XmmReg 5) = oFFSET_StgRegTable_rXMM5 dflags
+baseRegOffset dflags (XmmReg 6) = oFFSET_StgRegTable_rXMM6 dflags
+baseRegOffset _ (XmmReg n) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")")
+baseRegOffset dflags (YmmReg 1) = oFFSET_StgRegTable_rYMM1 dflags
+baseRegOffset dflags (YmmReg 2) = oFFSET_StgRegTable_rYMM2 dflags
+baseRegOffset dflags (YmmReg 3) = oFFSET_StgRegTable_rYMM3 dflags
+baseRegOffset dflags (YmmReg 4) = oFFSET_StgRegTable_rYMM4 dflags
+baseRegOffset dflags (YmmReg 5) = oFFSET_StgRegTable_rYMM5 dflags
+baseRegOffset dflags (YmmReg 6) = oFFSET_StgRegTable_rYMM6 dflags
+baseRegOffset _ (YmmReg n) = panic ("Registers above YMM6 are not supported (tried to use YMM" ++ show n ++ ")")
+baseRegOffset dflags (ZmmReg 1) = oFFSET_StgRegTable_rZMM1 dflags
+baseRegOffset dflags (ZmmReg 2) = oFFSET_StgRegTable_rZMM2 dflags
+baseRegOffset dflags (ZmmReg 3) = oFFSET_StgRegTable_rZMM3 dflags
+baseRegOffset dflags (ZmmReg 4) = oFFSET_StgRegTable_rZMM4 dflags
+baseRegOffset dflags (ZmmReg 5) = oFFSET_StgRegTable_rZMM5 dflags
+baseRegOffset dflags (ZmmReg 6) = oFFSET_StgRegTable_rZMM6 dflags
+baseRegOffset _ (ZmmReg n) = panic ("Registers above ZMM6 are not supported (tried to use ZMM" ++ show n ++ ")")
+baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags
+baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags
+baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags
+baseRegOffset _ (LongReg n) = panic ("Registers above L1 are not supported (tried to use L" ++ show n ++ ")")
+baseRegOffset dflags Hp = oFFSET_StgRegTable_rHp dflags
+baseRegOffset dflags HpLim = oFFSET_StgRegTable_rHpLim dflags
+baseRegOffset dflags CCCS = oFFSET_StgRegTable_rCCCS dflags
+baseRegOffset dflags CurrentTSO = oFFSET_StgRegTable_rCurrentTSO dflags
+baseRegOffset dflags CurrentNursery = oFFSET_StgRegTable_rCurrentNursery dflags
+baseRegOffset dflags HpAlloc = oFFSET_StgRegTable_rHpAlloc dflags
+baseRegOffset dflags EagerBlackholeInfo = oFFSET_stgEagerBlackholeInfo dflags
+baseRegOffset dflags GCEnter1 = oFFSET_stgGCEnter1 dflags
+baseRegOffset dflags GCFun = oFFSET_stgGCFun dflags
+baseRegOffset _ BaseReg = panic "CgUtils.baseRegOffset:BaseReg"
+baseRegOffset _ PicBaseReg = panic "CgUtils.baseRegOffset:PicBaseReg"
+baseRegOffset _ MachSp = panic "CgUtils.baseRegOffset:MachSp"
+baseRegOffset _ UnwindReturnReg = panic "CgUtils.baseRegOffset:UnwindReturnReg"
+
+
+-- -----------------------------------------------------------------------------
+--
+-- STG/Cmm GlobalReg
+--
+-- -----------------------------------------------------------------------------
+
+-- | We map STG registers onto appropriate CmmExprs. Either they map
+-- to real machine registers or are stored as offsets from BaseReg. Given
+-- a GlobalReg, get_GlobalReg_addr always produces the
+-- register table address for it.
+get_GlobalReg_addr :: DynFlags -> GlobalReg -> CmmExpr
+get_GlobalReg_addr dflags BaseReg = regTableOffset dflags 0
+get_GlobalReg_addr dflags mid
+ = get_Regtable_addr_from_offset dflags (baseRegOffset dflags mid)
+
+-- Calculate a literal representing an offset into the register table.
+-- Used when we don't have an actual BaseReg to offset from.
+regTableOffset :: DynFlags -> Int -> CmmExpr
+regTableOffset dflags n =
+ CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r dflags + n))
+
+get_Regtable_addr_from_offset :: DynFlags -> Int -> CmmExpr
+get_Regtable_addr_from_offset dflags offset =
+ if haveRegBase (targetPlatform dflags)
+ then CmmRegOff baseReg offset
+ else regTableOffset dflags offset
+
+-- | Fixup global registers so that they assign to locations within the
+-- RegTable if they aren't pinned for the current target.
+fixStgRegisters :: DynFlags -> RawCmmDecl -> RawCmmDecl
+fixStgRegisters _ top@(CmmData _ _) = top
+
+fixStgRegisters dflags (CmmProc info lbl live graph) =
+ let graph' = modifyGraph (mapGraphBlocks (fixStgRegBlock dflags)) graph
+ in CmmProc info lbl live graph'
+
+fixStgRegBlock :: DynFlags -> Block CmmNode e x -> Block CmmNode e x
+fixStgRegBlock dflags block = mapBlock (fixStgRegStmt dflags) block
+
+fixStgRegStmt :: DynFlags -> CmmNode e x -> CmmNode e x
+fixStgRegStmt dflags stmt = fixAssign $ mapExpDeep fixExpr stmt
+ where
+ platform = targetPlatform dflags
+
+ fixAssign stmt =
+ case stmt of
+ CmmAssign (CmmGlobal reg) src
+ -- MachSp isn't an STG register; it's merely here for tracking unwind
+ -- information
+ | reg == MachSp -> stmt
+ | otherwise ->
+ let baseAddr = get_GlobalReg_addr dflags reg
+ in case reg `elem` activeStgRegs (targetPlatform dflags) of
+ True -> CmmAssign (CmmGlobal reg) src
+ False -> CmmStore baseAddr src
+ other_stmt -> other_stmt
+
+ fixExpr expr = case expr of
+    -- MachSp isn't an STG register; it's merely here for tracking unwind information
+ CmmReg (CmmGlobal MachSp) -> expr
+ CmmReg (CmmGlobal reg) ->
+        -- Replace register leaves with appropriate Cmm expressions for
+        -- the given target. Global registers which map to a machine
+        -- register on this arch are left unchanged. For the rest,
+        -- BaseReg is taken to mean the address of the reg table in
+        -- MainCapability, and for all others we generate an
+        -- indirection to its location in the register table.
+ case reg `elem` activeStgRegs platform of
+ True -> expr
+ False ->
+ let baseAddr = get_GlobalReg_addr dflags reg
+ in case reg of
+ BaseReg -> baseAddr
+ _other -> CmmLoad baseAddr (globalRegType dflags reg)
+
+ CmmRegOff (CmmGlobal reg) offset ->
+        -- RegOff leaves are just a shorthand form. If the reg maps
+ -- to a real reg, we keep the shorthand, otherwise, we just
+ -- expand it and defer to the above code.
+ case reg `elem` activeStgRegs platform of
+ True -> expr
+ False -> CmmMachOp (MO_Add (wordWidth dflags)) [
+ fixExpr (CmmReg (CmmGlobal reg)),
+ CmmLit (CmmInt (fromIntegral offset)
+ (wordWidth dflags))]
+
+ other_expr -> other_expr
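+
+-- To illustrate (a sketch, not actual compiler output): on a target
+-- where Sp is not an active STG register, a statement such as
+--
+--   Sp = Sp + 8
+--
+-- is rewritten so that both occurrences go through the register table:
+--
+--   I64[BaseReg + OFFSET_rSp] = I64[BaseReg + OFFSET_rSp] + 8
+--
+-- where OFFSET_rSp stands in for the real offset of the rSp field.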
diff --git a/compiler/GHC/StgToCmm/Closure.hs b/compiler/GHC/StgToCmm/Closure.hs
new file mode 100644
index 0000000000..b56b06f399
--- /dev/null
+++ b/compiler/GHC/StgToCmm/Closure.hs
@@ -0,0 +1,1008 @@
+{-# LANGUAGE CPP, RecordWildCards #-}
+
+-----------------------------------------------------------------------------
+--
+-- Stg to C-- code generation:
+--
+-- The types LambdaFormInfo
+-- ClosureInfo
+--
+-- Nothing monadic in here!
+--
+-----------------------------------------------------------------------------
+
+module GHC.StgToCmm.Closure (
+ DynTag, tagForCon, isSmallFamily,
+
+ idPrimRep, isVoidRep, isGcPtrRep, addIdReps, addArgReps,
+ argPrimRep,
+
+ NonVoid(..), fromNonVoid, nonVoidIds, nonVoidStgArgs,
+ assertNonVoidIds, assertNonVoidStgArgs,
+
+ -- * LambdaFormInfo
+ LambdaFormInfo, -- Abstract
+ StandardFormInfo, -- ...ditto...
+ mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo,
+ mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape,
+ mkLFStringLit,
+ lfDynTag,
+ isLFThunk, isLFReEntrant, lfUpdatable,
+
+ -- * Used by other modules
+ CgLoc(..), SelfLoopInfo, CallMethod(..),
+ nodeMustPointToIt, isKnownFun, funTag, tagForArity, getCallMethod,
+
+ -- * ClosureInfo
+ ClosureInfo,
+ mkClosureInfo,
+ mkCmmInfo,
+
+ -- ** Inspection
+ closureLFInfo, closureName,
+
+ -- ** Labels
+ -- These just need the info table label
+ closureInfoLabel, staticClosureLabel,
+ closureSlowEntryLabel, closureLocalEntryLabel,
+
+ -- ** Predicates
+ -- These are really just functions on LambdaFormInfo
+ closureUpdReqd, closureSingleEntry,
+ closureReEntrant, closureFunInfo,
+ isToplevClosure,
+
+ blackHoleOnEntry, -- Needs LambdaFormInfo and SMRep
+        isStaticClosure,        -- Needs SMRep
+
+ -- * InfoTables
+ mkDataConInfoTable,
+ cafBlackHoleInfoTable,
+ indStaticInfoTable,
+ staticClosureNeedsLink,
+ ) where
+
+#include "../includes/MachDeps.h"
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import StgSyn
+import SMRep
+import Cmm
+import PprCmmExpr() -- For Outputable instances
+
+import CostCentre
+import BlockId
+import CLabel
+import Id
+import IdInfo
+import DataCon
+import Name
+import Type
+import TyCoRep
+import TcType
+import TyCon
+import RepType
+import BasicTypes
+import Outputable
+import DynFlags
+import Util
+
+import Data.Coerce (coerce)
+import qualified Data.ByteString.Char8 as BS8
+
+-----------------------------------------------------------------------------
+-- Data types and synonyms
+-----------------------------------------------------------------------------
+
+-- These data types are mostly used by other modules, especially
+-- GHC.StgToCmm.Monad, but we define them here because some functions in this
+-- module need to have access to them as well
+
+data CgLoc
+ = CmmLoc CmmExpr -- A stable CmmExpr; that is, one not mentioning
+ -- Hp, so that it remains valid across calls
+
+ | LneLoc BlockId [LocalReg] -- A join point
+ -- A join point (= let-no-escape) should only
+ -- be tail-called, and in a saturated way.
+ -- To tail-call it, assign to these locals,
+ -- and branch to the block id
+
+instance Outputable CgLoc where
+ ppr (CmmLoc e) = text "cmm" <+> ppr e
+ ppr (LneLoc b rs) = text "lne" <+> ppr b <+> ppr rs
+
+type SelfLoopInfo = (Id, BlockId, [LocalReg])
+
+-- used by ticky profiling
+isKnownFun :: LambdaFormInfo -> Bool
+isKnownFun LFReEntrant{} = True
+isKnownFun LFLetNoEscape = True
+isKnownFun _ = False
+
+
+-------------------------------------
+-- Non-void types
+-------------------------------------
+-- We frequently need the invariant that an Id or an argument
+-- is of a non-void type. This type is a witness to the invariant.
+
+newtype NonVoid a = NonVoid a
+ deriving (Eq, Show)
+
+fromNonVoid :: NonVoid a -> a
+fromNonVoid (NonVoid a) = a
+
+instance (Outputable a) => Outputable (NonVoid a) where
+ ppr (NonVoid a) = ppr a
+
+nonVoidIds :: [Id] -> [NonVoid Id]
+nonVoidIds ids = [NonVoid id | id <- ids, not (isVoidTy (idType id))]
+
+-- | Used in places where some invariant ensures that all these Ids are
+-- non-void; e.g. constructor field binders in case expressions.
+-- See Note [Post-unarisation invariants] in UnariseStg.
+assertNonVoidIds :: [Id] -> [NonVoid Id]
+assertNonVoidIds ids = ASSERT(not (any (isVoidTy . idType) ids))
+ coerce ids
+
+nonVoidStgArgs :: [StgArg] -> [NonVoid StgArg]
+nonVoidStgArgs args = [NonVoid arg | arg <- args, not (isVoidTy (stgArgType arg))]
+
+-- | Used in places where some invariant ensures that all these arguments are
+-- non-void; e.g. constructor arguments.
+-- See Note [Post-unarisation invariants] in UnariseStg.
+assertNonVoidStgArgs :: [StgArg] -> [NonVoid StgArg]
+assertNonVoidStgArgs args = ASSERT(not (any (isVoidTy . stgArgType) args))
+ coerce args
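+
+-- For example (a sketch): given the binders [x :: Int, s :: State# RealWorld],
+-- nonVoidIds keeps only x, since State# RealWorld has no runtime
+-- representation and is therefore a void type.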
+
+
+-----------------------------------------------------------------------------
+-- Representations
+-----------------------------------------------------------------------------
+
+-- Why are these here?
+
+-- | Assumes that there is precisely one 'PrimRep' of the type. This assumption
+-- holds after unarise.
+-- See Note [Post-unarisation invariants]
+idPrimRep :: Id -> PrimRep
+idPrimRep id = typePrimRep1 (idType id)
+ -- See also Note [VoidRep] in RepType
+
+-- | Assumes that Ids have one PrimRep, which holds after unarisation.
+-- See Note [Post-unarisation invariants]
+addIdReps :: [NonVoid Id] -> [NonVoid (PrimRep, Id)]
+addIdReps = map (\id -> let id' = fromNonVoid id
+ in NonVoid (idPrimRep id', id'))
+
+-- | Assumes that arguments have one PrimRep, which holds after unarisation.
+-- See Note [Post-unarisation invariants]
+addArgReps :: [NonVoid StgArg] -> [NonVoid (PrimRep, StgArg)]
+addArgReps = map (\arg -> let arg' = fromNonVoid arg
+ in NonVoid (argPrimRep arg', arg'))
+
+-- | Assumes that the argument has one PrimRep, which holds after unarisation.
+-- See Note [Post-unarisation invariants]
+argPrimRep :: StgArg -> PrimRep
+argPrimRep arg = typePrimRep1 (stgArgType arg)
+
+
+-----------------------------------------------------------------------------
+-- LambdaFormInfo
+-----------------------------------------------------------------------------
+
+-- Information about an identifier, from the code generator's point of
+-- view. Every identifier is bound to a LambdaFormInfo in the
+-- environment, which gives the code generator enough info to be able to
+-- tail call or return that identifier.
+
+data LambdaFormInfo
+ = LFReEntrant -- Reentrant closure (a function)
+ TopLevelFlag -- True if top level
+ OneShotInfo
+ !RepArity -- Arity. Invariant: always > 0
+ !Bool -- True <=> no fvs
+ ArgDescr -- Argument descriptor (should really be in ClosureInfo)
+
+ | LFThunk -- Thunk (zero arity)
+ TopLevelFlag
+ !Bool -- True <=> no free vars
+ !Bool -- True <=> updatable (i.e., *not* single-entry)
+ StandardFormInfo
+ !Bool -- True <=> *might* be a function type
+
+ | LFCon -- A saturated constructor application
+ DataCon -- The constructor
+
+ | LFUnknown -- Used for function arguments and imported things.
+ -- We know nothing about this closure.
+ -- Treat like updatable "LFThunk"...
+ -- Imported things which we *do* know something about use
+ -- one of the other LF constructors (eg LFReEntrant for
+ -- known functions)
+ !Bool -- True <=> *might* be a function type
+ -- The False case is good when we want to enter it,
+ -- because then we know the entry code will do
+ -- For a function, the entry code is the fast entry point
+
+ | LFUnlifted -- A value of unboxed type;
+ -- always a value, needs evaluation
+
+ | LFLetNoEscape -- See LetNoEscape module for precise description
+
+
+-------------------------
+-- StandardFormInfo tells whether this thunk has one of
+-- a small number of standard forms
+
+data StandardFormInfo
+ = NonStandardThunk
+ -- The usual case: not of the standard forms
+
+ | SelectorThunk
+ -- A SelectorThunk is of form
+ -- case x of
+ -- con a1,..,an -> ak
+ -- and the constructor is from a single-constr type.
+ WordOff -- 0-origin offset of ak within the "goods" of
+ -- constructor (Recall that the a1,...,an may be laid
+ -- out in the heap in a non-obvious order.)
+
+ | ApThunk
+ -- An ApThunk is of form
+ -- x1 ... xn
+ -- The code for the thunk just pushes x2..xn on the stack and enters x1.
+ -- There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled
+ -- in the RTS to save space.
+ RepArity -- Arity, n
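+
+-- To illustrate (informal STG, assuming the obvious field layout): a
+-- thunk whose body is
+--
+--   case p of (a, b) -> b
+--
+-- is a SelectorThunk with offset 1, while a thunk whose body is
+--
+--   f x y
+--
+-- is an ApThunk with arity 3 (counting f, x and y), matching one of
+-- the pre-compiled thunks mentioned above.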
+
+
+------------------------------------------------------
+-- Building LambdaFormInfo
+------------------------------------------------------
+
+mkLFArgument :: Id -> LambdaFormInfo
+mkLFArgument id
+ | isUnliftedType ty = LFUnlifted
+ | might_be_a_function ty = LFUnknown True
+ | otherwise = LFUnknown False
+ where
+ ty = idType id
+
+-------------
+mkLFLetNoEscape :: LambdaFormInfo
+mkLFLetNoEscape = LFLetNoEscape
+
+-------------
+mkLFReEntrant :: TopLevelFlag    -- True if top level
+ -> [Id] -- Free vars
+ -> [Id] -- Args
+ -> ArgDescr -- Argument descriptor
+ -> LambdaFormInfo
+
+mkLFReEntrant _ _ [] _
+ = pprPanic "mkLFReEntrant" empty
+mkLFReEntrant top fvs args arg_descr
+ = LFReEntrant top os_info (length args) (null fvs) arg_descr
+ where os_info = idOneShotInfo (head args)
+
+-------------
+mkLFThunk :: Type -> TopLevelFlag -> [Id] -> UpdateFlag -> LambdaFormInfo
+mkLFThunk thunk_ty top fvs upd_flag
+ = ASSERT( not (isUpdatable upd_flag) || not (isUnliftedType thunk_ty) )
+ LFThunk top (null fvs)
+ (isUpdatable upd_flag)
+ NonStandardThunk
+ (might_be_a_function thunk_ty)
+
+--------------
+might_be_a_function :: Type -> Bool
+-- Return False only if we are *sure* it's a data type
+-- Look through newtypes etc as much as poss
+might_be_a_function ty
+ | [LiftedRep] <- typePrimRep ty
+ , Just tc <- tyConAppTyCon_maybe (unwrapType ty)
+ , isDataTyCon tc
+ = False
+ | otherwise
+ = True
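+
+-- For example: might_be_a_function is False for [Int] (certainly a
+-- data type), but True for Int -> Int and for a bare type variable,
+-- where a function cannot be ruled out.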
+
+-------------
+mkConLFInfo :: DataCon -> LambdaFormInfo
+mkConLFInfo con = LFCon con
+
+-------------
+mkSelectorLFInfo :: Id -> Int -> Bool -> LambdaFormInfo
+mkSelectorLFInfo id offset updatable
+ = LFThunk NotTopLevel False updatable (SelectorThunk offset)
+ (might_be_a_function (idType id))
+
+-------------
+mkApLFInfo :: Id -> UpdateFlag -> Arity -> LambdaFormInfo
+mkApLFInfo id upd_flag arity
+ = LFThunk NotTopLevel (arity == 0) (isUpdatable upd_flag) (ApThunk arity)
+ (might_be_a_function (idType id))
+
+-------------
+mkLFImported :: Id -> LambdaFormInfo
+mkLFImported id
+ | Just con <- isDataConWorkId_maybe id
+ , isNullaryRepDataCon con
+ = LFCon con -- An imported nullary constructor
+ -- We assume that the constructor is evaluated so that
+ -- the id really does point directly to the constructor
+
+ | arity > 0
+ = LFReEntrant TopLevel noOneShotInfo arity True (panic "arg_descr")
+
+ | otherwise
+ = mkLFArgument id -- Not sure of exact arity
+ where
+ arity = idFunRepArity id
+
+-------------
+mkLFStringLit :: LambdaFormInfo
+mkLFStringLit = LFUnlifted
+
+-----------------------------------------------------
+-- Dynamic pointer tagging
+-----------------------------------------------------
+
+type DynTag = Int -- The tag on a *pointer*
+ -- (from the dynamic-tagging paper)
+
+-- Note [Data constructor dynamic tags]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- The family size of a data type (the number of constructors
+-- or the arity of a function) can be either:
+-- * small, if the family size < 2**tag_bits
+-- * big, otherwise.
+--
+-- Small families can have the constructor tag in the tag bits.
+-- Big families only use the tag value 1 to represent evaluatedness.
+-- We don't have very many tag bits: for example, we have 2 bits on
+-- x86-32 and 3 bits on x86-64.
+
+isSmallFamily :: DynFlags -> Int -> Bool
+isSmallFamily dflags fam_size = fam_size <= mAX_PTR_TAG dflags
+
+tagForCon :: DynFlags -> DataCon -> DynTag
+tagForCon dflags con
+ | isSmallFamily dflags fam_size = con_tag
+ | otherwise = 1
+ where
+ con_tag = dataConTag con -- NB: 1-indexed
+ fam_size = tyConFamilySize (dataConTyCon con)
+
+tagForArity :: DynFlags -> RepArity -> DynTag
+tagForArity dflags arity
+ | isSmallFamily dflags arity = arity
+ | otherwise = 0
+
+lfDynTag :: DynFlags -> LambdaFormInfo -> DynTag
+-- Return the tag in the low order bits of a variable bound
+-- to this LambdaForm
+lfDynTag dflags (LFCon con) = tagForCon dflags con
+lfDynTag dflags (LFReEntrant _ _ arity _ _) = tagForArity dflags arity
+lfDynTag _ _other = 0
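+
+-- A worked example (assuming 3 tag bits, so mAX_PTR_TAG = 7, as on
+-- x86-64): Bool is a small family, so a pointer to False carries tag 1
+-- and a pointer to True carries tag 2. A constructor from a family
+-- larger than 7 always gets tag 1, meaning merely "evaluated". For
+-- functions, a known arity-2 function gets tag 2, while arities above
+-- 7 get tag 0.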
+
+
+-----------------------------------------------------------------------------
+-- Observing LambdaFormInfo
+-----------------------------------------------------------------------------
+
+------------
+isLFThunk :: LambdaFormInfo -> Bool
+isLFThunk (LFThunk {}) = True
+isLFThunk _ = False
+
+isLFReEntrant :: LambdaFormInfo -> Bool
+isLFReEntrant (LFReEntrant {}) = True
+isLFReEntrant _ = False
+
+-----------------------------------------------------------------------------
+-- Choosing SM reps
+-----------------------------------------------------------------------------
+
+lfClosureType :: LambdaFormInfo -> ClosureTypeInfo
+lfClosureType (LFReEntrant _ _ arity _ argd) = Fun arity argd
+lfClosureType (LFCon con) = Constr (dataConTagZ con)
+ (dataConIdentity con)
+lfClosureType (LFThunk _ _ _ is_sel _) = thunkClosureType is_sel
+lfClosureType _ = panic "lfClosureType"
+
+thunkClosureType :: StandardFormInfo -> ClosureTypeInfo
+thunkClosureType (SelectorThunk off) = ThunkSelector off
+thunkClosureType _ = Thunk
+
+-- We *do* get non-updatable top-level thunks sometimes, e.g. f = g
+-- gets compiled to a jump to g (if g has non-zero arity), instead of
+-- messing around with update frames and PAPs. We set the closure type
+-- to FUN_STATIC in this case.
+
+-----------------------------------------------------------------------------
+-- nodeMustPointToIt
+-----------------------------------------------------------------------------
+
+nodeMustPointToIt :: DynFlags -> LambdaFormInfo -> Bool
+-- If nodeMustPointToIt is true, then the entry convention for
+-- this closure has R1 (the "Node" register) pointing to the
+-- closure itself --- the "self" argument
+
+nodeMustPointToIt _ (LFReEntrant top _ _ no_fvs _)
+ = not no_fvs -- Certainly if it has fvs we need to point to it
+ || isNotTopLevel top -- See Note [GC recovery]
+ -- For lex_profiling we also access the cost centre for a
+ -- non-inherited (i.e. non-top-level) function.
+ -- The isNotTopLevel test above ensures this is ok.
+
+nodeMustPointToIt dflags (LFThunk top no_fvs updatable NonStandardThunk _)
+ = not no_fvs -- Self parameter
+ || isNotTopLevel top -- Note [GC recovery]
+ || updatable -- Need to push update frame
+ || gopt Opt_SccProfilingOn dflags
+ -- For the non-updatable (single-entry case):
+ --
+ -- True if has fvs (in which case we need access to them, and we
+ -- should black-hole it)
+ -- or profiling (in which case we need to recover the cost centre
+ -- from inside it) ToDo: do we need this even for
+ -- top-level thunks? If not,
+ -- isNotTopLevel subsumes this
+
+nodeMustPointToIt _ (LFThunk {}) -- Node must point to a standard-form thunk
+ = True
+
+nodeMustPointToIt _ (LFCon _) = True
+
+ -- Strictly speaking, the above two don't need Node to point
+ -- to it if the arity = 0. But this is a *really* unlikely
+ -- situation. If we know it's nil (say) and we are entering
+ -- it, e.g. let x = [] in x, then we will certainly have inlined
+ -- x, since nil is a simple atom. So we gain little by not
+ -- having Node point to known zero-arity things. On the other
+ -- hand, we do lose something; Patrick's code for figuring out
+ -- when something has been updated but not entered relies on
+ -- having Node point to the result of an update. SLPJ
+ -- 27/11/92.
+
+nodeMustPointToIt _ (LFUnknown _) = True
+nodeMustPointToIt _ LFUnlifted = False
+nodeMustPointToIt _ LFLetNoEscape = False
+
+{- Note [GC recovery]
+~~~~~~~~~~~~~~~~~~~~~
+If we have a local let-binding (function or thunk)
+ let f = <body> in ...
+AND <body> allocates, then the heap-overflow check needs to know how
+to re-start the evaluation. It uses the "self" pointer to do this.
+So even if there are no free variables in <body>, we still make
+nodeMustPointToIt be True for non-top-level bindings.
+
+Why do any such bindings exist? After all, let-floating should have
+floated them out. Well, a clever optimiser might leave one there to
+avoid a space leak, deliberately recomputing a thunk. Also (and this
+really does happen occasionally) let-floating may make a function f smaller
+so it can be inlined, so now (f True) may generate a local no-fv closure.
+This actually happened during bootstrapping GHC itself, with f=mkRdrFunBind
+in TcGenDeriv. -}
+
+-----------------------------------------------------------------------------
+-- getCallMethod
+-----------------------------------------------------------------------------
+
+{- The entry conventions depend on the type of closure being entered,
+whether or not it has free variables, and whether we're running
+sequentially or in parallel.
+
+Closure Node Argument Enter
+Characteristics Par Req'd Passing Via
+---------------------------------------------------------------------------
+Unknown & no & yes & stack & node
+Known fun (>1 arg), no fvs & no & no & registers & fast entry (enough args)
+ & slow entry (otherwise)
+Known fun (>1 arg), fvs & no & yes & registers & fast entry (enough args)
+0 arg, no fvs \r,\s & no & no & n/a & direct entry
+0 arg, no fvs \u & no & yes & n/a & node
+0 arg, fvs \r,\s,selector & no & yes & n/a & node
+0 arg, fvs \r,\s & no & yes & n/a & direct entry
+0 arg, fvs \u & no & yes & n/a & node
+Unknown & yes & yes & stack & node
+Known fun (>1 arg), no fvs & yes & no & registers & fast entry (enough args)
+ & slow entry (otherwise)
+Known fun (>1 arg), fvs & yes & yes & registers & node
+0 arg, fvs \r,\s,selector & yes & yes & n/a & node
+0 arg, no fvs \r,\s & yes & no & n/a & direct entry
+0 arg, no fvs \u & yes & yes & n/a & node
+0 arg, fvs \r,\s & yes & yes & n/a & node
+0 arg, fvs \u & yes & yes & n/a & node
+
+When black-holing, single-entry closures could also be entered via node
+(rather than directly) to catch double-entry. -}
+
+data CallMethod
+ = EnterIt -- No args, not a function
+
+ | JumpToIt BlockId [LocalReg] -- A join point or a header of a local loop
+
+ | ReturnIt -- It's a value (function, unboxed value,
+ -- or constructor), so just return it.
+
+ | SlowCall -- Unknown fun, or known fun with
+ -- too few args.
+
+ | DirectEntry -- Jump directly, with args in regs
+ CLabel -- The code label
+ RepArity -- Its arity
+
+getCallMethod :: DynFlags
+ -> Name -- Function being applied
+              -> Id             -- Function Id used to check if it can refer to
+ -- CAF's and whether the function is tail-calling
+ -- itself
+ -> LambdaFormInfo -- Its info
+ -> RepArity -- Number of available arguments
+ -> RepArity -- Number of them being void arguments
+ -> CgLoc -- Passed in from cgIdApp so that we can
+ -- handle let-no-escape bindings and self-recursive
+ -- tail calls using the same data constructor,
+ -- JumpToIt. This saves us one case branch in
+ -- cgIdApp
+ -> Maybe SelfLoopInfo -- can we perform a self-recursive tail call?
+ -> CallMethod
+
+getCallMethod dflags _ id _ n_args v_args _cg_loc
+ (Just (self_loop_id, block_id, args))
+ | gopt Opt_Loopification dflags
+ , id == self_loop_id
+ , args `lengthIs` (n_args - v_args)
+ -- If these patterns match then we know that:
+ -- * loopification optimisation is turned on
+ -- * function is performing a self-recursive call in a tail position
+  -- * number of non-void parameters of the function matches the function's arity.
+ -- See Note [Self-recursive tail calls] and Note [Void arguments in
+ -- self-recursive tail calls] in GHC.StgToCmm.Expr for more details
+ = JumpToIt block_id args
+
+getCallMethod dflags name id (LFReEntrant _ _ arity _ _) n_args _v_args _cg_loc
+ _self_loop_info
+ | n_args == 0 -- No args at all
+ && not (gopt Opt_SccProfilingOn dflags)
+ -- See Note [Evaluating functions with profiling] in rts/Apply.cmm
+ = ASSERT( arity /= 0 ) ReturnIt
+ | n_args < arity = SlowCall -- Not enough args
+ | otherwise = DirectEntry (enterIdLabel dflags name (idCafInfo id)) arity
+
+getCallMethod _ _name _ LFUnlifted n_args _v_args _cg_loc _self_loop_info
+ = ASSERT( n_args == 0 ) ReturnIt
+
+getCallMethod _ _name _ (LFCon _) n_args _v_args _cg_loc _self_loop_info
+ = ASSERT( n_args == 0 ) ReturnIt
+ -- n_args=0 because it'd be ill-typed to apply a saturated
+ -- constructor application to anything
+
+getCallMethod dflags name id (LFThunk _ _ updatable std_form_info is_fun)
+ n_args _v_args _cg_loc _self_loop_info
+ | is_fun -- it *might* be a function, so we must "call" it (which is always safe)
+ = SlowCall -- We cannot just enter it [in eval/apply, the entry code
+ -- is the fast-entry code]
+
+ -- Since is_fun is False, we are *definitely* looking at a data value
+ | updatable || gopt Opt_Ticky dflags -- to catch double entry
+ {- OLD: || opt_SMP
+ I decided to remove this, because in SMP mode it doesn't matter
+ if we enter the same thunk multiple times, so the optimisation
+ of jumping directly to the entry code is still valid. --SDM
+ -}
+ = EnterIt
+
+ -- even a non-updatable selector thunk can be updated by the garbage
+ -- collector, so we must enter it. (#8817)
+ | SelectorThunk{} <- std_form_info
+ = EnterIt
+
+ -- We used to have ASSERT( n_args == 0 ), but actually it is
+ -- possible for the optimiser to generate
+ -- let bot :: Int = error Int "urk"
+ -- in (bot `cast` unsafeCoerce Int (Int -> Int)) 3
+ -- This happens as a result of the case-of-error transformation
+ -- So the right thing to do is just to enter the thing
+
+ | otherwise -- Jump direct to code for single-entry thunks
+ = ASSERT( n_args == 0 )
+ DirectEntry (thunkEntryLabel dflags name (idCafInfo id) std_form_info
+ updatable) 0
+
+getCallMethod _ _name _ (LFUnknown True) _n_arg _v_args _cg_locs _self_loop_info
+ = SlowCall -- might be a function
+
+getCallMethod _ name _ (LFUnknown False) n_args _v_args _cg_loc _self_loop_info
+ = ASSERT2( n_args == 0, ppr name <+> ppr n_args )
+ EnterIt -- Not a function
+
+getCallMethod _ _name _ LFLetNoEscape _n_args _v_args (LneLoc blk_id lne_regs)
+ _self_loop_info
+ = JumpToIt blk_id lne_regs
+
+getCallMethod _ _ _ _ _ _ _ _ = panic "Unknown call method"
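+
+-- For instance (a sketch): applying a known top-level function of
+-- arity 2 to one argument yields SlowCall; to two arguments,
+-- DirectEntry to its fast entry point; and to zero arguments (without
+-- profiling), simply ReturnIt, since the function value itself is the
+-- result.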
+
+-----------------------------------------------------------------------------
+-- Data types for closure information
+-----------------------------------------------------------------------------
+
+
+{- ClosureInfo: information about a binding
+
+ We make a ClosureInfo for each let binding (both top level and not),
+ but not bindings for data constructors: for those we build a CmmInfoTable
+ directly (see mkDataConInfoTable).
+
+ To a first approximation:
+ ClosureInfo = (LambdaFormInfo, CmmInfoTable)
+
+ A ClosureInfo has enough information
+ a) to construct the info table itself, and build other things
+ related to the binding (e.g. slow entry points for a function)
+ b) to allocate a closure containing that info pointer (i.e.
+ it knows the info table label)
+-}
+
+data ClosureInfo
+ = ClosureInfo {
+ closureName :: !Name, -- The thing bound to this closure
+ -- we don't really need this field: it's only used in generating
+ -- code for ticky and profiling, and we could pass the information
+ -- around separately, but it doesn't do much harm to keep it here.
+
+ closureLFInfo :: !LambdaFormInfo, -- NOTE: not an LFCon
+        -- this tells us what the closure contains: its right-hand side.
+
+ -- the rest is just an unpacked CmmInfoTable.
+ closureInfoLabel :: !CLabel,
+ closureSMRep :: !SMRep, -- representation used by storage mgr
+ closureProf :: !ProfilingInfo
+ }
+
+-- | Convert from 'ClosureInfo' to 'CmmInfoTable'.
+mkCmmInfo :: ClosureInfo -> Id -> CostCentreStack -> CmmInfoTable
+mkCmmInfo ClosureInfo {..} id ccs
+ = CmmInfoTable { cit_lbl = closureInfoLabel
+ , cit_rep = closureSMRep
+ , cit_prof = closureProf
+ , cit_srt = Nothing
+ , cit_clo = if isStaticRep closureSMRep
+ then Just (id,ccs)
+ else Nothing }
+
+--------------------------------------
+-- Building ClosureInfos
+--------------------------------------
+
+mkClosureInfo :: DynFlags
+ -> Bool -- Is static
+ -> Id
+ -> LambdaFormInfo
+ -> Int -> Int -- Total and pointer words
+ -> String -- String descriptor
+ -> ClosureInfo
+mkClosureInfo dflags is_static id lf_info tot_wds ptr_wds val_descr
+ = ClosureInfo { closureName = name
+ , closureLFInfo = lf_info
+ , closureInfoLabel = info_lbl -- These three fields are
+ , closureSMRep = sm_rep -- (almost) an info table
+ , closureProf = prof } -- (we don't have an SRT yet)
+ where
+ name = idName id
+ sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds (lfClosureType lf_info)
+ prof = mkProfilingInfo dflags id val_descr
+ nonptr_wds = tot_wds - ptr_wds
+
+ info_lbl = mkClosureInfoTableLabel id lf_info
+
+--------------------------------------
+-- Other functions over ClosureInfo
+--------------------------------------
+
+-- Eager blackholing is normally disabled, but can be turned on with
+-- -feager-blackholing. When it is on, we replace the info pointer of
+-- the thunk with stg_EAGER_BLACKHOLE_info on entry.
+
+-- If we wanted to do eager blackholing with slop filling,
+-- we'd need to do it at the *end* of a basic block, otherwise
+-- we overwrite the free variables in the thunk that we still
+-- need. We have a patch for this from Andy Cheadle, but not
+-- incorporated yet. --SDM [6/2004]
+--
+-- Previously, eager blackholing was enabled when ticky-ticky
+-- was on. But it didn't work, and it wasn't strictly necessary
+-- to bring back minimal ticky-ticky, so now EAGER_BLACKHOLING
+-- is unconditionally disabled. -- krc 1/2007
+--
+-- Static closures are never themselves black-holed.
+
+blackHoleOnEntry :: ClosureInfo -> Bool
+blackHoleOnEntry cl_info
+ | isStaticRep (closureSMRep cl_info)
+ = False -- Never black-hole a static closure
+
+ | otherwise
+ = case closureLFInfo cl_info of
+ LFReEntrant {} -> False
+ LFLetNoEscape -> False
+ LFThunk _ _no_fvs upd _ _ -> upd -- See Note [Black-holing non-updatable thunks]
+ _other -> panic "blackHoleOnEntry"
+
+{- Note [Black-holing non-updatable thunks]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We must not black-hole non-updatable (single-entry) thunks otherwise
+we run into issues like #10414. Specifically:
+
+ * There is no reason to black-hole a non-updatable thunk: it should
+ not be competed for by multiple threads
+
+ * It could, conceivably, cause a space leak if we don't black-hole
+ it, if there was a live but never-followed pointer pointing to it.
+ Let's hope that doesn't happen.
+
+ * It is dangerous to black-hole a non-updatable thunk because
+    - it is not updated (of course)
+ - hence, if it is black-holed and another thread tries to evaluate
+ it, that thread will block forever
+ This actually happened in #10414. So we do not black-hole
+ non-updatable thunks.
+
+ * How could two threads evaluate the same non-updatable (single-entry)
+ thunk? See Reid Barton's example below.
+
+ * Only eager blackholing could possibly black-hole a non-updatable
+ thunk, because lazy black-holing only affects thunks with an
+ update frame on the stack.
+
+Here is an example due to Reid Barton (#10414):
+ x = \u [] concat [[1], []]
+with the following definitions,
+
+ concat x = case x of
+ [] -> []
+ (:) x xs -> (++) x (concat xs)
+
+ (++) xs ys = case xs of
+ [] -> ys
+ (:) x rest -> (:) x ((++) rest ys)
+
+Where we use the syntax @\u []@ to denote an updatable thunk and @\s []@ to
+denote a single-entry (i.e. non-updatable) thunk. After a thread evaluates @x@
+to WHNF and calls @(++)@ the heap will contain the following thunks,
+
+ x = 1 : y
+ y = \u [] (++) [] z
+ z = \s [] concat []
+
+Now that the stage is set, consider the following evaluations by two racing threads
+A and B,
+
+ 1. Both threads enter @y@ before either is able to replace it with an
+ indirection
+
+ 2. Thread A does the case analysis in @(++)@ and consequently enters @z@,
+ replacing it with a black-hole
+
+ 3. At some later point thread B does the same case analysis and also attempts
+ to enter @z@. However, it finds that it has been replaced with a black-hole
+ so it blocks.
+
+ 4. Thread A eventually finishes evaluating @z@ (to @[]@) and updates @y@
+ accordingly. It does *not* update @z@, however, as it is single-entry. This
+ leaves Thread B blocked forever on a black-hole which will never be
+ updated.
+
+To avoid this sort of condition we never black-hole non-updatable thunks.
+-}
+
+isStaticClosure :: ClosureInfo -> Bool
+isStaticClosure cl_info = isStaticRep (closureSMRep cl_info)
+
+closureUpdReqd :: ClosureInfo -> Bool
+closureUpdReqd ClosureInfo{ closureLFInfo = lf_info } = lfUpdatable lf_info
+
+lfUpdatable :: LambdaFormInfo -> Bool
+lfUpdatable (LFThunk _ _ upd _ _) = upd
+lfUpdatable _ = False
+
+closureSingleEntry :: ClosureInfo -> Bool
+closureSingleEntry (ClosureInfo { closureLFInfo = LFThunk _ _ upd _ _}) = not upd
+closureSingleEntry (ClosureInfo { closureLFInfo = LFReEntrant _ OneShotLam _ _ _}) = True
+closureSingleEntry _ = False
+
+closureReEntrant :: ClosureInfo -> Bool
+closureReEntrant (ClosureInfo { closureLFInfo = LFReEntrant {} }) = True
+closureReEntrant _ = False
+
+closureFunInfo :: ClosureInfo -> Maybe (RepArity, ArgDescr)
+closureFunInfo (ClosureInfo { closureLFInfo = lf_info }) = lfFunInfo lf_info
+
+lfFunInfo :: LambdaFormInfo -> Maybe (RepArity, ArgDescr)
+lfFunInfo (LFReEntrant _ _ arity _ arg_desc) = Just (arity, arg_desc)
+lfFunInfo _ = Nothing
+
+funTag :: DynFlags -> ClosureInfo -> DynTag
+funTag dflags (ClosureInfo { closureLFInfo = lf_info })
+ = lfDynTag dflags lf_info
+
+isToplevClosure :: ClosureInfo -> Bool
+isToplevClosure (ClosureInfo { closureLFInfo = lf_info })
+ = case lf_info of
+ LFReEntrant TopLevel _ _ _ _ -> True
+ LFThunk TopLevel _ _ _ _ -> True
+ _other -> False
+
+--------------------------------------
+-- Label generation
+--------------------------------------
+
+staticClosureLabel :: ClosureInfo -> CLabel
+staticClosureLabel = toClosureLbl . closureInfoLabel
+
+closureSlowEntryLabel :: ClosureInfo -> CLabel
+closureSlowEntryLabel = toSlowEntryLbl . closureInfoLabel
+
+closureLocalEntryLabel :: DynFlags -> ClosureInfo -> CLabel
+closureLocalEntryLabel dflags
+ | tablesNextToCode dflags = toInfoLbl . closureInfoLabel
+ | otherwise = toEntryLbl . closureInfoLabel
+
+mkClosureInfoTableLabel :: Id -> LambdaFormInfo -> CLabel
+mkClosureInfoTableLabel id lf_info
+ = case lf_info of
+ LFThunk _ _ upd_flag (SelectorThunk offset) _
+ -> mkSelectorInfoLabel upd_flag offset
+
+ LFThunk _ _ upd_flag (ApThunk arity) _
+ -> mkApInfoTableLabel upd_flag arity
+
+ LFThunk{} -> std_mk_lbl name cafs
+ LFReEntrant{} -> std_mk_lbl name cafs
+ _other -> panic "closureInfoTableLabel"
+
+ where
+ name = idName id
+
+ std_mk_lbl | is_local = mkLocalInfoTableLabel
+ | otherwise = mkInfoTableLabel
+
+ cafs = idCafInfo id
+ is_local = isDataConWorkId id
+ -- Make the _info pointer for the implicit datacon worker
+ -- binding local. The reason we can do this is that importing
+ -- code always either uses the _closure or _con_info. By the
+ -- invariants in CorePrep anything else gets eta expanded.
+
+
+thunkEntryLabel :: DynFlags -> Name -> CafInfo -> StandardFormInfo -> Bool -> CLabel
+-- thunkEntryLabel is a local helper function, not exported. It's used from
+-- getCallMethod.
+thunkEntryLabel dflags _thunk_id _ (ApThunk arity) upd_flag
+ = enterApLabel dflags upd_flag arity
+thunkEntryLabel dflags _thunk_id _ (SelectorThunk offset) upd_flag
+ = enterSelectorLabel dflags upd_flag offset
+thunkEntryLabel dflags thunk_id c _ _
+ = enterIdLabel dflags thunk_id c
+
+enterApLabel :: DynFlags -> Bool -> Arity -> CLabel
+enterApLabel dflags is_updatable arity
+ | tablesNextToCode dflags = mkApInfoTableLabel is_updatable arity
+ | otherwise = mkApEntryLabel is_updatable arity
+
+enterSelectorLabel :: DynFlags -> Bool -> WordOff -> CLabel
+enterSelectorLabel dflags upd_flag offset
+ | tablesNextToCode dflags = mkSelectorInfoLabel upd_flag offset
+ | otherwise = mkSelectorEntryLabel upd_flag offset
+
+enterIdLabel :: DynFlags -> Name -> CafInfo -> CLabel
+enterIdLabel dflags id c
+ | tablesNextToCode dflags = mkInfoTableLabel id c
+ | otherwise = mkEntryLabel id c
+
+
+--------------------------------------
+-- Profiling
+--------------------------------------
+
+-- Profiling requires two pieces of information to be determined for
+-- each closure's info table --- description and type.
+
+-- The description is stored directly in the @CClosureInfoTable@ when the
+-- info table is built.
+
+-- The type is determined from the type information stored with the @Id@
+-- in the closure info using @closureTypeDescr@.
+
+mkProfilingInfo :: DynFlags -> Id -> String -> ProfilingInfo
+mkProfilingInfo dflags id val_descr
+ | not (gopt Opt_SccProfilingOn dflags) = NoProfilingInfo
+ | otherwise = ProfilingInfo ty_descr_w8 (BS8.pack val_descr)
+ where
+ ty_descr_w8 = BS8.pack (getTyDescription (idType id))
+
+getTyDescription :: Type -> String
+getTyDescription ty
+ = case (tcSplitSigmaTy ty) of { (_, _, tau_ty) ->
+ case tau_ty of
+ TyVarTy _ -> "*"
+ AppTy fun _ -> getTyDescription fun
+ TyConApp tycon _ -> getOccString tycon
+ FunTy {} -> '-' : fun_result tau_ty
+ ForAllTy _ ty -> getTyDescription ty
+ LitTy n -> getTyLitDescription n
+ CastTy ty _ -> getTyDescription ty
+ CoercionTy co -> pprPanic "getTyDescription" (ppr co)
+ }
+ where
+ fun_result (FunTy { ft_res = res }) = '>' : fun_result res
+ fun_result other = getTyDescription other
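+
+-- For example: getTyDescription yields "Maybe" for Maybe a, "*" for a
+-- bare type variable, and "->>Char" for Int -> Bool -> Char (a leading
+-- '-' plus one '>' per arrow).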
+
+getTyLitDescription :: TyLit -> String
+getTyLitDescription l =
+ case l of
+ NumTyLit n -> show n
+ StrTyLit n -> show n
+
+--------------------------------------
+-- CmmInfoTable-related things
+--------------------------------------
+
+mkDataConInfoTable :: DynFlags -> DataCon -> Bool -> Int -> Int -> CmmInfoTable
+mkDataConInfoTable dflags data_con is_static ptr_wds nonptr_wds
+ = CmmInfoTable { cit_lbl = info_lbl
+ , cit_rep = sm_rep
+ , cit_prof = prof
+ , cit_srt = Nothing
+ , cit_clo = Nothing }
+ where
+ name = dataConName data_con
+ info_lbl = mkConInfoTableLabel name NoCafRefs
+ sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds cl_type
+ cl_type = Constr (dataConTagZ data_con) (dataConIdentity data_con)
+ -- We keep the *zero-indexed* tag in the srt_len field
+ -- of the info table of a data constructor.
+
+ prof | not (gopt Opt_SccProfilingOn dflags) = NoProfilingInfo
+ | otherwise = ProfilingInfo ty_descr val_descr
+
+ ty_descr = BS8.pack $ occNameString $ getOccName $ dataConTyCon data_con
+ val_descr = BS8.pack $ occNameString $ getOccName data_con
+
+-- We need a black-hole closure info to pass to @allocDynClosure@ when we
+-- want to allocate the black hole on entry to a CAF.
+
+cafBlackHoleInfoTable :: CmmInfoTable
+cafBlackHoleInfoTable
+ = CmmInfoTable { cit_lbl = mkCAFBlackHoleInfoTableLabel
+ , cit_rep = blackHoleRep
+ , cit_prof = NoProfilingInfo
+ , cit_srt = Nothing
+ , cit_clo = Nothing }
+
+indStaticInfoTable :: CmmInfoTable
+indStaticInfoTable
+ = CmmInfoTable { cit_lbl = mkIndStaticInfoLabel
+ , cit_rep = indStaticRep
+ , cit_prof = NoProfilingInfo
+ , cit_srt = Nothing
+ , cit_clo = Nothing }
+
+staticClosureNeedsLink :: Bool -> CmmInfoTable -> Bool
+-- A static closure needs a link field to aid the GC when traversing
+-- the static closure graph. But it only needs such a field if either
+-- a) it has an SRT
+-- b) it's a constructor with one or more pointer fields
+-- In case (b), the constructor's fields themselves play the role
+-- of the SRT.
+staticClosureNeedsLink has_srt CmmInfoTable{ cit_rep = smrep }
+ | isConRep smrep = not (isStaticNoCafCon smrep)
+ | otherwise = has_srt
diff --git a/compiler/GHC/StgToCmm/Con.hs b/compiler/GHC/StgToCmm/Con.hs
new file mode 100644
index 0000000000..08508fbecc
--- /dev/null
+++ b/compiler/GHC/StgToCmm/Con.hs
@@ -0,0 +1,285 @@
+{-# LANGUAGE CPP #-}
+
+-----------------------------------------------------------------------------
+--
+-- Stg to C--: code generation for constructors
+--
+-- This module provides the support code for StgToCmm to deal with
+-- constructors on the RHSs of let(rec)s.
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+module GHC.StgToCmm.Con (
+ cgTopRhsCon, buildDynCon, bindConArgs
+ ) where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import StgSyn
+import CoreSyn ( AltCon(..) )
+
+import GHC.StgToCmm.Monad
+import GHC.StgToCmm.Env
+import GHC.StgToCmm.Heap
+import GHC.StgToCmm.Layout
+import GHC.StgToCmm.Utils
+import GHC.StgToCmm.Closure
+
+import CmmExpr
+import CmmUtils
+import CLabel
+import MkGraph
+import SMRep
+import CostCentre
+import Module
+import DataCon
+import DynFlags
+import FastString
+import Id
+import RepType (countConRepArgs)
+import Literal
+import PrelInfo
+import Outputable
+import GHC.Platform
+import Util
+import MonadUtils (mapMaybeM)
+
+import Control.Monad
+import Data.Char
+
+
+
+---------------------------------------------------------------
+-- Top-level constructors
+---------------------------------------------------------------
+
+cgTopRhsCon :: DynFlags
+ -> Id -- Name of thing bound to this RHS
+ -> DataCon -- Id
+ -> [NonVoid StgArg] -- Args
+ -> (CgIdInfo, FCode ())
+cgTopRhsCon dflags id con args =
+ let id_info = litIdInfo dflags id (mkConLFInfo con) (CmmLabel closure_label)
+ in (id_info, gen_code)
+ where
+ name = idName id
+ caffy = idCafInfo id -- any stgArgHasCafRefs args
+ closure_label = mkClosureLabel name caffy
+
+ gen_code =
+ do { this_mod <- getModuleName
+ ; when (platformOS (targetPlatform dflags) == OSMinGW32) $
+ -- Windows DLLs have a problem with static cross-DLL refs.
+ MASSERT( not (isDllConApp dflags this_mod con (map fromNonVoid args)) )
+ ; ASSERT( args `lengthIs` countConRepArgs con ) return ()
+
+ -- LAY IT OUT
+ ; let
+ (tot_wds, -- #ptr_wds + #nonptr_wds
+ ptr_wds, -- #ptr_wds
+ nv_args_w_offsets) =
+ mkVirtHeapOffsetsWithPadding dflags StdHeader (addArgReps args)
+
+ mk_payload (Padding len _) = return (CmmInt 0 (widthFromBytes len))
+ mk_payload (FieldOff arg _) = do
+ amode <- getArgAmode arg
+ case amode of
+ CmmLit lit -> return lit
+ _ -> panic "GHC.StgToCmm.Con.cgTopRhsCon"
+
+ nonptr_wds = tot_wds - ptr_wds
+
+ -- we're not really going to emit an info table, so having
+ -- to make a CmmInfoTable is a bit overkill, but mkStaticClosureFields
+ -- needs to poke around inside it.
+ info_tbl = mkDataConInfoTable dflags con True ptr_wds nonptr_wds
+
+
+ ; payload <- mapM mk_payload nv_args_w_offsets
+ -- NB1: nv_args_w_offsets is sorted into ptrs then non-ptrs
+ -- NB2: all the amodes should be Lits!
+ -- TODO (osa): Why?
+
+ ; let closure_rep = mkStaticClosureFields
+ dflags
+ info_tbl
+ dontCareCCS -- Because it's static data
+ caffy -- Has CAF refs
+ payload
+
+ -- BUILD THE OBJECT
+ ; emitDataLits closure_label closure_rep
+
+ ; return () }
+
+
+---------------------------------------------------------------
+-- Lay out and allocate non-top-level constructors
+---------------------------------------------------------------
+
+buildDynCon :: Id -- Name of the thing to which this constr will
+ -- be bound
+ -> Bool -- is it genuinely bound to that name, or just
+ -- for profiling?
+ -> CostCentreStack -- Where to grab cost centre from;
+ -- current CCS if currentOrSubsumedCCS
+ -> DataCon -- The data constructor
+ -> [NonVoid StgArg] -- Its args
+ -> FCode (CgIdInfo, FCode CmmAGraph)
+ -- Return details about how to find it and initialization code
+buildDynCon binder actually_bound cc con args
+ = do dflags <- getDynFlags
+ buildDynCon' dflags (targetPlatform dflags) binder actually_bound cc con args
+
+
+buildDynCon' :: DynFlags
+ -> Platform
+ -> Id -> Bool
+ -> CostCentreStack
+ -> DataCon
+ -> [NonVoid StgArg]
+ -> FCode (CgIdInfo, FCode CmmAGraph)
+
+{- We used to pass a boolean indicating whether all the
+args were of size zero, so we could use a static
+constructor; but I concluded that it just isn't worth it.
+Now that I/O uses unboxed tuples, there just aren't any constructors
+with all size-zero args.
+
+The reason for having a separate argument, rather than looking at
+the addr modes of the args is that we may be in a "knot", and
+premature looking at the args will cause the compiler to black-hole!
+-}
+
+
+-------- buildDynCon': Nullary constructors --------------
+-- First we deal with the case of zero-arity constructors. They
+-- will probably be unfolded, so we don't expect to see this case much,
+-- if at all, but it does no harm, and sets the scene for characters.
+--
+-- In the case of zero-arity constructors, or, more accurately, those
+-- which have exclusively size-zero (VoidRep) args, we generate no code
+-- at all.
+
+buildDynCon' dflags _ binder _ _cc con []
+ | isNullaryRepDataCon con
+ = return (litIdInfo dflags binder (mkConLFInfo con)
+ (CmmLabel (mkClosureLabel (dataConName con) (idCafInfo binder))),
+ return mkNop)
+
+-------- buildDynCon': Charlike and Intlike constructors -----------
+{- The following three paragraphs about @Char@-like and @Int@-like
+closures are obsolete, but I don't understand the details well enough
+to properly word them, sorry. I've changed the treatment of @Char@s to
+be analogous to @Int@s: only a subset is preallocated, because @Char@
+now has 31 bits. Only literals are handled here. -- Qrczak
+
+Now for @Char@-like closures. We generate an assignment of the
+address of the closure to a temporary. It would be possible simply to
+generate no code, and record the addressing mode in the environment,
+but we'd have to be careful if the argument wasn't a constant --- so
+for simplicity we just always assign to a temporary.
+
+Last special case: @Int@-like closures. We only special-case the
+situation in which the argument is a literal in the range
+@mIN_INTLIKE@..@mAX_INTLIKE@. NB: for @Char@-like closures we can
+work with any old argument, but for @Int@-like ones the argument has
+to be a literal. Reason: @Char@-like closures have an argument type
+which is guaranteed to be in range.
+
+Because of this, we can safely return an addressing mode.
+
+We don't support this optimisation when compiling into Windows DLLs yet
+because they don't support cross package data references well.
+-}
+
+buildDynCon' dflags platform binder _ _cc con [arg]
+ | maybeIntLikeCon con
+ , platformOS platform /= OSMinGW32 || not (positionIndependent dflags)
+ , NonVoid (StgLitArg (LitNumber LitNumInt val _)) <- arg
+ , val <= fromIntegral (mAX_INTLIKE dflags) -- Comparisons at type Integer!
+ , val >= fromIntegral (mIN_INTLIKE dflags) -- ...ditto...
+ = do { let intlike_lbl = mkCmmClosureLabel rtsUnitId (fsLit "stg_INTLIKE")
+ val_int = fromIntegral val :: Int
+ offsetW = (val_int - mIN_INTLIKE dflags) * (fixedHdrSizeW dflags + 1)
+ -- INTLIKE closures consist of a header and one word payload
+ intlike_amode = cmmLabelOffW dflags intlike_lbl offsetW
+ ; return ( litIdInfo dflags binder (mkConLFInfo con) intlike_amode
+ , return mkNop) }
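+
+-- A worked example (assuming mIN_INTLIKE = -16 and a one-word header,
+-- i.e. no profiling): the preallocated closure for the literal 5 lives
+-- at offsetW = (5 - (-16)) * (1 + 1) = 42 words past the stg_INTLIKE
+-- label.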
+
+buildDynCon' dflags platform binder _ _cc con [arg]
+ | maybeCharLikeCon con
+ , platformOS platform /= OSMinGW32 || not (positionIndependent dflags)
+ , NonVoid (StgLitArg (LitChar val)) <- arg
+ , let val_int = ord val :: Int
+ , val_int <= mAX_CHARLIKE dflags
+ , val_int >= mIN_CHARLIKE dflags
+ = do { let charlike_lbl = mkCmmClosureLabel rtsUnitId (fsLit "stg_CHARLIKE")
+ offsetW = (val_int - mIN_CHARLIKE dflags) * (fixedHdrSizeW dflags + 1)
+ -- CHARLIKE closures consist of a header and one word payload
+ charlike_amode = cmmLabelOffW dflags charlike_lbl offsetW
+ ; return ( litIdInfo dflags binder (mkConLFInfo con) charlike_amode
+ , return mkNop) }
+
+-------- buildDynCon': the general case -----------
+buildDynCon' dflags _ binder actually_bound ccs con args
+ = do { (id_info, reg) <- rhsIdInfo binder lf_info
+ ; return (id_info, gen_code reg)
+ }
+ where
+ lf_info = mkConLFInfo con
+
+ gen_code reg
+ = do { let (tot_wds, ptr_wds, args_w_offsets)
+ = mkVirtConstrOffsets dflags (addArgReps args)
+ nonptr_wds = tot_wds - ptr_wds
+ info_tbl = mkDataConInfoTable dflags con False
+ ptr_wds nonptr_wds
+ ; let ticky_name | actually_bound = Just binder
+ | otherwise = Nothing
+
+ ; hp_plus_n <- allocDynClosure ticky_name info_tbl lf_info
+ use_cc blame_cc args_w_offsets
+ ; return (mkRhsInit dflags reg lf_info hp_plus_n) }
+ where
+ use_cc -- cost-centre to stick in the object
+ | isCurrentCCS ccs = cccsExpr
+ | otherwise = panic "buildDynCon: non-current CCS not implemented"
+
+ blame_cc = use_cc -- cost-centre on which to blame the alloc (same)
+
+
+---------------------------------------------------------------
+-- Binding constructor arguments
+---------------------------------------------------------------
+
+bindConArgs :: AltCon -> LocalReg -> [NonVoid Id] -> FCode [LocalReg]
+-- bindConArgs is called from cgAlt of a case
+-- (bindConArgs con args) augments the environment with bindings for the
+-- binders args, assuming that we have just returned from a 'case' which
+-- found a con
+bindConArgs (DataAlt con) base args
+ = ASSERT(not (isUnboxedTupleCon con))
+ do dflags <- getDynFlags
+ let (_, _, args_w_offsets) = mkVirtConstrOffsets dflags (addIdReps args)
+ tag = tagForCon dflags con
+
+ -- The binding below forces the masking out of the tag bits
+ -- when accessing the constructor field.
+ bind_arg :: (NonVoid Id, ByteOff) -> FCode (Maybe LocalReg)
+ bind_arg (arg@(NonVoid b), offset)
+ | isDeadBinder b -- See Note [Dead-binder optimisation] in GHC.StgToCmm.Expr
+ = return Nothing
+ | otherwise
+ = do { emit $ mkTaggedObjectLoad dflags (idToReg dflags arg)
+ base offset tag
+ ; Just <$> bindArgToReg arg }
+
+ mapMaybeM bind_arg args_w_offsets
+
+bindConArgs _other_con _base args
+ = ASSERT( null args ) return []
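+
+-- To illustrate (a sketch in Cmm-like syntax): for a constructor whose
+-- pointer carries tag 2, binding its first (pointer) field generates
+-- roughly
+--
+--   field1 = P64[base + fixedHdrSize - 2]
+--
+-- i.e. the tag is folded into the load offset instead of being masked
+-- off the pointer beforehand.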
diff --git a/compiler/GHC/StgToCmm/Env.hs b/compiler/GHC/StgToCmm/Env.hs
new file mode 100644
index 0000000000..e32c6a1ecb
--- /dev/null
+++ b/compiler/GHC/StgToCmm/Env.hs
@@ -0,0 +1,208 @@
+{-# LANGUAGE CPP #-}
+
+-----------------------------------------------------------------------------
+--
+-- Stg to C-- code generation: the binding environment
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+module GHC.StgToCmm.Env (
+ CgIdInfo,
+
+ litIdInfo, lneIdInfo, rhsIdInfo, mkRhsInit,
+ idInfoToAmode,
+
+ addBindC, addBindsC,
+
+ bindArgsToRegs, bindToReg, rebindToReg,
+ bindArgToReg, idToReg,
+ getArgAmode, getNonVoidArgAmodes,
+ getCgIdInfo,
+ maybeLetNoEscape,
+ ) where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import TyCon
+import GHC.StgToCmm.Monad
+import GHC.StgToCmm.Utils
+import GHC.StgToCmm.Closure
+
+import CLabel
+
+import BlockId
+import CmmExpr
+import CmmUtils
+import DynFlags
+import Id
+import MkGraph
+import Name
+import Outputable
+import StgSyn
+import Type
+import TysPrim
+import UniqFM
+import Util
+import VarEnv
+
+-------------------------------------
+-- Manipulating CgIdInfo
+-------------------------------------
+
+mkCgIdInfo :: Id -> LambdaFormInfo -> CmmExpr -> CgIdInfo
+mkCgIdInfo id lf expr
+ = CgIdInfo { cg_id = id, cg_lf = lf
+ , cg_loc = CmmLoc expr }
+
+litIdInfo :: DynFlags -> Id -> LambdaFormInfo -> CmmLit -> CgIdInfo
+litIdInfo dflags id lf lit
+ = CgIdInfo { cg_id = id, cg_lf = lf
+ , cg_loc = CmmLoc (addDynTag dflags (CmmLit lit) tag) }
+ where
+ tag = lfDynTag dflags lf
+
+lneIdInfo :: DynFlags -> Id -> [NonVoid Id] -> CgIdInfo
+lneIdInfo dflags id regs
+ = CgIdInfo { cg_id = id, cg_lf = lf
+ , cg_loc = LneLoc blk_id (map (idToReg dflags) regs) }
+ where
+ lf = mkLFLetNoEscape
+ blk_id = mkBlockId (idUnique id)
+
+
+rhsIdInfo :: Id -> LambdaFormInfo -> FCode (CgIdInfo, LocalReg)
+rhsIdInfo id lf_info
+ = do dflags <- getDynFlags
+ reg <- newTemp (gcWord dflags)
+ return (mkCgIdInfo id lf_info (CmmReg (CmmLocal reg)), reg)
+
+mkRhsInit :: DynFlags -> LocalReg -> LambdaFormInfo -> CmmExpr -> CmmAGraph
+mkRhsInit dflags reg lf_info expr
+ = mkAssign (CmmLocal reg) (addDynTag dflags expr (lfDynTag dflags lf_info))
+
+idInfoToAmode :: CgIdInfo -> CmmExpr
+-- Returns a CmmExpr for the *tagged* pointer
+idInfoToAmode (CgIdInfo { cg_loc = CmmLoc e }) = e
+idInfoToAmode cg_info
+ = pprPanic "idInfoToAmode" (ppr (cg_id cg_info)) -- LneLoc
+
+addDynTag :: DynFlags -> CmmExpr -> DynTag -> CmmExpr
+-- A tag adds a byte offset to the pointer
+addDynTag dflags expr tag = cmmOffsetB dflags expr tag
+
+maybeLetNoEscape :: CgIdInfo -> Maybe (BlockId, [LocalReg])
+maybeLetNoEscape (CgIdInfo { cg_loc = LneLoc blk_id args}) = Just (blk_id, args)
+maybeLetNoEscape _other = Nothing
+
+
+
+---------------------------------------------------------
+-- The binding environment
+--
+-- There are three basic routines, for adding (addBindC),
+-- modifying(modifyBindC) and looking up (getCgIdInfo) bindings.
+---------------------------------------------------------
+
+addBindC :: CgIdInfo -> FCode ()
+addBindC stuff_to_bind = do
+ binds <- getBinds
+ setBinds $ extendVarEnv binds (cg_id stuff_to_bind) stuff_to_bind
+
+addBindsC :: [CgIdInfo] -> FCode ()
+addBindsC new_bindings = do
+ binds <- getBinds
+ let new_binds = foldl' (\ binds info -> extendVarEnv binds (cg_id info) info)
+ binds
+ new_bindings
+ setBinds new_binds
+
+getCgIdInfo :: Id -> FCode CgIdInfo
+getCgIdInfo id
+ = do { dflags <- getDynFlags
+ ; local_binds <- getBinds -- Try local bindings first
+ ; case lookupVarEnv local_binds id of {
+ Just info -> return info ;
+ Nothing -> do {
+
+ -- Should be imported; make up a CgIdInfo for it
+ let name = idName id
+ ; if isExternalName name then
+ let ext_lbl
+ | isUnliftedType (idType id) =
+ -- An unlifted external Id must refer to a top-level
+ -- string literal. See Note [Bytes label] in CLabel.
+ ASSERT( idType id `eqType` addrPrimTy )
+ mkBytesLabel name
+ | otherwise = mkClosureLabel name $ idCafInfo id
+ in return $
+ litIdInfo dflags id (mkLFImported id) (CmmLabel ext_lbl)
+ else
+ cgLookupPanic id -- Bug
+ }}}
+
+cgLookupPanic :: Id -> FCode a
+cgLookupPanic id
+ = do local_binds <- getBinds
+ pprPanic "GHC.StgToCmm.Env: variable not found"
+ (vcat [ppr id,
+ text "local binds for:",
+ pprUFM local_binds $ \infos ->
+ vcat [ ppr (cg_id info) | info <- infos ]
+ ])
+
+
+--------------------
+getArgAmode :: NonVoid StgArg -> FCode CmmExpr
+getArgAmode (NonVoid (StgVarArg var)) = idInfoToAmode <$> getCgIdInfo var
+getArgAmode (NonVoid (StgLitArg lit)) = CmmLit <$> cgLit lit
+
+getNonVoidArgAmodes :: [StgArg] -> FCode [CmmExpr]
+-- NB: Filters out void args,
+-- so the result list may be shorter than the argument list
+getNonVoidArgAmodes [] = return []
+getNonVoidArgAmodes (arg:args)
+ | isVoidRep (argPrimRep arg) = getNonVoidArgAmodes args
+ | otherwise = do { amode <- getArgAmode (NonVoid arg)
+ ; amodes <- getNonVoidArgAmodes args
+ ; return ( amode : amodes ) }
+
+
+------------------------------------------------------------------------
+-- Interface functions for binding and re-binding names
+------------------------------------------------------------------------
+
+bindToReg :: NonVoid Id -> LambdaFormInfo -> FCode LocalReg
+-- Bind an Id to a fresh LocalReg
+bindToReg nvid@(NonVoid id) lf_info
+ = do dflags <- getDynFlags
+ let reg = idToReg dflags nvid
+ addBindC (mkCgIdInfo id lf_info (CmmReg (CmmLocal reg)))
+ return reg
+
+rebindToReg :: NonVoid Id -> FCode LocalReg
+-- Like bindToReg, but the Id is already in scope, so
+-- get its LF info from the envt
+rebindToReg nvid@(NonVoid id)
+ = do { info <- getCgIdInfo id
+ ; bindToReg nvid (cg_lf info) }
+
+bindArgToReg :: NonVoid Id -> FCode LocalReg
+bindArgToReg nvid@(NonVoid id) = bindToReg nvid (mkLFArgument id)
+
+bindArgsToRegs :: [NonVoid Id] -> FCode [LocalReg]
+bindArgsToRegs args = mapM bindArgToReg args
+
+idToReg :: DynFlags -> NonVoid Id -> LocalReg
+-- Make a register from an Id, typically a function argument,
+-- free variable, or case binder
+--
+-- We re-use the Unique from the Id to make it easier to see what is going on
+--
+-- By now the Ids should be uniquely named; else one would worry
+-- about accidental collision
+idToReg dflags (NonVoid id)
+ = LocalReg (idUnique id)
+ (primRepCmmType dflags (idPrimRep id))
diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs
new file mode 100644
index 0000000000..59cd246441
--- /dev/null
+++ b/compiler/GHC/StgToCmm/Expr.hs
@@ -0,0 +1,992 @@
+{-# LANGUAGE CPP #-}
+
+-----------------------------------------------------------------------------
+--
+-- Stg to C-- code generation: expressions
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+module GHC.StgToCmm.Expr ( cgExpr ) where
+
+#include "HsVersions.h"
+
+import GhcPrelude hiding ((<*>))
+
+import {-# SOURCE #-} GHC.StgToCmm.Bind ( cgBind )
+
+import GHC.StgToCmm.Monad
+import GHC.StgToCmm.Heap
+import GHC.StgToCmm.Env
+import GHC.StgToCmm.Con
+import GHC.StgToCmm.Prof (saveCurrentCostCentre, restoreCurrentCostCentre, emitSetCCC)
+import GHC.StgToCmm.Layout
+import GHC.StgToCmm.Prim
+import GHC.StgToCmm.Hpc
+import GHC.StgToCmm.Ticky
+import GHC.StgToCmm.Utils
+import GHC.StgToCmm.Closure
+
+import StgSyn
+
+import MkGraph
+import BlockId
+import Cmm
+import CmmInfo
+import CoreSyn
+import DataCon
+import ForeignCall
+import Id
+import PrimOp
+import TyCon
+import Type ( isUnliftedType )
+import RepType ( isVoidTy, countConRepArgs, primRepSlot )
+import CostCentre ( CostCentreStack, currentCCS )
+import Maybes
+import Util
+import FastString
+import Outputable
+
+import Control.Monad (unless,void)
+import Control.Arrow (first)
+import Data.Function ( on )
+
+------------------------------------------------------------------------
+-- cgExpr: the main function
+------------------------------------------------------------------------
+
+cgExpr :: CgStgExpr -> FCode ReturnKind
+
+cgExpr (StgApp fun args) = cgIdApp fun args
+
+-- seq# a s ==> a
+-- See Note [seq# magic] in PrelRules
+cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
+ cgIdApp a []
+
+-- dataToTag# :: a -> Int#
+-- See Note [dataToTag#] in primops.txt.pp
+cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do
+ dflags <- getDynFlags
+ emitComment (mkFastString "dataToTag#")
+ tmp <- newTemp (bWord dflags)
+ _ <- withSequel (AssignTo [tmp] False) (cgIdApp a [])
+ -- TODO: For small types look at the tag bits instead of reading info table
+ emitReturn [getConstrTag dflags (cmmUntag dflags (CmmReg (CmmLocal tmp)))]
+
+cgExpr (StgOpApp op args ty) = cgOpApp op args ty
+cgExpr (StgConApp con args _)= cgConApp con args
+cgExpr (StgTick t e) = cgTick t >> cgExpr e
+cgExpr (StgLit lit) = do cmm_lit <- cgLit lit
+ emitReturn [CmmLit cmm_lit]
+
+cgExpr (StgLet _ binds expr) = do { cgBind binds; cgExpr expr }
+cgExpr (StgLetNoEscape _ binds expr) =
+ do { u <- newUnique
+ ; let join_id = mkBlockId u
+ ; cgLneBinds join_id binds
+ ; r <- cgExpr expr
+ ; emitLabel join_id
+ ; return r }
+
+cgExpr (StgCase expr bndr alt_type alts) =
+ cgCase expr bndr alt_type alts
+
+cgExpr (StgLam {}) = panic "cgExpr: StgLam"
+
+------------------------------------------------------------------------
+-- Let no escape
+------------------------------------------------------------------------
+
+{- Generating code for a let-no-escape binding, aka join point, is very
+similar to what we do for a case expression. The duality is
+between
+ let-no-escape x = b
+ in e
+and
+ case e of ... -> b
+
+That is, the RHS of 'x' (i.e. 'b') will execute *later*, just like
+the alternative of the case; it needs to be compiled in an environment
+in which all volatile bindings are forgotten, and the free vars are
+bound only to stable things like stack locations. The 'e' part will
+execute *next*, just like the scrutinee of a case. -}
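+
+{- A small sketch of the compilation scheme: for
+
+     let-no-escape j x = <rhs>
+     in case v of { A -> j e1; B -> j e2 }
+
+   'j' becomes a basic block that reads 'x' from a local register; each
+   call site assigns its argument to that register and branches to the
+   block (the JumpToIt call method), rather than making a real call. -}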
+
+-------------------------
+cgLneBinds :: BlockId -> CgStgBinding -> FCode ()
+cgLneBinds join_id (StgNonRec bndr rhs)
+ = do { local_cc <- saveCurrentCostCentre
+ -- See Note [Saving the current cost centre]
+ ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs
+ ; fcode
+ ; addBindC info }
+
+cgLneBinds join_id (StgRec pairs)
+ = do { local_cc <- saveCurrentCostCentre
+ ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs
+ ; let (infos, fcodes) = unzip r
+ ; addBindsC infos
+ ; sequence_ fcodes
+ }
+
+-------------------------
+cgLetNoEscapeRhs
+ :: BlockId -- join point for successor of let-no-escape
+ -> Maybe LocalReg -- Saved cost centre
+ -> Id
+ -> CgStgRhs
+ -> FCode (CgIdInfo, FCode ())
+
+cgLetNoEscapeRhs join_id local_cc bndr rhs =
+ do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs
+ ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
+ ; let code = do { (_, body) <- getCodeScoped rhs_code
+ ; emitOutOfLine bid (first (<*> mkBranch join_id) body) }
+ ; return (info, code)
+ }
+
+cgLetNoEscapeRhsBody
+ :: Maybe LocalReg -- Saved cost centre
+ -> Id
+ -> CgStgRhs
+ -> FCode (CgIdInfo, FCode ())
+cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure _ cc _upd args body)
+ = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
+cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args)
+ = cgLetNoEscapeClosure bndr local_cc cc []
+ (StgConApp con args (pprPanic "cgLetNoEscapeRhsBody" $
+ text "StgRhsCon doesn't have type args"))
+ -- For a constructor RHS we want to generate a single chunk of
+ -- code which can be jumped to from many places, which will
+ -- return the constructor. It's easy; just behave as if it
+ -- was an StgRhsClosure with a ConApp inside!
+
+-------------------------
+cgLetNoEscapeClosure
+ :: Id -- binder
+ -> Maybe LocalReg -- Slot for saved current cost centre
+ -> CostCentreStack -- XXX: *** NOT USED *** why not?
+ -> [NonVoid Id] -- Args (as in \ args -> body)
+ -> CgStgExpr -- Body (as in above)
+ -> FCode (CgIdInfo, FCode ())
+
+cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
+ = do dflags <- getDynFlags
+ return ( lneIdInfo dflags bndr args
+ , code )
+ where
+ code = forkLneBody $ do {
+ ; withNewTickyCounterLNE (idName bndr) args $ do
+ ; restoreCurrentCostCentre cc_slot
+ ; arg_regs <- bindArgsToRegs args
+ ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }
+
+
+------------------------------------------------------------------------
+-- Case expressions
+------------------------------------------------------------------------
+
+{- Note [Compiling case expressions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+It is quite interesting to decide whether to put a heap-check at the
+start of each alternative. Of course we certainly have to do so if
+the case forces an evaluation, or if there is a primitive op which can
+trigger GC.
+
+A more interesting situation is this (a Plan-B situation)
+
+ !P!;
+ ...P...
+ case x# of
+ 0# -> !Q!; ...Q...
+ default -> !R!; ...R...
+
+where !x! indicates a possible heap-check point. The heap checks
+in the alternatives *can* be omitted, in which case the topmost
+heap check will take their worst case into account.
+
+In favour of omitting !Q!, !R!:
+
+ - *May* save a heap overflow test,
+ if ...P... allocates anything.
+
+ - We can use relative addressing from a single Hp to
+ get at all the closures so allocated.
+
+ - No need to save volatile vars etc across heap checks
+ in !Q!, !R!
+
+Against omitting !Q!, !R!
+
+ - May put a heap-check into the inner loop. Suppose
+ the main loop is P -> R -> P -> R...
+ Q is the loop exit, and only it does allocation.
+ This only hurts us if P does no allocation. If P allocates,
+ then there is a heap check in the inner loop anyway.
+
+  - May do more allocation than required.  This sometimes bites us
+    badly. For example, nfib (ha!) allocates about 30% more space if the
+ worst-casing is done, because many many calls to nfib are leaf calls
+ which don't need to allocate anything.
+
+ We can un-allocate, but that costs an instruction
+
+Neither problem hurts us if there is only one alternative.
+
+Suppose the inner loop is P->R->P->R etc. Then here is
+how many heap checks we get in the *inner loop* under various
+conditions
+
+ Alloc Heap check in branches (!Q!, !R!)?
+ P Q R yes no (absorb to !P!)
+--------------------------------------
+ n n n 0 0
+ n y n 0 1
+ n . y 1 1
+ y . y 2 1
+ y . n 1 1
+
+Best choices: absorb heap checks from Q and R into !P! iff
+ a) P itself does some allocation
+or
+ b) P does allocation, or there is exactly one alternative
+
+We adopt (b) because that is more likely to put the heap check at the
+entry to a function, when not many things are live. After a bunch of
+single-branch cases, we may have lots of things live.
+
+Hence: two basic plans for
+
+ case e of r { alts }
+
+------ Plan A: the general case ---------
+
+ ...save current cost centre...
+
+ ...code for e,
+ with sequel (SetLocals r)
+
+ ...restore current cost centre...
+ ...code for alts...
+ ...alts do their own heap checks
+
+------ Plan B: special case when ---------
+ (i) e does not allocate or call GC
+ (ii) either upstream code performs allocation
+ or there is just one alternative
+
+ Then heap allocation in the (single) case branch
+ is absorbed by the upstream check.
+ Very common example: primops on unboxed values
+
+ ...code for e,
+ with sequel (SetLocals r)...
+
+ ...code for alts...
+ ...no heap check...
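+
+ A concrete instance of Plan B (a sketch): in
+
+     case x# +# 1# of r# -> Just (I# r#)
+
+ the scrutinee neither allocates nor calls GC and there is a single
+ alternative, so the allocation for Just and I# is absorbed into the
+ upstream heap check.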
+-}
+
+
+
+-------------------------------------
+data GcPlan
+  = GcInAlts            -- Put a GC check at the start of the case alternatives,
+ [LocalReg] -- which binds these registers
+ | NoGcInAlts -- The scrutinee is a primitive value, or a call to a
+ -- primitive op which does no GC. Absorb the allocation
+ -- of the case alternative(s) into the upstream check
+
+-------------------------------------
+cgCase :: CgStgExpr -> Id -> AltType -> [CgStgAlt] -> FCode ReturnKind
+
+cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
+ | isEnumerationTyCon tycon -- Note [case on bool]
+ = do { tag_expr <- do_enum_primop op args
+
+ -- If the binder is not dead, convert the tag to a constructor
+ -- and assign it. See Note [Dead-binder optimisation]
+ ; unless (isDeadBinder bndr) $ do
+ { dflags <- getDynFlags
+ ; tmp_reg <- bindArgToReg (NonVoid bndr)
+ ; emitAssign (CmmLocal tmp_reg)
+ (tagToClosure dflags tycon tag_expr) }
+
+ ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
+ (NonVoid bndr) alts
+ -- See Note [GC for conditionals]
+ ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
+ ; return AssignedDirectly
+ }
+ where
+ do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
+ do_enum_primop TagToEnumOp [arg] -- No code!
+ = getArgAmode (NonVoid arg)
+ do_enum_primop primop args
+ = do dflags <- getDynFlags
+ tmp <- newTemp (bWord dflags)
+ cgPrimOp [tmp] primop args
+ return (CmmReg (CmmLocal tmp))
+
+{-
+Note [case on bool]
+~~~~~~~~~~~~~~~~~~~
+This special case handles code like
+
+ case a <# b of
+ True ->
+ False ->
+
+-->  case tagToEnum# (a <# b) of
+ True -> .. ; False -> ...
+
+-->  case (a <# b) of r ->
+ case tagToEnum# r of
+ True -> .. ; False -> ...
+
+If we let the ordinary case code handle it, we'll get something like
+
+ tmp1 = a < b
+ tmp2 = Bool_closure_tbl[tmp1]
+ if (tmp2 & 7 != 0) then ... // normal tagged case
+
+but this junk won't optimise away. What we really want is just an
+inline comparison:
+
+ if (a < b) then ...
+
+So we add a special case to generate
+
+ tmp1 = a < b
+ if (tmp1 == 0) then ...
+
+and later optimisations will further improve this.
+
+Now that #6135 has been resolved it should be possible to remove that
+special case. The idea behind this special case and the pre-6135
+implementation of Bool-returning primops was that tagToEnum# was added
+implicitly in the codegen and then optimized away. Now the call to
+tagToEnum# is explicit in the source code, which allows us to optimize
+it away at earlier stages of compilation (i.e. at the Core level).
+
+Note [Scrutinising VoidRep]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Suppose we have this STG code:
+ f = \[s : State# RealWorld] ->
+ case s of _ -> blah
+This is very odd. Why are we scrutinising a state token? But it
+can arise with bizarre NOINLINE pragmas (#9964)
+ crash :: IO ()
+ crash = IO (\s -> let {-# NOINLINE s' #-}
+ s' = s
+ in (# s', () #))
+
+Now the trouble is that 's' has VoidRep, and we do not bind void
+arguments in the environment; they don't live anywhere. See the
+calls to nonVoidIds in various places. So we must not look up
+'s' in the environment. Instead, just evaluate the RHS! Simple.
+
+Note [Dead-binder optimisation]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+A case-binder, or data-constructor argument, may be marked as dead,
+because we preserve occurrence-info on binders in CoreTidy (see
+CoreTidy.tidyIdBndr).
+
+If the binder is dead, we can sometimes eliminate a load. While
+CmmSink will eliminate that load, it's very easy to kill it at source
+(giving CmmSink less work to do), and in any case CmmSink only runs
+with -O. Since the majority of case binders are dead, this
+optimisation probably still has a great benefit-cost ratio and we want
+to keep it for -O0. See also Phab:D5358.
+
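+For example, in the enumeration-primop case of cgCase above,
+
+    case a <# b of _dead { ... }
+
+no "tmp = Bool_closure_tbl[tag]" assignment is emitted for the dead
+binder, since nothing will ever look it up.
+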
+This was probably also the reason the occurrence hack in Phab:D5339
+existed, perhaps because the occurrence information preserved by
+'CoreTidy.tidyIdBndr' was insufficient. But now that CmmSink does the
+job we have deleted the hacks.
+-}
+
+cgCase (StgApp v []) _ (PrimAlt _) alts
+ | isVoidRep (idPrimRep v) -- See Note [Scrutinising VoidRep]
+ , [(DEFAULT, _, rhs)] <- alts
+ = cgExpr rhs
+
+{- Note [Dodgy unsafeCoerce 1]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+ case (x :: HValue) |> co of (y :: MutVar# Int)
+ DEFAULT -> ...
+We want to generate an assignment
+ y := x
+We want to allow this assignment to be generated in the case when the
+types are compatible, because this allows some slightly-dodgy but
+occasionally-useful casts to be used, such as in RtClosureInspect
+where we cast an HValue to a MutVar# so we can print out the contents
+of the MutVar#. If instead we generate code that enters the HValue,
+then we'll get a runtime panic, because the HValue really is a
+MutVar#. The types are compatible though, so we can just generate an
+assignment.
+-}
+cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
+ | isUnliftedType (idType v) -- Note [Dodgy unsafeCoerce 1]
+ || reps_compatible
+ = -- assignment suffices for unlifted types
+ do { dflags <- getDynFlags
+ ; unless reps_compatible $
+ pprPanic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
+ (pp_bndr v $$ pp_bndr bndr)
+ ; v_info <- getCgIdInfo v
+ ; emitAssign (CmmLocal (idToReg dflags (NonVoid bndr)))
+ (idInfoToAmode v_info)
+ -- Add bndr to the environment
+ ; _ <- bindArgToReg (NonVoid bndr)
+ ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
+ where
+ reps_compatible = ((==) `on` (primRepSlot . idPrimRep)) v bndr
+ -- Must compare SlotTys, not proper PrimReps, because with unboxed sums,
+ -- the types of the binders are generated from slotPrimRep and might not
+ -- match. Test case:
+ -- swap :: (# Int | Int #) -> (# Int | Int #)
+ -- swap (# x | #) = (# | x #)
+ -- swap (# | y #) = (# y | #)
+
+ pp_bndr id = ppr id <+> dcolon <+> ppr (idType id) <+> parens (ppr (idPrimRep id))
+
+{- Note [Dodgy unsafeCoerce 2, #3132]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In all other cases of a lifted Id being cast to an unlifted type, the
+Id should be bound to bottom, otherwise this is an unsafe use of
+unsafeCoerce. We can generate code to enter the Id and assume that
+it will never return. Hence, we emit the usual enter/return code, and
+because bottom must be untagged, it will be entered. The Sequel is a
+type-correct assignment, albeit bogus. The (dead) continuation loops;
+it would be better to invoke some kind of panic function here.
+-}
+cgCase scrut@(StgApp v []) _ (PrimAlt _) _
+ = do { dflags <- getDynFlags
+ ; mb_cc <- maybeSaveCostCentre True
+ ; _ <- withSequel
+ (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut)
+ ; restoreCurrentCostCentre mb_cc
+ ; emitComment $ mkFastString "should be unreachable code"
+ ; l <- newBlockId
+ ; emitLabel l
+ ; emit (mkBranch l) -- an infinite loop
+ ; return AssignedDirectly
+ }
+
+{- Note [Handle seq#]
+~~~~~~~~~~~~~~~~~~~~~
+See Note [seq# magic] in PrelRules.
+The special case for seq# in cgCase does this:
+
+ case seq# a s of v
+ (# s', a' #) -> e
+==>
+ case a of v
+ (# s', a' #) -> e
+
+(taking advantage of the fact that the return convention for (# State#, a #)
+is the same as the return convention for just 'a')
+-}
+
+cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
+ = -- Note [Handle seq#]
+ -- And see Note [seq# magic] in PrelRules
+ -- Use the same return convention as vanilla 'a'.
+ cgCase (StgApp a []) bndr alt_type alts
+
+cgCase scrut bndr alt_type alts
+ = -- the general case
+ do { dflags <- getDynFlags
+ ; up_hp_usg <- getVirtHp -- Upstream heap usage
+ ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
+ alt_regs = map (idToReg dflags) ret_bndrs
+ ; simple_scrut <- isSimpleScrut scrut alt_type
+ ; let do_gc | is_cmp_op scrut = False -- See Note [GC for conditionals]
+ | not simple_scrut = True
+ | isSingleton alts = False
+ | up_hp_usg > 0 = False
+ | otherwise = True
+ -- cf Note [Compiling case expressions]
+ gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts
+
+ ; mb_cc <- maybeSaveCostCentre simple_scrut
+
+ ; let sequel = AssignTo alt_regs do_gc{- Note [scrut sequel] -}
+ ; ret_kind <- withSequel sequel (cgExpr scrut)
+ ; restoreCurrentCostCentre mb_cc
+ ; _ <- bindArgsToRegs ret_bndrs
+ ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
+ }
+ where
+ is_cmp_op (StgOpApp (StgPrimOp op) _ _) = isComparisonPrimOp op
+ is_cmp_op _ = False
+
+{- Note [GC for conditionals]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For boolean conditionals it seems that we have always done NoGcInAlts.
+That is, we have always done the GC check before the conditional.
+This is enshrined in the special case for
+ case tagToEnum# (a>b) of ...
+See Note [case on bool]
+
+It's odd, and it's flagrantly inconsistent with the rules described in
+Note [Compiling case expressions]. However, after eliminating the
+tagToEnum# (#13397) we will have:
+    case (a>b) of ...
+Rather than make it behave quite differently, I am testing for a
+comparison operator here in the general case as well.
+
+ToDo: figure out what the Right Rule should be.
+
+Note [scrut sequel]
+~~~~~~~~~~~~~~~~~~~
+The job of the scrutinee is to assign its value(s) to alt_regs.
+Additionally, if we plan to do a heap-check in the alternatives (see
+Note [Compiling case expressions]), then we *must* retreat Hp to
+recover any unused heap before passing control to the sequel. If we
+don't do this, then any unused heap will become slop because the heap
+check will reset the heap usage. Slop in the heap breaks LDV profiling
+(+RTS -hb) which needs to do a linear sweep through the nursery.
+
+
+Note [Inlining out-of-line primops and heap checks]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If shouldInlinePrimOp returns True when called from GHC.StgToCmm.Expr for the
+purpose of heap check placement, we *must* inline the primop later in
+GHC.StgToCmm.Prim. If we don't, things will go wrong, because the
+heap-check placement chosen here assumed the op would stay inline.
+-}
+
+-----------------
+maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
+maybeSaveCostCentre simple_scrut
+ | simple_scrut = return Nothing
+ | otherwise = saveCurrentCostCentre
+
+
+-----------------
+isSimpleScrut :: CgStgExpr -> AltType -> FCode Bool
+-- Simple scrutinee, does not block or allocate; hence safe to amalgamate
+-- heap usage from alternatives into the stuff before the case
+-- NB: if you get this wrong, and claim that the expression doesn't allocate
+-- when it does, you'll deeply mess up allocation
+isSimpleScrut (StgOpApp op args _) _ = isSimpleOp op args
+isSimpleScrut (StgLit _) _ = return True -- case 1# of { 0# -> ..; ... }
+isSimpleScrut (StgApp _ []) (PrimAlt _) = return True -- case x# of { 0# -> ..; ... }
+isSimpleScrut _ _ = return False
+
+isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
+-- True iff the op cannot block or allocate
+isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe)
+-- dataToTag# evaluates its argument, see Note [dataToTag#] in primops.txt.pp
+isSimpleOp (StgPrimOp DataToTagOp) _ = return False
+isSimpleOp (StgPrimOp op) stg_args = do
+ arg_exprs <- getNonVoidArgAmodes stg_args
+ dflags <- getDynFlags
+ -- See Note [Inlining out-of-line primops and heap checks]
+ return $! isJust $ shouldInlinePrimOp dflags op arg_exprs
+isSimpleOp (StgPrimCallOp _) _ = return False
+
+-----------------
+chooseReturnBndrs :: Id -> AltType -> [CgStgAlt] -> [NonVoid Id]
+-- These are the binders of a case that are assigned by the evaluation of the
+-- scrutinee.
+-- They're non-void, see Note [Post-unarisation invariants] in UnariseStg.
+chooseReturnBndrs bndr (PrimAlt _) _alts
+ = assertNonVoidIds [bndr]
+
+chooseReturnBndrs _bndr (MultiValAlt n) [(_, ids, _)]
+ = ASSERT2(ids `lengthIs` n, ppr n $$ ppr ids $$ ppr _bndr)
+ assertNonVoidIds ids -- 'bndr' is not assigned!
+
+chooseReturnBndrs bndr (AlgAlt _) _alts
+ = assertNonVoidIds [bndr] -- Only 'bndr' is assigned
+
+chooseReturnBndrs bndr PolyAlt _alts
+ = assertNonVoidIds [bndr] -- Only 'bndr' is assigned
+
+chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
+ -- MultiValAlt has only one alternative
+
+-------------------------------------
+cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [CgStgAlt]
+ -> FCode ReturnKind
+-- At this point the result of the case are in the binders
+cgAlts gc_plan _bndr PolyAlt [(_, _, rhs)]
+ = maybeAltHeapCheck gc_plan (cgExpr rhs)
+
+cgAlts gc_plan _bndr (MultiValAlt _) [(_, _, rhs)]
+ = maybeAltHeapCheck gc_plan (cgExpr rhs)
+ -- Here bndrs are *already* in scope, so don't rebind them
+
+cgAlts gc_plan bndr (PrimAlt _) alts
+ = do { dflags <- getDynFlags
+
+ ; tagged_cmms <- cgAltRhss gc_plan bndr alts
+
+ ; let bndr_reg = CmmLocal (idToReg dflags bndr)
+ (DEFAULT,deflt) = head tagged_cmms
+ -- PrimAlts always have a DEFAULT case
+ -- and it always comes first
+
+ tagged_cmms' = [(lit,code)
+ | (LitAlt lit, code) <- tagged_cmms]
+ ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
+ ; return AssignedDirectly }
+
+cgAlts gc_plan bndr (AlgAlt tycon) alts
+ = do { dflags <- getDynFlags
+
+ ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts
+
+ ; let fam_sz = tyConFamilySize tycon
+ bndr_reg = CmmLocal (idToReg dflags bndr)
+
+ -- Is the constructor tag in the node reg?
+ ; if isSmallFamily dflags fam_sz
+ then do
+ let -- Yes, bndr_reg has constr. tag in ls bits
+ tag_expr = cmmConstrTag1 dflags (CmmReg bndr_reg)
+ branches' = [(tag+1,branch) | (tag,branch) <- branches]
+ emitSwitch tag_expr branches' mb_deflt 1 fam_sz
+
+ else -- No, get tag from info table
+ let -- Note that ptr _always_ has tag 1
+ -- when the family size is big enough
+ untagged_ptr = cmmRegOffB bndr_reg (-1)
+ tag_expr = getConstrTag dflags (untagged_ptr)
+ in emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
+
+ ; return AssignedDirectly }
+
+cgAlts _ _ _ _ = panic "cgAlts"
+ -- UbxTupAlt and PolyAlt have only one alternative
+
+
+-- Note [alg-alt heap check]
+--
+-- In an algebraic case with more than one alternative, we will have
+-- code like
+--
+-- L0:
+-- x = R1
+-- goto L1
+-- L1:
+-- if (x & 7 >= 2) then goto L2 else goto L3
+-- L2:
+-- Hp = Hp + 16
+-- if (Hp > HpLim) then goto L4
+-- ...
+-- L4:
+-- call gc() returns to L5
+-- L5:
+-- x = R1
+-- goto L1
+
+-------------------
+cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt]
+ -> FCode ( Maybe CmmAGraphScoped
+ , [(ConTagZ, CmmAGraphScoped)] )
+cgAlgAltRhss gc_plan bndr alts
+ = do { tagged_cmms <- cgAltRhss gc_plan bndr alts
+
+ ; let { mb_deflt = case tagged_cmms of
+ ((DEFAULT,rhs) : _) -> Just rhs
+ _other -> Nothing
+ -- DEFAULT is always first, if present
+
+ ; branches = [ (dataConTagZ con, cmm)
+ | (DataAlt con, cmm) <- tagged_cmms ]
+ }
+
+ ; return (mb_deflt, branches)
+ }
+
+
+-------------------
+cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt]
+ -> FCode [(AltCon, CmmAGraphScoped)]
+cgAltRhss gc_plan bndr alts = do
+ dflags <- getDynFlags
+ let
+ base_reg = idToReg dflags bndr
+ cg_alt :: CgStgAlt -> FCode (AltCon, CmmAGraphScoped)
+ cg_alt (con, bndrs, rhs)
+ = getCodeScoped $
+ maybeAltHeapCheck gc_plan $
+ do { _ <- bindConArgs con base_reg (assertNonVoidIds bndrs)
+ -- alt binders are always non-void,
+ -- see Note [Post-unarisation invariants] in UnariseStg
+ ; _ <- cgExpr rhs
+ ; return con }
+ forkAlts (map cg_alt alts)
+
+maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
+maybeAltHeapCheck (NoGcInAlts,_) code = code
+maybeAltHeapCheck (GcInAlts regs, AssignedDirectly) code =
+ altHeapCheck regs code
+maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code =
+ altHeapCheckReturnsTo regs lret off code
+
+-----------------------------------------------------------------------------
+-- Tail calls
+-----------------------------------------------------------------------------
+
+cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
+cgConApp con stg_args
+ | isUnboxedTupleCon con -- Unboxed tuple: assign and return
+ = do { arg_exprs <- getNonVoidArgAmodes stg_args
+ ; tickyUnboxedTupleReturn (length arg_exprs)
+ ; emitReturn arg_exprs }
+
+ | otherwise -- Boxed constructors; allocate and return
+ = ASSERT2( stg_args `lengthIs` countConRepArgs con, ppr con <> parens (ppr (countConRepArgs con)) <+> ppr stg_args )
+ do { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) False
+ currentCCS con (assertNonVoidStgArgs stg_args)
+ -- con args are always non-void,
+ -- see Note [Post-unarisation invariants] in UnariseStg
+ -- The first "con" says that the name bound to this
+ -- closure is "con", which is a bit of a fudge, but
+ -- it only affects profiling (hence the False)
+
+ ; emit =<< fcode_init
+ ; tickyReturnNewCon (length stg_args)
+ ; emitReturn [idInfoToAmode idinfo] }
+
+cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
+cgIdApp fun_id args = do
+ dflags <- getDynFlags
+ fun_info <- getCgIdInfo fun_id
+ self_loop_info <- getSelfLoop
+ let fun_arg = StgVarArg fun_id
+ fun_name = idName fun_id
+ fun = idInfoToAmode fun_info
+ lf_info = cg_lf fun_info
+ n_args = length args
+ v_args = length $ filter (isVoidTy . stgArgType) args
+ node_points dflags = nodeMustPointToIt dflags lf_info
+ case getCallMethod dflags fun_name fun_id lf_info n_args v_args (cg_loc fun_info) self_loop_info of
+ -- A value in WHNF, so we can just return it.
+ ReturnIt
+ | isVoidTy (idType fun_id) -> emitReturn []
+ | otherwise -> emitReturn [fun]
+ -- ToDo: does ReturnIt guarantee tagged?
+
+ EnterIt -> ASSERT( null args ) -- Discarding arguments
+ emitEnter fun
+
+ SlowCall -> do -- A slow function call via the RTS apply routines
+ { tickySlowCall lf_info args
+ ; emitComment $ mkFastString "slowCall"
+ ; slowCall fun args }
+
+ -- A direct function call (possibly with some left-over arguments)
+ DirectEntry lbl arity -> do
+ { tickyDirectCall arity args
+ ; if node_points dflags
+ then directCall NativeNodeCall lbl arity (fun_arg:args)
+ else directCall NativeDirectCall lbl arity args }
+
+ -- Let-no-escape call or self-recursive tail-call
+ JumpToIt blk_id lne_regs -> do
+ { adjustHpBackwards -- always do this before a tail-call
+ ; cmm_args <- getNonVoidArgAmodes args
+ ; emitMultiAssign lne_regs cmm_args
+ ; emit (mkBranch blk_id)
+ ; return AssignedDirectly }
+
+-- Note [Self-recursive tail calls]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Self-recursive tail calls can be optimized into a local jump in the same
+-- way as let-no-escape bindings (see Note [What is a non-escaping let] in
+-- stgSyn/CoreToStg.hs). Consider this:
+--
+-- foo.info:
+-- a = R1 // calling convention
+-- b = R2
+-- goto L1
+-- L1: ...
+-- ...
+-- ...
+-- L2: R1 = x
+-- R2 = y
+-- call foo(R1,R2)
+--
+-- Instead of putting x and y into registers (or other locations required by the
+-- calling convention) and performing a call we can put them into local
+-- variables a and b and perform jump to L1:
+--
+-- foo.info:
+-- a = R1
+-- b = R2
+-- goto L1
+-- L1: ...
+-- ...
+-- ...
+-- L2: a = x
+-- b = y
+-- goto L1
+--
+-- This can be done only when function is calling itself in a tail position
+-- and only if the call passes number of parameters equal to function's arity.
+-- Note that this cannot be performed if a function calls itself with a
+-- continuation.
+--
+-- This in fact implements optimization known as "loopification". It was
+-- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
+-- by Krzysztof Woś, though we use a different approach. Krzysztof performed his
+-- optimization at the Cmm level, whereas we perform ours during code generation
+-- (Stg-to-Cmm pass), essentially making sure that optimized Cmm code is
+-- generated in the first place.
+--
+-- Implementation is spread across a couple of places in the code:
+--
+--  * FCode monad stores additional information in its reader environment
+--    (cgd_self_loop field). This information tells us which function can
+--    tail call itself in an optimized way (it is the function currently
+--    being compiled), what the label of its loop header is (L1 in the
+--    example above), and which local registers we should place arguments
+--    in before making a call (a and b in the example above).
+--
+--  * Whenever we are compiling a function, we set that information to reflect
+--    the fact that the function currently being compiled can be jumped to,
+--    instead of called. This is done in closureCodeBody in GHC.StgToCmm.Bind.
+--
+-- * We also have to emit a label to which we will be jumping. We make sure
+-- that the label is placed after a stack check but before the heap
+-- check. The reason is that making a recursive tail-call does not increase
+-- the stack so we only need to check once. But it may grow the heap, so we
+-- have to repeat the heap check in every self-call. This is done in
+-- do_checks in GHC.StgToCmm.Heap.
+--
+-- * When we begin compilation of another closure we remove the additional
+-- information from the environment. This is done by forkClosureBody
+-- in GHC.StgToCmm.Monad. Other functions that duplicate the environment -
+-- forkLneBody, forkAlts, codeOnly - duplicate that information. In other
+-- words, we only need to clean the environment of the self-loop information
+-- when compiling right hand side of a closure (binding).
+--
+--  * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
+--    of call will be generated. getCallMethod decides to generate a
+--    self-recursive tail call when (a) the environment stores information
+--    about a possible self tail-call; (b) that tail call is to the function
+--    currently being compiled; (c) the number of non-void arguments passed
+--    equals the function's arity; and (d) loopification is turned on via
+--    the -floopification command-line option.
+--
+--  * The command-line option to turn loopification on and off is implemented
+--    in DynFlags.
+--
+--
+-- Note [Void arguments in self-recursive tail calls]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- State# tokens can get in the way of the loopification optimization as seen in
+-- #11372. Consider this:
+--
+-- foo :: [a]
+--     -> (a -> State# s -> (# State# s, Bool #))
+-- -> State# s
+-- -> (# State# s, Maybe a #)
+-- foo [] f s = (# s, Nothing #)
+-- foo (x:xs) f s = case f x s of
+-- (# s', b #) -> case b of
+-- True -> (# s', Just x #)
+-- False -> foo xs f s'
+--
+-- We would like to compile the call to foo as a local jump instead of a call
+-- (see Note [Self-recursive tail calls]). However, the generated function has
+-- an arity of 2 while we apply it to 3 arguments, one of them being of void
+-- type. Thus, we mustn't count arguments of void type when checking whether
+-- we can turn a call into a self-recursive jump.
+--
+
+emitEnter :: CmmExpr -> FCode ReturnKind
+emitEnter fun = do
+ { dflags <- getDynFlags
+ ; adjustHpBackwards
+ ; sequel <- getSequel
+ ; updfr_off <- getUpdFrameOff
+ ; case sequel of
+ -- For a return, we have the option of generating a tag-test or
+ -- not. If the value is tagged, we can return directly, which
+ -- is quicker than entering the value. This is a code
+ -- size/speed trade-off: when optimising for speed rather than
+ -- size we could generate the tag test.
+ --
+ -- Right now, we do what the old codegen did, and omit the tag
+ -- test, just generating an enter.
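+      --
+      -- Roughly (a sketch of the code generated here):
+      --
+      --     R1 = fun                  -- untagged, via cmmUntag
+      --     jump %GET_ENTRY(R1) [R1]  -- enter the closure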
+ Return -> do
+ { let entry = entryCode dflags $ closureInfoPtr dflags $ CmmReg nodeReg
+ ; emit $ mkJump dflags NativeNodeCall entry
+ [cmmUntag dflags fun] updfr_off
+ ; return AssignedDirectly
+ }
+
+ -- The result will be scrutinised in the sequel. This is where
+ -- we generate a tag-test to avoid entering the closure if
+ -- possible.
+ --
+ -- The generated code will be something like this:
+ --
+ -- R1 = fun -- copyout
+ -- if (fun & 7 != 0) goto Lret else goto Lcall
+ -- Lcall:
+ -- call [fun] returns to Lret
+ -- Lret:
+ -- fun' = R1 -- copyin
+ -- ...
+ --
+ -- Note in particular that the label Lret is used as a
+ -- destination by both the tag-test and the call. This is
+ -- because Lret will necessarily be a proc-point, and we want to
+ -- ensure that we generate only one proc-point for this
+ -- sequence.
+ --
+ -- Furthermore, we tell the caller that we generated a native
+ -- return continuation by returning (ReturnedTo Lret off), so
+ -- that the continuation can be reused by the heap-check failure
+ -- code in the enclosing case expression.
+ --
+ AssignTo res_regs _ -> do
+ { lret <- newBlockId
+ ; let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs []
+ ; lcall <- newBlockId
+ ; updfr_off <- getUpdFrameOff
+ ; let area = Young lret
+ ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area
+ [fun] updfr_off []
+ -- refer to fun via nodeReg after the copyout, to avoid having
+ -- both live simultaneously; this sometimes enables fun to be
+ -- inlined in the RHS of the R1 assignment.
+ ; let entry = entryCode dflags (closureInfoPtr dflags (CmmReg nodeReg))
+ the_call = toCall entry (Just lret) updfr_off off outArgs regs
+ ; tscope <- getTickScope
+ ; emit $
+ copyout <*>
+ mkCbranch (cmmIsTagged dflags (CmmReg nodeReg))
+ lret lcall Nothing <*>
+ outOfLine lcall (the_call,tscope) <*>
+ mkLabel lret tscope <*>
+ copyin
+ ; return (ReturnedTo lret off)
+ }
+ }
+
+------------------------------------------------------------------------
+-- Ticks
+------------------------------------------------------------------------
+
+-- | Generate Cmm code for a tick. Depending on the type of Tickish,
+-- this will either generate actual Cmm instrumentation code, or
+-- simply pass on the annotation as a @CmmTickish@.
+cgTick :: Tickish Id -> FCode ()
+cgTick tick
+ = do { dflags <- getDynFlags
+ ; case tick of
+ ProfNote cc t p -> emitSetCCC cc t p
+ HpcTick m n -> emit (mkTickBox dflags m n)
+ SourceNote s n -> emitTick $ SourceNote s n
+ _other -> return () -- ignore
+ }
diff --git a/compiler/GHC/StgToCmm/ExtCode.hs b/compiler/GHC/StgToCmm/ExtCode.hs
new file mode 100644
index 0000000000..be2592edd3
--- /dev/null
+++ b/compiler/GHC/StgToCmm/ExtCode.hs
@@ -0,0 +1,252 @@
+{-# LANGUAGE DeriveFunctor #-}
+-- | Our extended FCode monad.
+
+-- We add a mapping from names to CmmExpr, to support local variable names in
+-- the concrete C-- code. The unique supply of the underlying FCode monad
+-- is used to grab a new unique for each local variable.
+
+-- In C--, a local variable can be declared anywhere within a proc,
+-- and it scopes from the beginning of the proc to the end. Hence, we have
+-- to collect declarations as we parse the proc, and feed the environment
+-- back in circularly (to avoid a two-pass algorithm).
+
+module GHC.StgToCmm.ExtCode (
+ CmmParse, unEC,
+ Named(..), Env,
+
+ loopDecls,
+ getEnv,
+
+ withName,
+ getName,
+
+ newLocal,
+ newLabel,
+ newBlockId,
+ newFunctionName,
+ newImport,
+ lookupLabel,
+ lookupName,
+
+ code,
+ emit, emitLabel, emitAssign, emitStore,
+ getCode, getCodeR, getCodeScoped,
+ emitOutOfLine,
+ withUpdFrameOff, getUpdFrameOff
+)
+
+where
+
+import GhcPrelude
+
+import qualified GHC.StgToCmm.Monad as F
+import GHC.StgToCmm.Monad (FCode, newUnique)
+
+import Cmm
+import CLabel
+import MkGraph
+
+import BlockId
+import DynFlags
+import FastString
+import Module
+import UniqFM
+import Unique
+import UniqSupply
+
+import Control.Monad (ap)
+
+-- | The environment contains variable definitions or blockids.
+data Named
+ = VarN CmmExpr -- ^ Holds CmmLit(CmmLabel ..) which gives the label type,
+ -- eg, RtsLabel, ForeignLabel, CmmLabel etc.
+
+ | FunN UnitId -- ^ A function name from this package
+ | LabelN BlockId -- ^ A blockid of some code or data.
+
+-- | An environment of named things.
+type Env = UniqFM Named
+
+-- | Local declarations that are in scope during code generation.
+type Decls = [(FastString,Named)]
+
+-- | Does a computation in the FCode monad, with a current environment
+-- and a list of local declarations. Returns the resulting list of declarations.
+newtype CmmParse a
+ = EC { unEC :: String -> Env -> Decls -> FCode (Decls, a) }
+ deriving (Functor)
+
+type ExtCode = CmmParse ()
+
+returnExtFC :: a -> CmmParse a
+returnExtFC a = EC $ \_ _ s -> return (s, a)
+
+thenExtFC :: CmmParse a -> (a -> CmmParse b) -> CmmParse b
+thenExtFC (EC m) k = EC $ \c e s -> do (s',r) <- m c e s; unEC (k r) c e s'
+
+instance Applicative CmmParse where
+ pure = returnExtFC
+ (<*>) = ap
+
+instance Monad CmmParse where
+ (>>=) = thenExtFC
+
+instance MonadUnique CmmParse where
+ getUniqueSupplyM = code getUniqueSupplyM
+ getUniqueM = EC $ \_ _ decls -> do
+ u <- getUniqueM
+ return (decls, u)
+
+instance HasDynFlags CmmParse where
+ getDynFlags = EC (\_ _ d -> do dflags <- getDynFlags
+ return (d, dflags))
+
+
+-- | Takes the variable declarations and imports from the monad
+-- and makes an environment, which is looped back into the computation.
+-- In this way, we can have embedded declarations that scope over the whole
+-- procedure, and imports that scope over the entire module.
+-- Discards the local declarations produced by the enclosed computation.
+--
+loopDecls :: CmmParse a -> CmmParse a
+loopDecls (EC fcode) =
+ EC $ \c e globalDecls -> do
+ (_, a) <- F.fixC $ \ ~(decls, _) ->
+ fcode c (addListToUFM e decls) globalDecls
+ return (globalDecls, a)
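+
+-- For example (a sketch of the idea, not verbatim RTS code): in
+--
+--   foo() { goto end; W_ x; end: return (x); }
+--
+-- the reference to 'end' precedes its declaration; fixC feeds the final
+-- declaration list back in as the initial environment, so a single pass
+-- over the procedure suffices.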
+
+
+-- | Get the current environment from the monad.
+getEnv :: CmmParse Env
+getEnv = EC $ \_ e s -> return (s, e)
+
+-- | Get the current context name from the monad
+getName :: CmmParse String
+getName = EC $ \c _ s -> return (s, c)
+
+-- | Set context name for a sub-parse
+withName :: String -> CmmParse a -> CmmParse a
+withName c' (EC fcode) = EC $ \_ e s -> fcode c' e s
+
+addDecl :: FastString -> Named -> ExtCode
+addDecl name named = EC $ \_ _ s -> return ((name, named) : s, ())
+
+
+-- | Add a new variable to the list of local declarations.
+-- The CmmExpr says where the value is stored.
+addVarDecl :: FastString -> CmmExpr -> ExtCode
+addVarDecl var expr = addDecl var (VarN expr)
+
+-- | Add a new label to the list of local declarations.
+addLabel :: FastString -> BlockId -> ExtCode
+addLabel name block_id = addDecl name (LabelN block_id)
+
+
+-- | Create a fresh local variable of a given type.
+newLocal
+ :: CmmType -- ^ data type
+ -> FastString -- ^ name of variable
+ -> CmmParse LocalReg -- ^ register holding the value
+
+newLocal ty name = do
+ u <- code newUnique
+ let reg = LocalReg u ty
+ addVarDecl name (CmmReg (CmmLocal reg))
+ return reg
+
+
+-- | Allocate a fresh label.
+newLabel :: FastString -> CmmParse BlockId
+newLabel name = do
+ u <- code newUnique
+ addLabel name (mkBlockId u)
+ return (mkBlockId u)
+
+-- | Add a local function to the environment.
+newFunctionName
+ :: FastString -- ^ name of the function
+ -> UnitId -- ^ package of the current module
+ -> ExtCode
+
+newFunctionName name pkg = addDecl name (FunN pkg)
+
+
+-- | Add an imported foreign label to the list of local declarations.
+-- If this is done at the start of the module the declaration will scope
+-- over the whole module.
+newImport
+ :: (FastString, CLabel)
+ -> CmmParse ()
+
+newImport (name, cmmLabel)
+ = addVarDecl name (CmmLit (CmmLabel cmmLabel))
+
+
+-- | Lookup the BlockId bound to the label with this name.
+-- If one hasn't been bound yet, create a fresh one based on the
+-- Unique of the name.
+lookupLabel :: FastString -> CmmParse BlockId
+lookupLabel name = do
+ env <- getEnv
+ return $
+ case lookupUFM env name of
+ Just (LabelN l) -> l
+ _other -> mkBlockId (newTagUnique (getUnique name) 'L')
+
+
+-- | Lookup the location of a named variable.
+-- Unknown names are treated as if they had been 'import'ed from the runtime system.
+-- This saves us a lot of bother in the RTS sources, at the expense of
+-- deferring some errors to link time.
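+-- For example, an unbound name such as "stg_gc_noregs" falls through to
+-- the final case below and becomes a code label in rtsUnitId.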
+lookupName :: FastString -> CmmParse CmmExpr
+lookupName name = do
+ env <- getEnv
+ return $
+ case lookupUFM env name of
+ Just (VarN e) -> e
+ Just (FunN pkg) -> CmmLit (CmmLabel (mkCmmCodeLabel pkg name))
+ _other -> CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId name))
+
+
+-- | Lift an FCode computation into the CmmParse monad
+code :: FCode a -> CmmParse a
+code fc = EC $ \_ _ s -> do
+ r <- fc
+ return (s, r)
+
+emit :: CmmAGraph -> CmmParse ()
+emit = code . F.emit
+
+emitLabel :: BlockId -> CmmParse ()
+emitLabel = code . F.emitLabel
+
+emitAssign :: CmmReg -> CmmExpr -> CmmParse ()
+emitAssign l r = code (F.emitAssign l r)
+
+emitStore :: CmmExpr -> CmmExpr -> CmmParse ()
+emitStore l r = code (F.emitStore l r)
+
+getCode :: CmmParse a -> CmmParse CmmAGraph
+getCode (EC ec) = EC $ \c e s -> do
+ ((s',_), gr) <- F.getCodeR (ec c e s)
+ return (s', gr)
+
+getCodeR :: CmmParse a -> CmmParse (a, CmmAGraph)
+getCodeR (EC ec) = EC $ \c e s -> do
+ ((s', r), gr) <- F.getCodeR (ec c e s)
+ return (s', (r,gr))
+
+getCodeScoped :: CmmParse a -> CmmParse (a, CmmAGraphScoped)
+getCodeScoped (EC ec) = EC $ \c e s -> do
+ ((s', r), gr) <- F.getCodeScoped (ec c e s)
+ return (s', (r,gr))
+
+emitOutOfLine :: BlockId -> CmmAGraphScoped -> CmmParse ()
+emitOutOfLine l g = code (F.emitOutOfLine l g)
+
+withUpdFrameOff :: UpdFrameOffset -> CmmParse () -> CmmParse ()
+withUpdFrameOff size inner
+ = EC $ \c e s -> F.withUpdFrameOff size $ (unEC inner) c e s
+
+getUpdFrameOff :: CmmParse UpdFrameOffset
+getUpdFrameOff = code $ F.getUpdFrameOff
diff --git a/compiler/GHC/StgToCmm/Foreign.hs b/compiler/GHC/StgToCmm/Foreign.hs
new file mode 100644
index 0000000000..dacaff41ba
--- /dev/null
+++ b/compiler/GHC/StgToCmm/Foreign.hs
@@ -0,0 +1,627 @@
+-----------------------------------------------------------------------------
+--
+-- Code generation for foreign calls.
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+module GHC.StgToCmm.Foreign (
+ cgForeignCall,
+ emitPrimCall, emitCCall,
+ emitForeignCall, -- For CmmParse
+ emitSaveThreadState,
+ saveThreadState,
+ emitLoadThreadState,
+ loadThreadState,
+ emitOpenNursery,
+ emitCloseNursery,
+ ) where
+
+import GhcPrelude hiding( succ, (<*>) )
+
+import StgSyn
+import GHC.StgToCmm.Prof (storeCurCCS, ccsType)
+import GHC.StgToCmm.Env
+import GHC.StgToCmm.Monad
+import GHC.StgToCmm.Utils
+import GHC.StgToCmm.Closure
+import GHC.StgToCmm.Layout
+
+import BlockId (newBlockId)
+import Cmm
+import CmmUtils
+import MkGraph
+import Type
+import RepType
+import CLabel
+import SMRep
+import ForeignCall
+import DynFlags
+import Maybes
+import Outputable
+import UniqSupply
+import BasicTypes
+
+import TyCoRep
+import TysPrim
+import Util (zipEqual)
+
+import Control.Monad
+
+-----------------------------------------------------------------------------
+-- Code generation for Foreign Calls
+-----------------------------------------------------------------------------
+
+-- | Emit code for a foreign call, and return the results to the sequel.
+-- Precondition: the length of the arguments list is the same as the
+-- arity of the foreign function.
+cgForeignCall :: ForeignCall -- the op
+ -> Type -- type of foreign function
+ -> [StgArg] -- x,y arguments
+ -> Type -- result type
+ -> FCode ReturnKind
+
+cgForeignCall (CCall (CCallSpec target cconv safety)) typ stg_args res_ty
+ = do { dflags <- getDynFlags
+ ; let -- in the stdcall calling convention, the symbol needs @size appended
+ -- to it, where size is the total number of bytes of arguments. We
+ -- attach this info to the CLabel here, and the CLabel pretty printer
+ -- will generate the suffix when the label is printed.
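+              -- (For example, a stdcall function taking two word-sized
+              -- arguments would get a "@8" suffix on 32-bit Windows; an
+              -- illustration only, the precise decoration is left to the
+              -- CLabel printer.)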
+ call_size args
+ | StdCallConv <- cconv = Just (sum (map arg_size args))
+ | otherwise = Nothing
+
+ -- ToDo: this might not be correct for 64-bit API
+ arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType dflags arg)
+ (wORD_SIZE dflags)
+ ; cmm_args <- getFCallArgs stg_args typ
+ ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
+ ; let ((call_args, arg_hints), cmm_target)
+ = case target of
+ StaticTarget _ _ _ False ->
+ panic "cgForeignCall: unexpected FFI value import"
+ StaticTarget _ lbl mPkgId True
+ -> let labelSource
+ = case mPkgId of
+ Nothing -> ForeignLabelInThisPackage
+ Just pkgId -> ForeignLabelInPackage pkgId
+ size = call_size cmm_args
+ in ( unzip cmm_args
+ , CmmLit (CmmLabel
+ (mkForeignLabel lbl size labelSource IsFunction)))
+
+ DynamicTarget -> case cmm_args of
+ (fn,_):rest -> (unzip rest, fn)
+ [] -> panic "cgForeignCall []"
+ fc = ForeignConvention cconv arg_hints res_hints CmmMayReturn
+ call_target = ForeignTarget cmm_target fc
+
+ -- we want to emit code for the call, and then emitReturn.
+ -- However, if the sequel is AssignTo, we shortcut a little
+ -- and generate a foreign call that assigns the results
+ -- directly. Otherwise we end up generating a bunch of
+ -- useless "r = r" assignments, which are not merely annoying:
+ -- they prevent the common block elimination from working correctly
+ -- in the case of a safe foreign call.
+ -- See Note [safe foreign call convention]
+ --
+ ; sequel <- getSequel
+ ; case sequel of
+ AssignTo assign_to_these _ ->
+ emitForeignCall safety assign_to_these call_target call_args
+
+ _something_else ->
+ do { _ <- emitForeignCall safety res_regs call_target call_args
+ ; emitReturn (map (CmmReg . CmmLocal) res_regs)
+ }
+ }
+
+{- Note [safe foreign call convention]
+
+The simple thing to do for a safe foreign call would be the same as an
+unsafe one: just
+
+ emitForeignCall ...
+ emitReturn ...
+
+but consider what happens in this case
+
+ case foo x y z of
+ (# s, r #) -> ...
+
+The sequel is AssignTo [r]. The call to newUnboxedTupleRegs picks [r]
+as the result reg, and we generate
+
+ r = foo(x,y,z) returns to L1 -- emitForeignCall
+ L1:
+ r = r -- emitReturn
+ goto L2
+L2:
+ ...
+
+Now L1 is a proc point (by definition, it is the continuation of the
+safe foreign call). If L2 does a heap check, then L2 will also be a
+proc point.
+
+Furthermore, the stack layout algorithm has to arrange to save r
+somewhere between the call and the jump to L1, which is annoying: we
+would have to treat r differently from the other live variables, which
+have to be saved *before* the call.
+
+So we adopt a special convention for safe foreign calls: the results
+are copied out according to the NativeReturn convention by the call,
+and the continuation of the call should copyIn the results. (The
+copyOut code is actually inserted when the safe foreign call is
+lowered later). The result regs attached to the safe foreign call are
+only used temporarily to hold the results before they are copied out.
+
+We will now generate this:
+
+ r = foo(x,y,z) returns to L1
+ L1:
+ r = R1 -- copyIn, inserted by mkSafeCall
+ goto L2
+ L2:
+ ... r ...
+
+And when the safe foreign call is lowered later (see Note [lower safe
+foreign calls]) we get this:
+
+ suspendThread()
+ r = foo(x,y,z)
+ resumeThread()
+ R1 = r -- copyOut, inserted by lowerSafeForeignCall
+ jump L1
+ L1:
+ r = R1 -- copyIn, inserted by mkSafeCall
+ goto L2
+ L2:
+ ... r ...
+
+Now consider what happens if L2 does a heap check: the Adams
+optimisation kicks in and commons up L1 with the heap-check
+continuation, resulting in just one proc point instead of two. Yay!
+-}
+
+
+emitCCall :: [(CmmFormal,ForeignHint)]
+ -> CmmExpr
+ -> [(CmmActual,ForeignHint)]
+ -> FCode ()
+emitCCall hinted_results fn hinted_args
+ = void $ emitForeignCall PlayRisky results target args
+ where
+ (args, arg_hints) = unzip hinted_args
+ (results, result_hints) = unzip hinted_results
+ target = ForeignTarget fn fc
+ fc = ForeignConvention CCallConv arg_hints result_hints CmmMayReturn
+
+
+emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode ()
+emitPrimCall res op args
+ = void $ emitForeignCall PlayRisky res (PrimTarget op) args
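+
+-- A typical use (for illustration, not a call site in this patch):
+--   emitPrimCall [res] (MO_PopCnt W64) [x]
+-- emits an unsafe ("risky") call to the population-count operation.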
+
+-- alternative entry point, used by CmmParse
+emitForeignCall
+ :: Safety
+ -> [CmmFormal] -- where to put the results
+ -> ForeignTarget -- the op
+ -> [CmmActual] -- arguments
+ -> FCode ReturnKind
+emitForeignCall safety results target args
+ | not (playSafe safety) = do
+ dflags <- getDynFlags
+ let (caller_save, caller_load) = callerSaveVolatileRegs dflags
+ emit caller_save
+ target' <- load_target_into_temp target
+ args' <- mapM maybe_assign_temp args
+ emit $ mkUnsafeCall target' results args'
+ emit caller_load
+ return AssignedDirectly
+
+ | otherwise = do
+ dflags <- getDynFlags
+ updfr_off <- getUpdFrameOff
+ target' <- load_target_into_temp target
+ args' <- mapM maybe_assign_temp args
+ k <- newBlockId
+ let (off, _, copyout) = copyInOflow dflags NativeReturn (Young k) results []
+ -- see Note [safe foreign call convention]
+ tscope <- getTickScope
+ emit $
+ ( mkStore (CmmStackSlot (Young k) (widthInBytes (wordWidth dflags)))
+ (CmmLit (CmmBlock k))
+ <*> mkLast (CmmForeignCall { tgt = target'
+ , res = results
+ , args = args'
+ , succ = k
+ , ret_args = off
+ , ret_off = updfr_off
+ , intrbl = playInterruptible safety })
+ <*> mkLabel k tscope
+ <*> copyout
+ )
+ return (ReturnedTo k off)
+
+load_target_into_temp :: ForeignTarget -> FCode ForeignTarget
+load_target_into_temp (ForeignTarget expr conv) = do
+ tmp <- maybe_assign_temp expr
+ return (ForeignTarget tmp conv)
+load_target_into_temp other_target@(PrimTarget _) =
+ return other_target
+
+-- What we want to do here is create a new temporary for the foreign
+-- call argument if it is not safe to use the expression directly,
+-- because the expression mentions caller-saves GlobalRegs (see
+-- Note [Register Parameter Passing]).
+--
+-- However, we can't pattern-match on the expression here, because
+-- this is used in a loop by CmmParse, and testing the expression
+-- results in a black hole. So we always create a temporary, and rely
+-- on CmmSink to clean it up later. (Yuck, ToDo). The generated code
+-- ends up being the same, at least for the RTS .cmm code.
+--
+maybe_assign_temp :: CmmExpr -> FCode CmmExpr
+maybe_assign_temp e = do
+ dflags <- getDynFlags
+ reg <- newTemp (cmmExprType dflags e)
+ emitAssign (CmmLocal reg) e
+ return (CmmReg (CmmLocal reg))
+
+-- -----------------------------------------------------------------------------
+-- Save/restore the thread state in the TSO
+
+-- This stuff can't be done in suspendThread/resumeThread, because it
+-- refers to global registers which aren't available in the C world.
+
+emitSaveThreadState :: FCode ()
+emitSaveThreadState = do
+ dflags <- getDynFlags
+ code <- saveThreadState dflags
+ emit code
+
+-- | Produce code to save the current thread state to @CurrentTSO@
+saveThreadState :: MonadUnique m => DynFlags -> m CmmAGraph
+saveThreadState dflags = do
+ tso <- newTemp (gcWord dflags)
+ close_nursery <- closeNursery dflags tso
+ pure $ catAGraphs [
+ -- tso = CurrentTSO;
+ mkAssign (CmmLocal tso) currentTSOExpr,
+ -- tso->stackobj->sp = Sp;
+ mkStore (cmmOffset dflags
+ (CmmLoad (cmmOffset dflags
+ (CmmReg (CmmLocal tso))
+ (tso_stackobj dflags))
+ (bWord dflags))
+ (stack_SP dflags))
+ spExpr,
+ close_nursery,
+ -- and save the current cost centre stack in the TSO when profiling:
+ if gopt Opt_SccProfilingOn dflags then
+ mkStore (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) cccsExpr
+ else mkNop
+ ]
+
+emitCloseNursery :: FCode ()
+emitCloseNursery = do
+ dflags <- getDynFlags
+ tso <- newTemp (bWord dflags)
+ code <- closeNursery dflags tso
+ emit $ mkAssign (CmmLocal tso) currentTSOExpr <*> code
+
+{- |
+@closeNursery dflags tso@ produces code to close the nursery.
+A local register holding the value of @CurrentTSO@ is expected for
+efficiency.
+
+Closing the nursery corresponds to the following code:
+
+@
+ tso = CurrentTSO;
+  cn = CurrentNursery;
+
+ // Update the allocation limit for the current thread. We don't
+  // check to see whether it has overflowed at this point; that check is
+ // made when we run out of space in the current heap block (stg_gc_noregs)
+ // and in the scheduler when context switching (schedulePostRunThread).
+ tso->alloc_limit -= Hp + WDS(1) - cn->start;
+
+ // Set cn->free to the next unoccupied word in the block
+ cn->free = Hp + WDS(1);
+@
+-}
+closeNursery :: MonadUnique m => DynFlags -> LocalReg -> m CmmAGraph
+closeNursery df tso = do
+ let tsoreg = CmmLocal tso
+ cnreg <- CmmLocal <$> newTemp (bWord df)
+ pure $ catAGraphs [
+ mkAssign cnreg currentNurseryExpr,
+
+ -- CurrentNursery->free = Hp+1;
+ mkStore (nursery_bdescr_free df cnreg) (cmmOffsetW df hpExpr 1),
+
+ let alloc =
+ CmmMachOp (mo_wordSub df)
+ [ cmmOffsetW df hpExpr 1
+ , CmmLoad (nursery_bdescr_start df cnreg) (bWord df)
+ ]
+
+ alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
+ in
+
+      -- tso->alloc_limit -= alloc
+ mkStore alloc_limit (CmmMachOp (MO_Sub W64)
+ [ CmmLoad alloc_limit b64
+ , CmmMachOp (mo_WordTo64 df) [alloc] ])
+ ]
+
+emitLoadThreadState :: FCode ()
+emitLoadThreadState = do
+ dflags <- getDynFlags
+ code <- loadThreadState dflags
+ emit code
+
+-- | Produce code to load the current thread state from @CurrentTSO@
+loadThreadState :: MonadUnique m => DynFlags -> m CmmAGraph
+loadThreadState dflags = do
+ tso <- newTemp (gcWord dflags)
+ stack <- newTemp (gcWord dflags)
+ open_nursery <- openNursery dflags tso
+ pure $ catAGraphs [
+ -- tso = CurrentTSO;
+ mkAssign (CmmLocal tso) currentTSOExpr,
+ -- stack = tso->stackobj;
+ mkAssign (CmmLocal stack) (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_stackobj dflags)) (bWord dflags)),
+ -- Sp = stack->sp;
+ mkAssign spReg (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_SP dflags)) (bWord dflags)),
+ -- SpLim = stack->stack + RESERVED_STACK_WORDS;
+ mkAssign spLimReg (cmmOffsetW dflags (cmmOffset dflags (CmmReg (CmmLocal stack)) (stack_STACK dflags))
+ (rESERVED_STACK_WORDS dflags)),
+ -- HpAlloc = 0;
+ -- HpAlloc is assumed to be set to non-zero only by a failed
+      --   heap check, see HeapStackCheck.cmm:GC_GENERIC
+ mkAssign hpAllocReg (zeroExpr dflags),
+ open_nursery,
+ -- and load the current cost centre stack from the TSO when profiling:
+ if gopt Opt_SccProfilingOn dflags
+ then storeCurCCS
+ (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso))
+ (tso_CCCS dflags)) (ccsType dflags))
+ else mkNop
+ ]
+
+
+emitOpenNursery :: FCode ()
+emitOpenNursery = do
+ dflags <- getDynFlags
+ tso <- newTemp (bWord dflags)
+ code <- openNursery dflags tso
+ emit $ mkAssign (CmmLocal tso) currentTSOExpr <*> code
+
+{- |
+@openNursery dflags tso@ produces code to open the nursery. A local register
+holding the value of @CurrentTSO@ is expected for efficiency.
+
+Opening the nursery corresponds to the following code:
+
+@
+ tso = CurrentTSO;
+ cn = CurrentNursery;
+ bdfree = CurrentNursery->free;
+ bdstart = CurrentNursery->start;
+
+ // We *add* the currently occupied portion of the nursery block to
+ // the allocation limit, because we will subtract it again in
+ // closeNursery.
+ tso->alloc_limit += bdfree - bdstart;
+
+ // Set Hp to the last occupied word of the heap block. Why not the
+  // next unoccupied word? Doing it this way means that we get to use
+ // an offset of zero more often, which might lead to slightly smaller
+ // code on some architectures.
+ Hp = bdfree - WDS(1);
+
+ // Set HpLim to the end of the current nursery block (note that this block
+  // might be a block group, consisting of several adjacent blocks).
+ HpLim = bdstart + CurrentNursery->blocks*BLOCK_SIZE_W - 1;
+@
+-}
+openNursery :: MonadUnique m => DynFlags -> LocalReg -> m CmmAGraph
+openNursery df tso = do
+ let tsoreg = CmmLocal tso
+ cnreg <- CmmLocal <$> newTemp (bWord df)
+ bdfreereg <- CmmLocal <$> newTemp (bWord df)
+ bdstartreg <- CmmLocal <$> newTemp (bWord df)
+
+ -- These assignments are carefully ordered to reduce register
+ -- pressure and generate not completely awful code on x86. To see
+ -- what code we generate, look at the assembly for
+ -- stg_returnToStackTop in rts/StgStartup.cmm.
+ pure $ catAGraphs [
+ mkAssign cnreg currentNurseryExpr,
+ mkAssign bdfreereg (CmmLoad (nursery_bdescr_free df cnreg) (bWord df)),
+
+ -- Hp = CurrentNursery->free - 1;
+ mkAssign hpReg (cmmOffsetW df (CmmReg bdfreereg) (-1)),
+
+ mkAssign bdstartreg (CmmLoad (nursery_bdescr_start df cnreg) (bWord df)),
+
+ -- HpLim = CurrentNursery->start +
+ -- CurrentNursery->blocks*BLOCK_SIZE_W - 1;
+ mkAssign hpLimReg
+ (cmmOffsetExpr df
+ (CmmReg bdstartreg)
+ (cmmOffset df
+ (CmmMachOp (mo_wordMul df) [
+ CmmMachOp (MO_SS_Conv W32 (wordWidth df))
+ [CmmLoad (nursery_bdescr_blocks df cnreg) b32],
+ mkIntExpr df (bLOCK_SIZE df)
+ ])
+ (-1)
+ )
+ ),
+
+ -- alloc = bd->free - bd->start
+ let alloc =
+ CmmMachOp (mo_wordSub df) [CmmReg bdfreereg, CmmReg bdstartreg]
+
+ alloc_limit = cmmOffset df (CmmReg tsoreg) (tso_alloc_limit df)
+ in
+
+ -- tso->alloc_limit += alloc
+ mkStore alloc_limit (CmmMachOp (MO_Add W64)
+ [ CmmLoad alloc_limit b64
+ , CmmMachOp (mo_WordTo64 df) [alloc] ])
+
+ ]
+
+nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks
+ :: DynFlags -> CmmReg -> CmmExpr
+nursery_bdescr_free dflags cn =
+ cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_free dflags)
+nursery_bdescr_start dflags cn =
+ cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_start dflags)
+nursery_bdescr_blocks dflags cn =
+ cmmOffset dflags (CmmReg cn) (oFFSET_bdescr_blocks dflags)
+
+tso_stackobj, tso_CCCS, tso_alloc_limit, stack_STACK, stack_SP :: DynFlags -> ByteOff
+tso_stackobj dflags = closureField dflags (oFFSET_StgTSO_stackobj dflags)
+tso_alloc_limit dflags = closureField dflags (oFFSET_StgTSO_alloc_limit dflags)
+tso_CCCS dflags = closureField dflags (oFFSET_StgTSO_cccs dflags)
+stack_STACK dflags = closureField dflags (oFFSET_StgStack_stack dflags)
+stack_SP dflags = closureField dflags (oFFSET_StgStack_sp dflags)
+
+
+closureField :: DynFlags -> ByteOff -> ByteOff
+closureField dflags off = off + fixedHdrSize dflags
+
+-- Note [Unlifted boxed arguments to foreign calls]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- For certain types passed to foreign calls, we adjust the actual
+-- value passed to the call. For ByteArray#, Array#, SmallArray#,
+-- and ArrayArray#, we pass the address of the array's payload, not
+-- the address of the heap object. For example, consider
+-- foreign import "c_foo" foo :: ByteArray# -> Int# -> IO ()
+-- At a Haskell call like `foo x y`, we'll generate a C call that
+-- is more like
+-- c_foo( x+8, y )
+-- where the "+8" takes the heap pointer (x :: ByteArray#) and moves
+-- it past the header words of the ByteArray object to point directly
+-- to the data inside the ByteArray#. (The exact offset depends
+-- on the target architecture and on profiling.) By contrast, (y :: Int#)
+-- requires no such adjustment.
+--
+-- This adjustment is performed by 'add_shim'. The size of the
+-- adjustment depends on the type of heap object. But
+-- how can we determine that type? There are two available options.
+-- We could use the types of the actual values that the foreign call
+-- has been applied to, or we could use the types present in the
+-- foreign function's type. Prior to GHC 8.10, we used the former
+-- strategy since it is a little simpler. However, in issue #16650
+-- and more compellingly in the comments of
+-- https://gitlab.haskell.org/ghc/ghc/merge_requests/939, it was
+-- demonstrated that this leads to bad behavior in the presence
+-- of unsafeCoerce#. Returning to the above example, suppose the
+-- Haskell call looked like
+--   foo (unsafeCoerce# p) i
+-- where the types of expressions comprising the arguments are
+-- p :: (Any :: TYPE 'UnliftedRep)
+-- i :: Int#
+-- so that the unsafe-coerce is between Any and ByteArray#.
+-- These two types have the same kind (they are both represented by
+-- a heap pointer) so no GC errors will occur if we do this unsafe coerce.
+-- By the time this gets to the code generator the cast has been
+-- discarded so we have
+-- foo p i
+-- But we *must* adjust the pointer to p by a ByteArray# shim,
+-- *not* by an Any shim (the Any shim involves no offset at all).
+--
+-- To avoid this bad behavior, we adopt the second strategy: use
+-- the types present in the foreign function's type.
+-- In collectStgFArgTypes, we convert the foreign function's
+-- type to a list of StgFArgType. Then, in add_shim, we interpret
+-- these as numeric offsets.
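+--
+-- As a further illustration (a sketch), a signature such as
+--   foreign import ccall "c_foo" foo :: ByteArray# -> Int# -> IO ()
+-- is collected as
+--   collectStgFArgTypes => [StgByteArrayType, StgPlainType]
+-- so add_shim offsets the ByteArray# argument by arrWordsHdrSize and
+-- leaves the Int# argument untouched.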
+
+getFCallArgs ::
+ [StgArg]
+ -> Type -- the type of the foreign function
+ -> FCode [(CmmExpr, ForeignHint)]
+-- (a) Drop void args
+-- (b) Add foreign-call shim code
+-- It's (b) that makes this differ from getNonVoidArgAmodes
+-- Precondition: args and the list of argument types have the same length
+-- See Note [Unlifted boxed arguments to foreign calls]
+getFCallArgs args typ
+ = do { mb_cmms <- mapM get (zipEqual "getFCallArgs" args (collectStgFArgTypes typ))
+ ; return (catMaybes mb_cmms) }
+ where
+ get (arg,typ)
+ | null arg_reps
+ = return Nothing
+ | otherwise
+ = do { cmm <- getArgAmode (NonVoid arg)
+ ; dflags <- getDynFlags
+ ; return (Just (add_shim dflags typ cmm, hint)) }
+ where
+ arg_ty = stgArgType arg
+ arg_reps = typePrimRep arg_ty
+ hint = typeForeignHint arg_ty
+
+-- The minimum amount of information needed to determine
+-- the offset to apply to an argument to a foreign call.
+-- See Note [Unlifted boxed arguments to foreign calls]
+data StgFArgType
+ = StgPlainType
+ | StgArrayType
+ | StgSmallArrayType
+ | StgByteArrayType
+
+-- See Note [Unlifted boxed arguments to foreign calls]
+add_shim :: DynFlags -> StgFArgType -> CmmExpr -> CmmExpr
+add_shim dflags ty expr = case ty of
+ StgPlainType -> expr
+ StgArrayType -> cmmOffsetB dflags expr (arrPtrsHdrSize dflags)
+ StgSmallArrayType -> cmmOffsetB dflags expr (smallArrPtrsHdrSize dflags)
+ StgByteArrayType -> cmmOffsetB dflags expr (arrWordsHdrSize dflags)
+
+-- From a function, extract information needed to determine
+-- the offset of each argument when used as a C FFI argument.
+-- See Note [Unlifted boxed arguments to foreign calls]
+collectStgFArgTypes :: Type -> [StgFArgType]
+collectStgFArgTypes = go []
+ where
+ -- Skip foralls
+ go bs (ForAllTy _ res) = go bs res
+ go bs (AppTy{}) = reverse bs
+ go bs (TyConApp{}) = reverse bs
+ go bs (LitTy{}) = reverse bs
+ go bs (TyVarTy{}) = reverse bs
+ go _ (CastTy{}) = panic "myCollectTypeArgs: CastTy"
+ go _ (CoercionTy{}) = panic "myCollectTypeArgs: CoercionTy"
+ go bs (FunTy {ft_arg = arg, ft_res=res}) =
+ go (typeToStgFArgType arg:bs) res
+
+-- Choose the offset based on the type. For anything other
+-- than an unlifted boxed type, there is no offset.
+-- See Note [Unlifted boxed arguments to foreign calls]
+typeToStgFArgType :: Type -> StgFArgType
+typeToStgFArgType typ
+ | tycon == arrayPrimTyCon = StgArrayType
+ | tycon == mutableArrayPrimTyCon = StgArrayType
+ | tycon == arrayArrayPrimTyCon = StgArrayType
+ | tycon == mutableArrayArrayPrimTyCon = StgArrayType
+ | tycon == smallArrayPrimTyCon = StgSmallArrayType
+ | tycon == smallMutableArrayPrimTyCon = StgSmallArrayType
+ | tycon == byteArrayPrimTyCon = StgByteArrayType
+ | tycon == mutableByteArrayPrimTyCon = StgByteArrayType
+ | otherwise = StgPlainType
+ where
+ -- Should be a tycon app, since this is a foreign call. We look
+ -- through newtypes so the offset does not change if a user replaces
+ -- a type in a foreign function signature with a representationally
+ -- equivalent newtype.
+ tycon = tyConAppTyCon (unwrapType typ)
+
diff --git a/compiler/GHC/StgToCmm/Heap.hs b/compiler/GHC/StgToCmm/Heap.hs
new file mode 100644
index 0000000000..a1f016c13c
--- /dev/null
+++ b/compiler/GHC/StgToCmm/Heap.hs
@@ -0,0 +1,680 @@
+-----------------------------------------------------------------------------
+--
+-- Stg to C--: heap management functions
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+module GHC.StgToCmm.Heap (
+ getVirtHp, setVirtHp, setRealHp,
+ getHpRelOffset,
+
+ entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
+ heapStackCheckGen,
+ entryHeapCheck',
+
+ mkStaticClosureFields, mkStaticClosure,
+
+ allocDynClosure, allocDynClosureCmm, allocHeapClosure,
+ emitSetDynHdr
+ ) where
+
+import GhcPrelude hiding ((<*>))
+
+import StgSyn
+import CLabel
+import GHC.StgToCmm.Layout
+import GHC.StgToCmm.Utils
+import GHC.StgToCmm.Monad
+import GHC.StgToCmm.Prof (profDynAlloc, dynProfHdr, staticProfHdr)
+import GHC.StgToCmm.Ticky
+import GHC.StgToCmm.Closure
+import GHC.StgToCmm.Env
+
+import MkGraph
+
+import Hoopl.Label
+import SMRep
+import BlockId
+import Cmm
+import CmmUtils
+import CostCentre
+import IdInfo( CafInfo(..), mayHaveCafRefs )
+import Id ( Id )
+import Module
+import DynFlags
+import FastString( mkFastString, fsLit )
+import Panic( sorry )
+
+import Control.Monad (when)
+import Data.Maybe (isJust)
+
+-----------------------------------------------------------
+-- Initialise dynamic heap objects
+-----------------------------------------------------------
+
+allocDynClosure
+ :: Maybe Id
+ -> CmmInfoTable
+ -> LambdaFormInfo
+ -> CmmExpr -- Cost Centre to stick in the object
+ -> CmmExpr -- Cost Centre to blame for this alloc
+ -- (usually the same; sometimes "OVERHEAD")
+
+ -> [(NonVoid StgArg, VirtualHpOffset)] -- Offsets from start of object
+ -- ie Info ptr has offset zero.
+ -- No void args in here
+ -> FCode CmmExpr -- returns Hp+n
+
+allocDynClosureCmm
+ :: Maybe Id -> CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr
+ -> [(CmmExpr, ByteOff)]
+ -> FCode CmmExpr -- returns Hp+n
+
+-- allocDynClosure allocates the thing in the heap,
+-- and modifies the virtual Hp to account for this.
+
+-- allocDynClosure returns an (Hp+8) CmmExpr, and hence the result is
+-- only valid until Hp is changed. The caller should assign the
+-- result to a LocalReg if it is required to remain live.
+--
+-- The reason we don't assign it to a LocalReg here is that the caller
+-- is often about to call regIdInfo, which immediately assigns the
+-- result of allocDynClosure to a new temp in order to add the tag.
+-- So by not generating a LocalReg here we avoid a common source of
+-- new temporaries and save some compile time. This can be quite
+-- significant - see test T4801.
+
+
+allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets = do
+ let (args, offsets) = unzip args_w_offsets
+ cmm_args <- mapM getArgAmode args -- No void args
+ allocDynClosureCmm mb_id info_tbl lf_info
+ use_cc _blame_cc (zip cmm_args offsets)
+
+
+allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do
+ -- SAY WHAT WE ARE ABOUT TO DO
+ let rep = cit_rep info_tbl
+ tickyDynAlloc mb_id rep lf_info
+ let info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl))
+ allocHeapClosure rep info_ptr use_cc amodes_w_offsets
+
+
+-- | Low-level heap object allocation.
+allocHeapClosure
+ :: SMRep -- ^ representation of the object
+ -> CmmExpr -- ^ info pointer
+ -> CmmExpr -- ^ cost centre
+ -> [(CmmExpr,ByteOff)] -- ^ payload
+ -> FCode CmmExpr -- ^ returns the address of the object
+allocHeapClosure rep info_ptr use_cc payload = do
+ profDynAlloc rep use_cc
+
+ virt_hp <- getVirtHp
+
+ -- Find the offset of the info-ptr word
+ let info_offset = virt_hp + 1
+ -- info_offset is the VirtualHpOffset of the first
+ -- word of the new object
+ -- Remember, virtHp points to last allocated word,
+ -- ie 1 *before* the info-ptr word of new object.
+
+ base <- getHpRelOffset info_offset
+ emitComment $ mkFastString "allocHeapClosure"
+ emitSetDynHdr base info_ptr use_cc
+
+ -- Fill in the fields
+ hpStore base payload
+
+ -- Bump the virtual heap pointer
+ dflags <- getDynFlags
+ setVirtHp (virt_hp + heapClosureSizeW dflags rep)
+
+ return base
+
+
+emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
+emitSetDynHdr base info_ptr ccs
+ = do dflags <- getDynFlags
+ hpStore base (zip (header dflags) [0, wORD_SIZE dflags ..])
+ where
+ header :: DynFlags -> [CmmExpr]
+ header dflags = [info_ptr] ++ dynProfHdr dflags ccs
+ -- ToDo: Parallel stuff
+ -- No ticky header
+
+-- Store the item (expr,off) in base[off]
+hpStore :: CmmExpr -> [(CmmExpr, ByteOff)] -> FCode ()
+hpStore base vals = do
+ dflags <- getDynFlags
+ sequence_ $
+ [ emitStore (cmmOffsetB dflags base off) val | (val,off) <- vals ]
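+
+-- For example (a sketch): hpStore base [(e1,0), (e2,8)] emits
+--   base[0] = e1; base[8] = e2;
+-- one store per (value, byte-offset) pair.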
+
+-----------------------------------------------------------
+-- Layout of static closures
+-----------------------------------------------------------
+
+-- Make a static closure, adding on any extra padding needed for CAFs,
+-- and adding a static link field if necessary.
+
+mkStaticClosureFields
+ :: DynFlags
+ -> CmmInfoTable
+ -> CostCentreStack
+ -> CafInfo
+ -> [CmmLit] -- Payload
+ -> [CmmLit] -- The full closure
+mkStaticClosureFields dflags info_tbl ccs caf_refs payload
+ = mkStaticClosure dflags info_lbl ccs payload padding
+ static_link_field saved_info_field
+ where
+ info_lbl = cit_lbl info_tbl
+
+ -- CAFs must have consistent layout, regardless of whether they
+ -- are actually updatable or not. The layout of a CAF is:
+ --
+ -- 3 saved_info
+ -- 2 static_link
+ -- 1 indirectee
+ -- 0 info ptr
+ --
+ -- the static_link and saved_info fields must always be in the
+ -- same place. So we use isThunkRep rather than closureUpdReqd
+ -- here:
+
+ is_caf = isThunkRep (cit_rep info_tbl)
+
+ padding
+ | is_caf && null payload = [mkIntCLit dflags 0]
+ | otherwise = []
+
+ static_link_field
+ | is_caf || staticClosureNeedsLink (mayHaveCafRefs caf_refs) info_tbl
+ = [static_link_value]
+ | otherwise
+ = []
+
+ saved_info_field
+ | is_caf = [mkIntCLit dflags 0]
+ | otherwise = []
+
+ -- For a static constructor which has NoCafRefs, we set the
+ -- static link field to a non-zero value so the garbage
+ -- collector will ignore it.
+ static_link_value
+ | mayHaveCafRefs caf_refs = mkIntCLit dflags 0
+ | otherwise = mkIntCLit dflags 3 -- No CAF refs
+ -- See Note [STATIC_LINK fields]
+ -- in rts/sm/Storage.h
+
+mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit]
+ -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
+mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field
+ = [CmmLabel info_lbl]
+ ++ staticProfHdr dflags ccs
+ ++ payload
+ ++ padding
+ ++ static_link_field
+ ++ saved_info_field
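+
+-- A sketch (no profiling, assuming the closure may have CAF refs): an
+-- updatable CAF with an empty payload becomes
+--   [CmmLabel info_lbl, 0 {- padding -}, 0 {- static_link -},
+--    0 {- saved_info -}]
+-- matching the four-word CAF layout pictured above.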
+
+-----------------------------------------------------------
+-- Heap overflow checking
+-----------------------------------------------------------
+
+{- Note [Heap checks]
+ ~~~~~~~~~~~~~~~~~~
+Heap checks come in various forms. We provide the following entry
+points to the runtime system, all of which use the native C-- entry
+convention.
+
+ * gc() performs garbage collection and returns
+ nothing to its caller
+
+ * A series of canned entry points like
+ r = gc_1p( r )
+ where r is a pointer. This performs gc, and
+ then returns its argument r to its caller.
+
+ * A series of canned entry points like
+ gcfun_2p( f, x, y )
+ where f is a function closure of arity 2
+ This performs garbage collection, keeping alive the
+ three argument ptrs, and then tail-calls f(x,y)
+
+These are used in the following circumstances
+
+* entryHeapCheck: Function entry
+ (a) With a canned GC entry sequence
+ f( f_clo, x:ptr, y:ptr ) {
+ Hp = Hp+8
+ if Hp > HpLim goto L
+ ...
+ L: HpAlloc = 8
+ jump gcfun_2p( f_clo, x, y ) }
+ Note the tail call to the garbage collector;
+ it should do no register shuffling
+
+ (b) No canned sequence
+ f( f_clo, x:ptr, y:ptr, ...etc... ) {
+ T: Hp = Hp+8
+ if Hp > HpLim goto L
+ ...
+ L: HpAlloc = 8
+ call gc() -- Needs an info table
+ goto T }
+
+* altHeapCheck: Immediately following an eval
+ Started as
+ case f x y of r { (p,q) -> rhs }
+ (a) With a canned sequence for the results of f
+ (which is the very common case since
+ all boxed cases return just one pointer)
+ ...
+ r = f( x, y )
+ K: -- K needs an info table
+ Hp = Hp+8
+ if Hp > HpLim goto L
+ ...code for rhs...
+
+ L: r = gc_1p( r )
+ goto K
+
+ Here, the info table needed by the call
+ to gc_1p should be the *same* as the
+ one for the call to f; the C-- optimiser
+ spots this sharing opportunity.
+
+ (b) No canned sequence for results of f
+ Note second info table
+ ...
+ (r1,r2,r3) = call f( x, y )
+ K:
+ Hp = Hp+8
+ if Hp > HpLim goto L
+ ...code for rhs...
+
+ L: call gc() -- Extra info table here
+ goto K
+
+* generalHeapCheck: Anywhere else
+ e.g. entry to thunk
+ case branch *not* following eval,
+ or let-no-escape
+ Exactly the same as the previous case:
+
+ K: -- K needs an info table
+ Hp = Hp+8
+ if Hp > HpLim goto L
+ ...
+
+ L: call gc()
+ goto K
+-}
+
+--------------------------------------------------------------
+-- A heap/stack check at a function or thunk entry point.
+
+entryHeapCheck :: ClosureInfo
+ -> Maybe LocalReg -- Function (closure environment)
+ -> Int -- Arity -- not same as len args b/c of voids
+ -> [LocalReg] -- Non-void args (empty for thunk)
+ -> FCode ()
+ -> FCode ()
+
+entryHeapCheck cl_info nodeSet arity args code
+ = entryHeapCheck' is_fastf node arity args code
+ where
+ node = case nodeSet of
+ Just r -> CmmReg (CmmLocal r)
+ Nothing -> CmmLit (CmmLabel $ staticClosureLabel cl_info)
+
+ is_fastf = case closureFunInfo cl_info of
+ Just (_, ArgGen _) -> False
+ _otherwise -> True
+
+-- | lower-level version for CmmParse
+entryHeapCheck' :: Bool -- is a known function pattern
+ -> CmmExpr -- expression for the closure pointer
+ -> Int -- Arity -- not same as len args b/c of voids
+ -> [LocalReg] -- Non-void args (empty for thunk)
+ -> FCode ()
+ -> FCode ()
+entryHeapCheck' is_fastf node arity args code
+ = do dflags <- getDynFlags
+ let is_thunk = arity == 0
+
+ args' = map (CmmReg . CmmLocal) args
+ stg_gc_fun = CmmReg (CmmGlobal GCFun)
+ stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1)
+
+ {- Thunks: jump stg_gc_enter_1
+
+ Function (fast): call (NativeNode) stg_gc_fun(fun, args)
+
+ Function (slow): call (slow) stg_gc_fun(fun, args)
+ -}
+ gc_call upd
+ | is_thunk
+ = mkJump dflags NativeNodeCall stg_gc_enter1 [node] upd
+
+ | is_fastf
+ = mkJump dflags NativeNodeCall stg_gc_fun (node : args') upd
+
+ | otherwise
+ = mkJump dflags Slow stg_gc_fun (node : args') upd
+
+ updfr_sz <- getUpdFrameOff
+
+ loop_id <- newBlockId
+ emitLabel loop_id
+ heapCheck True True (gc_call updfr_sz <*> mkBranch loop_id) code
+
+-- ------------------------------------------------------------
+-- A heap/stack check in a case alternative
+
+
+-- If there are multiple alts and we need to GC, but don't have a
+-- continuation already (the scrut was simple), then we should
+-- pre-generate the continuation. (if there are multiple alts it is
+-- always a canned GC point).
+
+-- altHeapCheck:
+-- If we have a return continuation,
+-- then if it is a canned GC pattern,
+-- then we do mkJumpReturnsTo
+-- else we do a normal call to stg_gc_noregs
+-- else if it is a canned GC pattern,
+-- then generate the continuation and do mkCallReturnsTo
+-- else we do a normal call to stg_gc_noregs
+
+altHeapCheck :: [LocalReg] -> FCode a -> FCode a
+altHeapCheck regs code = altOrNoEscapeHeapCheck False regs code
+
+altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a
+altOrNoEscapeHeapCheck checkYield regs code = do
+ dflags <- getDynFlags
+ case cannedGCEntryPoint dflags regs of
+ Nothing -> genericGC checkYield code
+ Just gc -> do
+ lret <- newBlockId
+ let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) regs []
+ lcont <- newBlockId
+ tscope <- getTickScope
+ emitOutOfLine lret (copyin <*> mkBranch lcont, tscope)
+ emitLabel lcont
+ cannedGCReturnsTo checkYield False gc regs lret off code
+
+altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
+altHeapCheckReturnsTo regs lret off code
+ = do dflags <- getDynFlags
+ case cannedGCEntryPoint dflags regs of
+ Nothing -> genericGC False code
+ Just gc -> cannedGCReturnsTo False True gc regs lret off code
+
+-- noEscapeHeapCheck is implemented identically to altHeapCheck (which is
+-- more efficient because it can be optimized away in the non-allocating
+-- case); a no-escape check cannot be optimized away, because it may
+-- occur in a loop
+noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a
+noEscapeHeapCheck regs code = altOrNoEscapeHeapCheck True regs code
+
+cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
+ -> FCode a
+ -> FCode a
+cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code
+ = do dflags <- getDynFlags
+ updfr_sz <- getUpdFrameOff
+ heapCheck False checkYield (gc_call dflags gc updfr_sz) code
+ where
+ reg_exprs = map (CmmReg . CmmLocal) regs
+ -- Note [stg_gc arguments]
+
+ -- NB. we use the NativeReturn convention for passing arguments
+ -- to the canned heap-check routines, because we are in a case
+ -- alternative and hence the [LocalReg] was passed to us in the
+ -- NativeReturn convention.
+ gc_call dflags label sp
+ | cont_on_stack
+ = mkJumpReturnsTo dflags label NativeReturn reg_exprs lret off sp
+ | otherwise
+ = mkCallReturnsTo dflags label NativeReturn reg_exprs lret off sp []
+
+genericGC :: Bool -> FCode a -> FCode a
+genericGC checkYield code
+ = do updfr_sz <- getUpdFrameOff
+ lretry <- newBlockId
+ emitLabel lretry
+ call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
+ heapCheck False checkYield (call <*> mkBranch lretry) code
+
+cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr
+cannedGCEntryPoint dflags regs
+ = case map localRegType regs of
+ [] -> Just (mkGcLabel "stg_gc_noregs")
+ [ty]
+ | isGcPtrType ty -> Just (mkGcLabel "stg_gc_unpt_r1")
+ | isFloatType ty -> case width of
+ W32 -> Just (mkGcLabel "stg_gc_f1")
+ W64 -> Just (mkGcLabel "stg_gc_d1")
+ _ -> Nothing
+
+ | width == wordWidth dflags -> Just (mkGcLabel "stg_gc_unbx_r1")
+ | width == W64 -> Just (mkGcLabel "stg_gc_l1")
+ | otherwise -> Nothing
+ where
+ width = typeWidth ty
+ [ty1,ty2]
+ | isGcPtrType ty1
+ && isGcPtrType ty2 -> Just (mkGcLabel "stg_gc_pp")
+ [ty1,ty2,ty3]
+ | isGcPtrType ty1
+ && isGcPtrType ty2
+ && isGcPtrType ty3 -> Just (mkGcLabel "stg_gc_ppp")
+ [ty1,ty2,ty3,ty4]
+ | isGcPtrType ty1
+ && isGcPtrType ty2
+ && isGcPtrType ty3
+ && isGcPtrType ty4 -> Just (mkGcLabel "stg_gc_pppp")
+ _otherwise -> Nothing
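+
+-- For example: a single pointer result selects stg_gc_unpt_r1 and two
+-- pointers select stg_gc_pp, but a mixed pair such as a pointer and a
+-- word has no canned entry point, so callers fall back to genericGC.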
+
+-- Note [stg_gc arguments]
+-- It might seem that we could avoid passing the arguments to the
+-- stg_gc function, because they are already in the right registers.
+-- While this is usually the case, it isn't always. Sometimes the
+-- code generator has cleverly avoided the eval in a case, e.g. in
+-- ffi/should_run/4221.hs we found
+--
+-- case a_r1mb of z
+-- FunPtr x y -> ...
+--
+-- where a_r1mb is bound to a top-level constructor, and is known to be
+-- evaluated. The codegen just assigns x, y and z, and continues;
+-- R1 is never assigned.
+--
+-- So we'll have to rely on optimisations to eliminate these
+-- assignments where possible.
+
+
+-- | The generic GC procedure; no params, no results
+generic_gc :: CmmExpr
+generic_gc = mkGcLabel "stg_gc_noregs"
+
+-- | Create a CLabel for calling a garbage collector entry point
+mkGcLabel :: String -> CmmExpr
+mkGcLabel s = CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit s)))
+
+-------------------------------
+heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a
+heapCheck checkStack checkYield do_gc code
+ = getHeapUsage $ \ hpHw ->
+ -- Emit heap checks, but be sure to do it lazily so
+ -- that the conditionals on hpHw don't cause a black hole
+ do { dflags <- getDynFlags
+ ; let mb_alloc_bytes
+ | hpHw > mBLOCK_SIZE = sorry $ unlines
+ [" Trying to allocate more than "++show mBLOCK_SIZE++" bytes.",
+ "",
+ "This is currently not possible due to a limitation of GHC's code generator.",
+ "See https://gitlab.haskell.org/ghc/ghc/issues/4505 for details.",
+ "Suggestion: read data from a file instead of having large static data",
+ "structures in code."]
+ | hpHw > 0 = Just (mkIntExpr dflags (hpHw * (wORD_SIZE dflags)))
+ | otherwise = Nothing
+ where mBLOCK_SIZE = bLOCKS_PER_MBLOCK dflags * bLOCK_SIZE_W dflags
+ stk_hwm | checkStack = Just (CmmLit CmmHighStackMark)
+ | otherwise = Nothing
+ ; codeOnly $ do_checks stk_hwm checkYield mb_alloc_bytes do_gc
+ ; tickyAllocHeap True hpHw
+ ; setRealHp hpHw
+ ; code }
+
+heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode ()
+heapStackCheckGen stk_hwm mb_bytes
+ = do updfr_sz <- getUpdFrameOff
+ lretry <- newBlockId
+ emitLabel lretry
+ call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
+ do_checks stk_hwm False mb_bytes (call <*> mkBranch lretry)
+
+-- Note [Single stack check]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~
+-- When compiling a function we can determine how much stack space it
+-- will use. We therefore need to perform only a single stack check at
+-- the beginning of a function to see if we have enough stack space.
+--
+-- The check boils down to comparing Sp-N with SpLim, where N is the
+-- amount of stack space needed (see Note [Stack usage] below). *BUT*
+-- at this stage of the pipeline we are not supposed to refer to Sp
+-- itself, because the stack is not yet manifest, so we don't quite
+-- know where Sp is pointing.
+
+-- So instead of referring directly to Sp - as we used to do in the
+-- past - the code generator uses (old + 0) in the stack check. That
+-- is the address of the first word of the old area, so if we add N
+-- we'll get the address of highest used word.
+--
+-- This makes the check robust. For example, while we need to perform
+-- only one stack check for each function, we could in theory place
+-- more stack checks later in the function. They would be redundant,
+-- but not incorrect (in a sense that they should not change program
+-- behaviour). We need to make sure however that a stack check
+-- inserted after incrementing the stack pointer checks for a
+-- respectively smaller stack space. This would not be the case if the
+-- code generator produced direct references to Sp. By referencing
+-- (old + 0) we make sure that we always check for a correct amount of
+-- stack: when converting (old + 0) to Sp the stack layout phase takes
+-- into account changes already made to stack pointer. The idea for
+-- this change came from observations made while debugging #8275.
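+--
+-- Concretely (a sketch of what do_checks emits below):
+--   if ((old + 0) - CmmHighStackMark < SpLim) goto gc;
+-- and (old + 0) is rewritten to an Sp-relative address only when the
+-- stack is made manifest.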
+
+-- Note [Stack usage]
+-- ~~~~~~~~~~~~~~~~~~
+-- At the moment we convert from STG to Cmm we don't know N, the
+-- number of bytes of stack that the function will use, so we use a
+-- special late-bound CmmLit, namely
+-- CmmHighStackMark
+-- to stand for the number of bytes needed. When the stack is made
+-- manifest, the number of bytes needed is calculated, and used to
+-- replace occurrences of CmmHighStackMark
+--
+-- The (Maybe CmmExpr) passed to do_checks is usually
+-- Just (CmmLit CmmHighStackMark)
+-- but can also be (in certain hand-written RTS functions)
+-- Just (CmmLit 8) or some other fixed value.
+-- If it is Nothing, we don't generate a stack check at all.
+
+do_checks :: Maybe CmmExpr -- Should we check the stack?
+ -- See Note [Stack usage]
+ -> Bool -- Should we check for preemption?
+ -> Maybe CmmExpr -- Heap headroom (bytes)
+ -> CmmAGraph -- What to do on failure
+ -> FCode ()
+do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
+ dflags <- getDynFlags
+ gc_id <- newBlockId
+
+ let
+ Just alloc_lit = mb_alloc_lit
+
+ bump_hp = cmmOffsetExprB dflags hpExpr alloc_lit
+
+ -- Sp overflow if ((old + 0) - CmmHighStack < SpLim)
+ -- At the beginning of a function old + 0 = Sp
+ -- See Note [Single stack check]
+ sp_oflo sp_hwm =
+ CmmMachOp (mo_wordULt dflags)
+ [CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg)))
+ [CmmStackSlot Old 0, sp_hwm],
+ CmmReg spLimReg]
+
+ -- Hp overflow if (Hp > HpLim)
+ -- (Hp has been incremented by now)
+ -- HpLim points to the LAST WORD of valid allocation space.
+ hp_oflo = CmmMachOp (mo_wordUGt dflags) [hpExpr, hpLimExpr]
+
+ alloc_n = mkAssign hpAllocReg alloc_lit
+
+ case mb_stk_hwm of
+ Nothing -> return ()
+ Just stk_hwm -> tickyStackCheck
+ >> (emit =<< mkCmmIfGoto' (sp_oflo stk_hwm) gc_id (Just False) )
+
+ -- Emit new label that might potentially be a header
+ -- of a self-recursive tail call.
+ -- See Note [Self-recursive loop header].
+ self_loop_info <- getSelfLoop
+ case self_loop_info of
+ Just (_, loop_header_id, _)
+ | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id
+ _otherwise -> return ()
+
+ if (isJust mb_alloc_lit)
+ then do
+ tickyHeapCheck
+ emitAssign hpReg bump_hp
+ emit =<< mkCmmIfThen' hp_oflo (alloc_n <*> mkBranch gc_id) (Just False)
+ else do
+ when (checkYield && not (gopt Opt_OmitYields dflags)) $ do
+ -- Yielding if HpLim == 0
+ let yielding = CmmMachOp (mo_wordEq dflags)
+ [CmmReg hpLimReg,
+ CmmLit (zeroCLit dflags)]
+ emit =<< mkCmmIfGoto' yielding gc_id (Just False)
+
+ tscope <- getTickScope
+ emitOutOfLine gc_id
+ (do_gc, tscope) -- this is expected to jump back somewhere
+
+ -- Test for stack pointer exhaustion, then
+ -- bump heap pointer, and test for heap exhaustion
+ -- Note that we don't move the heap pointer unless the
+ -- stack check succeeds. Otherwise we might end up
+ -- with slop at the end of the current block, which can
+ -- confuse the LDV profiler.
+
+-- Note [Self-recursive loop header]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Self-recursive loop header is required by loopification optimization (See
+-- Note [Self-recursive tail calls] in GHC.StgToCmm.Expr). We emit it if:
+--
+-- 1. There is information about self-loop in the FCode environment. We don't
+-- check the binder (first component of the self_loop_info) because we are
+-- certain that if the self-loop info is present then we are compiling the
+-- binder body. Reason: the only possible way to get here with the
+-- self_loop_info present is from closureCodeBody.
+--
+-- 2. checkYield && isJust mb_stk_hwm. checkYield tells us that it is possible
+-- to preempt the heap check (see #367 for motivation behind this check). It
+-- is True for heap checks placed at the entry to a function and
+-- let-no-escape heap checks but false for other heap checks (eg. in case
+-- alternatives or created from hand-written high-level Cmm). The second
+-- check (isJust mb_stk_hwm) is true for heap checks at the entry to a
+-- function and some heap checks created in hand-written Cmm. Otherwise it
+-- is Nothing. In other words the only situation when both conditions are
+-- true is when compiling stack and heap checks at the entry to a
+-- function. This is the only situation when we want to emit a self-loop
+-- label.
diff --git a/compiler/GHC/StgToCmm/Hpc.hs b/compiler/GHC/StgToCmm/Hpc.hs
new file mode 100644
index 0000000000..e33d39245c
--- /dev/null
+++ b/compiler/GHC/StgToCmm/Hpc.hs
@@ -0,0 +1,48 @@
+-----------------------------------------------------------------------------
+--
+-- Code generation for coverage
+--
+-- (c) Galois Connections, Inc. 2006
+--
+-----------------------------------------------------------------------------
+
+module GHC.StgToCmm.Hpc ( initHpc, mkTickBox ) where
+
+import GhcPrelude
+
+import GHC.StgToCmm.Monad
+
+import MkGraph
+import CmmExpr
+import CLabel
+import Module
+import CmmUtils
+import GHC.StgToCmm.Utils
+import HscTypes
+import DynFlags
+
+import Control.Monad
+
+mkTickBox :: DynFlags -> Module -> Int -> CmmAGraph
+mkTickBox dflags mod n
+ = mkStore tick_box (CmmMachOp (MO_Add W64)
+ [ CmmLoad tick_box b64
+ , CmmLit (CmmInt 1 W64)
+ ])
+ where
+ tick_box = cmmIndex dflags W64
+ (CmmLit $ CmmLabel $ mkHpcTicksLabel $ mod)
+ n
+
+initHpc :: Module -> HpcInfo -> FCode ()
+-- Emit top-level tables for HPC
+initHpc _ (NoHpcInfo {})
+ = return ()
+initHpc this_mod (HpcInfo tickCount _hashNo)
+ = do dflags <- getDynFlags
+ when (gopt Opt_Hpc dflags) $
+ do emitDataLits (mkHpcTicksLabel this_mod)
+ [ (CmmInt 0 W64)
+ | _ <- take tickCount [0 :: Int ..]
+ ]
+
diff --git a/compiler/GHC/StgToCmm/Layout.hs b/compiler/GHC/StgToCmm/Layout.hs
new file mode 100644
index 0000000000..f4834376ed
--- /dev/null
+++ b/compiler/GHC/StgToCmm/Layout.hs
@@ -0,0 +1,623 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE CPP #-}
+
+-----------------------------------------------------------------------------
+--
+-- Building info tables.
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+module GHC.StgToCmm.Layout (
+ mkArgDescr,
+ emitCall, emitReturn, adjustHpBackwards,
+
+ emitClosureProcAndInfoTable,
+ emitClosureAndInfoTable,
+
+ slowCall, directCall,
+
+ FieldOffOrPadding(..),
+ ClosureHeader(..),
+ mkVirtHeapOffsets,
+ mkVirtHeapOffsetsWithPadding,
+ mkVirtConstrOffsets,
+ mkVirtConstrSizes,
+ getHpRelOffset,
+
+ ArgRep(..), toArgRep, argRepSizeW -- re-exported from GHC.StgToCmm.ArgRep
+ ) where
+
+
+#include "HsVersions.h"
+
+import GhcPrelude hiding ((<*>))
+
+import GHC.StgToCmm.Closure
+import GHC.StgToCmm.Env
+import GHC.StgToCmm.ArgRep -- notably: ( slowCallPattern )
+import GHC.StgToCmm.Ticky
+import GHC.StgToCmm.Monad
+import GHC.StgToCmm.Utils
+
+import MkGraph
+import SMRep
+import BlockId
+import Cmm
+import CmmUtils
+import CmmInfo
+import CLabel
+import StgSyn
+import Id
+import TyCon ( PrimRep(..), primRepSizeB )
+import BasicTypes ( RepArity )
+import DynFlags
+import Module
+
+import Util
+import Data.List
+import Outputable
+import FastString
+import Control.Monad
+
+------------------------------------------------------------------------
+-- Call and return sequences
+------------------------------------------------------------------------
+
+-- | Return multiple values to the sequel
+--
+-- If the sequel is @Return@
+--
+-- > return (x,y)
+--
+-- If the sequel is @AssignTo [p,q]@
+--
+-- > p=x; q=y;
+--
+emitReturn :: [CmmExpr] -> FCode ReturnKind
+emitReturn results
+ = do { dflags <- getDynFlags
+ ; sequel <- getSequel
+ ; updfr_off <- getUpdFrameOff
+ ; case sequel of
+ Return ->
+ do { adjustHpBackwards
+ ; let e = CmmLoad (CmmStackSlot Old updfr_off) (gcWord dflags)
+ ; emit (mkReturn dflags (entryCode dflags e) results updfr_off)
+ }
+ AssignTo regs adjust ->
+ do { when adjust adjustHpBackwards
+ ; emitMultiAssign regs results }
+ ; return AssignedDirectly
+ }
+
+
+-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@,
+-- using the call/return convention @conv@, passing @args@, and
+-- returning the results to the current sequel.
+--
+emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ReturnKind
+emitCall convs fun args
+ = emitCallWithExtraStack convs fun args noExtraStack
+
+
+-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the
+-- entry-code of @fun@, using the call/return convention @conv@,
+-- passing @args@, pushing some extra stack frames described by
+-- @stack@, and returning the results to the current sequel.
+--
+emitCallWithExtraStack
+ :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
+ -> [CmmExpr] -> FCode ReturnKind
+emitCallWithExtraStack (callConv, retConv) fun args extra_stack
+ = do { dflags <- getDynFlags
+ ; adjustHpBackwards
+ ; sequel <- getSequel
+ ; updfr_off <- getUpdFrameOff
+ ; case sequel of
+ Return -> do
+ emit $ mkJumpExtra dflags callConv fun args updfr_off extra_stack
+ return AssignedDirectly
+ AssignTo res_regs _ -> do
+ k <- newBlockId
+ let area = Young k
+ (off, _, copyin) = copyInOflow dflags retConv area res_regs []
+ copyout = mkCallReturnsTo dflags fun callConv args k off updfr_off
+ extra_stack
+ tscope <- getTickScope
+ emit (copyout <*> mkLabel k tscope <*> copyin)
+ return (ReturnedTo k off)
+ }
+
+
+adjustHpBackwards :: FCode ()
+-- This function adjusts the heap pointer just before a tail call or
+-- return. At a call or return, the virtual heap pointer may be less
+-- than the real Hp, because the latter was advanced to deal with
+-- the worst-case branch of the code, and we may be in a better-case
+-- branch. In that case, move the real Hp *back* and retract some
+-- ticky allocation count.
+--
+-- It *does not* deal with high-water-mark adjustment. That's done by
+-- functions which allocate heap.
+adjustHpBackwards
+ = do { hp_usg <- getHpUsage
+ ; let rHp = realHp hp_usg
+ vHp = virtHp hp_usg
+ adjust_words = vHp - rHp
+ ; new_hp <- getHpRelOffset vHp
+
+ ; emit (if adjust_words == 0
+ then mkNop
+ else mkAssign hpReg new_hp) -- Generates nothing when vHp==rHp
+
+ ; tickyAllocHeap False adjust_words -- ...ditto
+
+ ; setRealHp vHp
+ }
+
+
+-------------------------------------------------------------------------
+-- Making calls: directCall and slowCall
+-------------------------------------------------------------------------
+
+-- General plan is:
+-- - we'll make *one* fast call, either to the function itself
+-- (directCall) or to stg_ap_<pat>_fast (slowCall)
+-- Any left-over arguments will be pushed on the stack,
+--
+-- e.g. Sp[old+8] = arg1
+-- Sp[old+16] = arg2
+-- Sp[old+32] = stg_ap_pp_info
+-- R2 = arg3
+-- R3 = arg4
+-- call f() return to Nothing updfr_off: 32
+
+
+directCall :: Convention -> CLabel -> RepArity -> [StgArg] -> FCode ReturnKind
+-- (directCall f n args)
+-- calls f(arg1, ..., argn), and applies the result to the remaining args
+-- The function f has arity n, and there are guaranteed at least n args
+-- Both arity and args include void args
+directCall conv lbl arity stg_args
+ = do { argreps <- getArgRepsAmodes stg_args
+ ; direct_call "directCall" conv lbl arity argreps }
+
+
+slowCall :: CmmExpr -> [StgArg] -> FCode ReturnKind
+-- (slowCall fun args) applies fun to args, returning the results to Sequel
+slowCall fun stg_args
+ = do dflags <- getDynFlags
+ argsreps <- getArgRepsAmodes stg_args
+ let (rts_fun, arity) = slowCallPattern (map fst argsreps)
+
+ (r, slow_code) <- getCodeR $ do
+ r <- direct_call "slow_call" NativeNodeCall
+ (mkRtsApFastLabel rts_fun) arity ((P,Just fun):argsreps)
+ emitComment $ mkFastString ("slow_call for " ++
+ showSDoc dflags (ppr fun) ++
+ " with pat " ++ unpackFS rts_fun)
+ return r
+
+ -- Note [avoid intermediate PAPs]
+ let n_args = length stg_args
+ if n_args > arity && optLevel dflags >= 2
+ then do
+ funv <- (CmmReg . CmmLocal) `fmap` assignTemp fun
+ fun_iptr <- (CmmReg . CmmLocal) `fmap`
+ assignTemp (closureInfoPtr dflags (cmmUntag dflags funv))
+
+ -- ToDo: we could do slightly better here by reusing the
+ -- continuation from the slow call, which we have in r.
+ -- Also we'd like to push the continuation on the stack
+ -- before the branch, so that we only get one copy of the
+ -- code that saves all the live variables across the
+ -- call, but that might need some improvements to the
+ -- special case in the stack layout code to handle this
+ -- (see Note [diamond proc point]).
+
+ fast_code <- getCode $
+ emitCall (NativeNodeCall, NativeReturn)
+ (entryCode dflags fun_iptr)
+ (nonVArgs ((P,Just funv):argsreps))
+
+ slow_lbl <- newBlockId
+ fast_lbl <- newBlockId
+ is_tagged_lbl <- newBlockId
+ end_lbl <- newBlockId
+
+ let correct_arity = cmmEqWord dflags (funInfoArity dflags fun_iptr)
+ (mkIntExpr dflags n_args)
+
+ tscope <- getTickScope
+ emit (mkCbranch (cmmIsTagged dflags funv)
+ is_tagged_lbl slow_lbl (Just True)
+ <*> mkLabel is_tagged_lbl tscope
+ <*> mkCbranch correct_arity fast_lbl slow_lbl (Just True)
+ <*> mkLabel fast_lbl tscope
+ <*> fast_code
+ <*> mkBranch end_lbl
+ <*> mkLabel slow_lbl tscope
+ <*> slow_code
+ <*> mkLabel end_lbl tscope)
+ return r
+
+ else do
+ emit slow_code
+ return r
+
+
+-- Note [avoid intermediate PAPs]
+--
+-- A slow call which needs multiple generic apply patterns will be
+-- almost guaranteed to create one or more intermediate PAPs when
+-- applied to a function that takes the correct number of arguments.
+-- We try to avoid this situation by generating code to test whether
+-- we are calling a function with the correct number of arguments
+-- first, i.e.:
+--
+-- if (TAG(f) != 0) { // f is not a thunk
+-- if (f->info.arity == n) {
+-- ... make a fast call to f ...
+-- }
+-- }
+-- ... otherwise make the slow call ...
+--
+-- We *only* do this when the call requires multiple generic apply
+-- functions, which requires pushing extra stack frames and probably
+-- results in intermediate PAPs. (I say probably, because it might be
+-- that we're over-applying a function, but that seems even less
+-- likely).
+--
+-- This very rarely applies, but if it does happen in an inner loop it
+-- can have a severe impact on performance (#6084).
+
+
+--------------
+direct_call :: String
+ -> Convention -- e.g. NativeNodeCall or NativeDirectCall
+ -> CLabel -> RepArity
+ -> [(ArgRep,Maybe CmmExpr)] -> FCode ReturnKind
+direct_call caller call_conv lbl arity args
+ | debugIsOn && args `lengthLessThan` real_arity -- Too few args
+ = do -- Caller should ensure that there are enough args!
+ pprPanic "direct_call" $
+ text caller <+> ppr arity <+>
+ ppr lbl <+> ppr (length args) <+>
+ ppr (map snd args) <+> ppr (map fst args)
+
+ | null rest_args -- Precisely the right number of arguments
+ = emitCall (call_conv, NativeReturn) target (nonVArgs args)
+
+ | otherwise -- Note [over-saturated calls]
+ = do dflags <- getDynFlags
+ emitCallWithExtraStack (call_conv, NativeReturn)
+ target
+ (nonVArgs fast_args)
+ (nonVArgs (stack_args dflags))
+ where
+ target = CmmLit (CmmLabel lbl)
+ (fast_args, rest_args) = splitAt real_arity args
+ stack_args dflags = slowArgs dflags rest_args
+ real_arity = case call_conv of
+ NativeNodeCall -> arity+1
+ _ -> arity
+
+
+-- When constructing calls, it is easier to keep the ArgReps and the
+-- CmmExprs zipped together. However, a void argument has no
+-- representation, so we need to use Maybe CmmExpr (the alternative of
+-- using zeroCLit or even undefined would work, but would be ugly).
+--
+getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)]
+getArgRepsAmodes = mapM getArgRepAmode
+ where getArgRepAmode arg
+ | V <- rep = return (V, Nothing)
+ | otherwise = do expr <- getArgAmode (NonVoid arg)
+ return (rep, Just expr)
+ where rep = toArgRep (argPrimRep arg)
+
+nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr]
+nonVArgs [] = []
+nonVArgs ((_,Nothing) : args) = nonVArgs args
+nonVArgs ((_,Just arg) : args) = arg : nonVArgs args
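+
+-- For example: nonVArgs [(P, Just fun), (V, Nothing), (N, Just x)]
+-- is [fun, x]; the void argument vanishes.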
+
+{-
+Note [over-saturated calls]
+
+The natural thing to do for an over-saturated call would be to call
+the function with the correct number of arguments, and then apply the
+remaining arguments to the value returned, e.g.
+
+ f a b c d (where f has arity 2)
+ -->
+ r = call f(a,b)
+ call r(c,d)
+
+but this entails
+ - saving c and d on the stack
+ - making a continuation info table
+ - at the continuation, loading c and d off the stack into regs
+ - finally, call r
+
+Note that since there are a fixed number of different r's
+(e.g. stg_ap_pp_fast), we can also pre-compile continuations
+that correspond to each of them, rather than generating a fresh
+one for each over-saturated call.
+
+Not only does this generate much less code, it is faster too. We will
+generate something like:
+
+Sp[old+16] = c
+Sp[old+24] = d
+Sp[old+32] = stg_ap_pp_info
+call f(a,b) -- usual calling convention
+
+For the purposes of the CmmCall node, we count this extra stack as
+just more arguments that we are passing on the stack (cml_args).
+-}
+
+-- | 'slowArgs' takes a list of function arguments and prepares them for
+-- pushing on the stack for "extra" arguments to a function which requires
+-- fewer arguments than we currently have.
+slowArgs :: DynFlags -> [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)]
+slowArgs _ [] = []
+slowArgs dflags args -- careful: reps contains voids (V), but args does not
+ | gopt Opt_SccProfilingOn dflags
+ = save_cccs ++ this_pat ++ slowArgs dflags rest_args
+ | otherwise = this_pat ++ slowArgs dflags rest_args
+ where
+ (arg_pat, n) = slowCallPattern (map fst args)
+ (call_args, rest_args) = splitAt n args
+
+ stg_ap_pat = mkCmmRetInfoLabel rtsUnitId arg_pat
+ this_pat = (N, Just (mkLblExpr stg_ap_pat)) : call_args
+ save_cccs = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just cccsExpr)]
+ save_cccs_lbl = mkCmmRetInfoLabel rtsUnitId (fsLit "stg_restore_cccs")
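+
+-- For example (a sketch, without profiling): two leftover pointer
+-- arguments p and q match the "stg_ap_pp" pattern, so slowArgs yields
+--   [(N, stg_ap_pp_info), (P, p), (P, q)]
+-- i.e. the stg_ap_pp return frame plus the two arguments.
+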
+
+-------------------------------------------------------------------------
+---- Laying out objects on the heap and stack
+-------------------------------------------------------------------------
+
+-- The heap always grows upwards, so hpRel is easy to compute
+hpRel :: VirtualHpOffset -- virtual offset of Hp
+ -> VirtualHpOffset -- virtual offset of The Thing
+ -> WordOff -- integer word offset
+hpRel hp off = off - hp
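+
+-- For example: hpRel 5 3 == -2, i.e. a thing at virtual offset 3 sits
+-- two words below a virtual Hp of 5.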
+
+getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
+-- See Note [Virtual and real heap pointers] in GHC.StgToCmm.Monad
+getHpRelOffset virtual_offset
+ = do dflags <- getDynFlags
+ hp_usg <- getHpUsage
+ return (cmmRegOffW dflags hpReg (hpRel (realHp hp_usg) virtual_offset))
+
+data FieldOffOrPadding a
+ = FieldOff (NonVoid a) -- Something that needs an offset.
+ ByteOff -- Offset in bytes.
+ | Padding ByteOff -- Length of padding in bytes.
+ ByteOff -- Offset in bytes.
+
+-- | Used to tell the various @mkVirtHeapOffsets@ functions what kind
+-- of header the object has. This will be accounted for in the
+-- offsets of the fields returned.
+data ClosureHeader
+ = NoHeader
+ | StdHeader
+ | ThunkHeader
+
+mkVirtHeapOffsetsWithPadding
+ :: DynFlags
+ -> ClosureHeader -- What kind of header to account for
+ -> [NonVoid (PrimRep, a)] -- Things to make offsets for
+ -> ( WordOff -- Total number of words allocated
+ , WordOff -- Number of words allocated for *pointers*
+ , [FieldOffOrPadding a] -- Either an offset or padding.
+ )
+
+-- Things with their offsets from start of object in order of
+-- increasing offset; BUT THIS MAY BE DIFFERENT TO INPUT ORDER
+-- First in list gets lowest offset, which is initial offset + 1.
+--
+-- mkVirtHeapOffsetsWithPadding always returns boxed things with smaller offsets
+-- than the unboxed things
+
+mkVirtHeapOffsetsWithPadding dflags header things =
+ ASSERT(not (any (isVoidRep . fst . fromNonVoid) things))
+ ( tot_wds
+ , bytesToWordsRoundUp dflags bytes_of_ptrs
+ , concat (ptrs_w_offsets ++ non_ptrs_w_offsets) ++ final_pad
+ )
+ where
+ hdr_words = case header of
+ NoHeader -> 0
+ StdHeader -> fixedHdrSizeW dflags
+ ThunkHeader -> thunkHdrSize dflags
+ hdr_bytes = wordsToBytes dflags hdr_words
+
+ (ptrs, non_ptrs) = partition (isGcPtrRep . fst . fromNonVoid) things
+
+ (bytes_of_ptrs, ptrs_w_offsets) =
+ mapAccumL computeOffset 0 ptrs
+ (tot_bytes, non_ptrs_w_offsets) =
+ mapAccumL computeOffset bytes_of_ptrs non_ptrs
+
+ tot_wds = bytesToWordsRoundUp dflags tot_bytes
+
+ final_pad_size = tot_wds * word_size - tot_bytes
+ final_pad
+ | final_pad_size > 0 = [(Padding final_pad_size
+ (hdr_bytes + tot_bytes))]
+ | otherwise = []
+
+ word_size = wORD_SIZE dflags
+
+ computeOffset bytes_so_far nv_thing =
+ (new_bytes_so_far, with_padding field_off)
+ where
+ (rep, thing) = fromNonVoid nv_thing
+
+ -- Size of the field in bytes.
+ !sizeB = primRepSizeB dflags rep
+
+ -- Align the start offset (eg, 2-byte value should be 2-byte aligned).
+ -- But not more than to a word.
+ !align = min word_size sizeB
+ !start = roundUpTo bytes_so_far align
+ !padding = start - bytes_so_far
+
+ -- Final offset is:
+ -- size of header + bytes_so_far + padding
+ !final_offset = hdr_bytes + bytes_so_far + padding
+ !new_bytes_so_far = start + sizeB
+ field_off = FieldOff (NonVoid thing) final_offset
+
+ with_padding field_off
+ | padding == 0 = [field_off]
+ | otherwise = [ Padding padding (hdr_bytes + bytes_so_far)
+ , field_off
+ ]
+
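+-- A worked example (a sketch; 64-bit target, no profiling, StdHeader of
+-- one word, so hdr_bytes = 8): a pointer field followed by two Word8Rep
+-- fields lays out as
+--   [FieldOff ptr 8, FieldOff b1 16, FieldOff b2 17, Padding 6 18]
+-- returning tot_wds = 2 and one word of pointers.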
+
+mkVirtHeapOffsets
+ :: DynFlags
+ -> ClosureHeader -- What kind of header to account for
+ -> [NonVoid (PrimRep,a)] -- Things to make offsets for
+ -> (WordOff, -- _Total_ number of words allocated
+ WordOff, -- Number of words allocated for *pointers*
+ [(NonVoid a, ByteOff)])
+mkVirtHeapOffsets dflags header things =
+ ( tot_wds
+ , ptr_wds
+ , [ (field, offset) | (FieldOff field offset) <- things_offsets ]
+ )
+ where
+ (tot_wds, ptr_wds, things_offsets) =
+ mkVirtHeapOffsetsWithPadding dflags header things
+
+-- | Just like mkVirtHeapOffsets, but for constructors
+mkVirtConstrOffsets
+ :: DynFlags -> [NonVoid (PrimRep, a)]
+ -> (WordOff, WordOff, [(NonVoid a, ByteOff)])
+mkVirtConstrOffsets dflags = mkVirtHeapOffsets dflags StdHeader
+
+-- | Just like mkVirtConstrOffsets, but used when we don't have the actual
+-- arguments. Useful when e.g. generating info tables; we just need to know
+-- sizes of pointer and non-pointer fields.
+mkVirtConstrSizes :: DynFlags -> [NonVoid PrimRep] -> (WordOff, WordOff)
+mkVirtConstrSizes dflags field_reps
+ = (tot_wds, ptr_wds)
+ where
+ (tot_wds, ptr_wds, _) =
+ mkVirtConstrOffsets dflags
+ (map (\nv_rep -> NonVoid (fromNonVoid nv_rep, ())) field_reps)
+
+-------------------------------------------------------------------------
+--
+-- Making argument descriptors
+--
+-- An argument descriptor describes the layout of args on the stack,
+-- both for * GC (stack-layout) purposes, and
+-- * saving/restoring registers when a heap-check fails
+--
+-- Void arguments aren't important, therefore (contrast constructSlowCall)
+--
+-------------------------------------------------------------------------
+
+-- bring in ARG_P, ARG_N, etc.
+#include "../includes/rts/storage/FunTypes.h"
+
+mkArgDescr :: DynFlags -> [Id] -> ArgDescr
+mkArgDescr dflags args
+ = let arg_bits = argBits dflags arg_reps
+ arg_reps = filter isNonV (map idArgRep args)
+ -- Getting rid of voids eases matching of standard patterns
+ in case stdPattern arg_reps of
+ Just spec_id -> ArgSpec spec_id
+ Nothing -> ArgGen arg_bits
+
+argBits :: DynFlags -> [ArgRep] -> [Bool] -- True for non-ptr, False for ptr
+argBits _ [] = []
+argBits dflags (P : args) = False : argBits dflags args
+argBits dflags (arg : args) = take (argRepSizeW dflags arg) (repeat True)
+ ++ argBits dflags args
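+
+-- For example: argBits for [P, N] is [False, True] on a 64-bit target,
+-- and stdPattern maps [P, N] to ARG_PN, so mkArgDescr returns
+-- ArgSpec ARG_PN rather than ArgGen [False, True].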
+
+----------------------
+stdPattern :: [ArgRep] -> Maybe Int
+stdPattern reps
+ = case reps of
+ [] -> Just ARG_NONE -- just void args, probably
+ [N] -> Just ARG_N
+ [P] -> Just ARG_P
+ [F] -> Just ARG_F
+ [D] -> Just ARG_D
+ [L] -> Just ARG_L
+ [V16] -> Just ARG_V16
+ [V32] -> Just ARG_V32
+ [V64] -> Just ARG_V64
+
+ [N,N] -> Just ARG_NN
+ [N,P] -> Just ARG_NP
+ [P,N] -> Just ARG_PN
+ [P,P] -> Just ARG_PP
+
+ [N,N,N] -> Just ARG_NNN
+ [N,N,P] -> Just ARG_NNP
+ [N,P,N] -> Just ARG_NPN
+ [N,P,P] -> Just ARG_NPP
+ [P,N,N] -> Just ARG_PNN
+ [P,N,P] -> Just ARG_PNP
+ [P,P,N] -> Just ARG_PPN
+ [P,P,P] -> Just ARG_PPP
+
+ [P,P,P,P] -> Just ARG_PPPP
+ [P,P,P,P,P] -> Just ARG_PPPPP
+ [P,P,P,P,P,P] -> Just ARG_PPPPPP
+
+ _ -> Nothing
+
+-------------------------------------------------------------------------
+--
+-- Generating the info table and code for a closure
+--
+-------------------------------------------------------------------------
+
+-- Here we make an info table of type 'CmmInfo'. The concrete
+-- representation as a list of 'CmmAddr' is handled later
+-- in the pipeline by 'cmmToRawCmm'.
+-- When loading the free variables, a function closure pointer may be tagged,
+-- so we must take it into account.
+
+emitClosureProcAndInfoTable :: Bool -- top-level?
+ -> Id -- name of the closure
+ -> LambdaFormInfo
+ -> CmmInfoTable
+ -> [NonVoid Id] -- incoming arguments
+ -> ((Int, LocalReg, [LocalReg]) -> FCode ()) -- function body
+ -> FCode ()
+emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args body
+ = do { dflags <- getDynFlags
+ -- Bind the binder itself, but only if it's not a top-level
+ -- binding. We need non-top let-bindings to refer to the
+ -- top-level binding, which this binding would incorrectly shadow.
+ ; node <- if top_lvl then return $ idToReg dflags (NonVoid bndr)
+ else bindToReg (NonVoid bndr) lf_info
+ ; let node_points = nodeMustPointToIt dflags lf_info
+ ; arg_regs <- bindArgsToRegs args
+ ; let args' = if node_points then (node : arg_regs) else arg_regs
+ conv = if nodeMustPointToIt dflags lf_info then NativeNodeCall
+ else NativeDirectCall
+ (offset, _, _) = mkCallEntry dflags conv args' []
+ ; emitClosureAndInfoTable info_tbl conv args' $ body (offset, node, arg_regs)
+ }
+
+-- Data constructors need closures, but not with all the argument handling
+-- needed for functions. The shared part goes here.
+emitClosureAndInfoTable ::
+ CmmInfoTable -> Convention -> [LocalReg] -> FCode () -> FCode ()
+emitClosureAndInfoTable info_tbl conv args body
+ = do { (_, blks) <- getCodeScoped body
+ ; let entry_lbl = toEntryLbl (cit_lbl info_tbl)
+ ; emitProcWithConvention conv (Just info_tbl) entry_lbl args blks
+ }
diff --git a/compiler/GHC/StgToCmm/Monad.hs b/compiler/GHC/StgToCmm/Monad.hs
new file mode 100644
index 0000000000..716cbdab78
--- /dev/null
+++ b/compiler/GHC/StgToCmm/Monad.hs
@@ -0,0 +1,861 @@
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE GADTs #-}
+
+-----------------------------------------------------------------------------
+--
+-- Monad for Stg to C-- code generation
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+module GHC.StgToCmm.Monad (
+ FCode, -- type
+
+ initC, runC, fixC,
+ newUnique,
+
+ emitLabel,
+
+ emit, emitDecl,
+ emitProcWithConvention, emitProcWithStackFrame,
+ emitOutOfLine, emitAssign, emitStore,
+ emitComment, emitTick, emitUnwind,
+
+ getCmm, aGraphToGraph,
+ getCodeR, getCode, getCodeScoped, getHeapUsage,
+
+ mkCmmIfThenElse, mkCmmIfThen, mkCmmIfGoto,
+ mkCmmIfThenElse', mkCmmIfThen', mkCmmIfGoto',
+
+ mkCall, mkCmmCall,
+
+ forkClosureBody, forkLneBody, forkAlts, forkAltPair, codeOnly,
+
+ ConTagZ,
+
+ Sequel(..), ReturnKind(..),
+ withSequel, getSequel,
+
+ setTickyCtrLabel, getTickyCtrLabel,
+ tickScope, getTickScope,
+
+ withUpdFrameOff, getUpdFrameOff, initUpdFrameOff,
+
+ HeapUsage(..), VirtualHpOffset, initHpUsage,
+ getHpUsage, setHpUsage, heapHWM,
+ setVirtHp, getVirtHp, setRealHp,
+
+ getModuleName,
+
+ -- ideally we wouldn't export these, but some other modules access internal state
+ getState, setState, getSelfLoop, withSelfLoop, getInfoDown, getDynFlags, getThisPackage,
+
+ -- more localised access to monad state
+ CgIdInfo(..),
+ getBinds, setBinds,
+
+ -- out of general friendliness, we also export ...
+ CgInfoDownwards(..), CgState(..) -- non-abstract
+ ) where
+
+import GhcPrelude hiding( sequence, succ )
+
+import Cmm
+import GHC.StgToCmm.Closure
+import DynFlags
+import Hoopl.Collections
+import MkGraph
+import BlockId
+import CLabel
+import SMRep
+import Module
+import Id
+import VarEnv
+import OrdList
+import BasicTypes( ConTagZ )
+import Unique
+import UniqSupply
+import FastString
+import Outputable
+import Util
+
+import Control.Monad
+import Data.List
+
+
+
+--------------------------------------------------------
+-- The FCode monad and its types
+--
+-- FCode is the monad plumbed through the Stg->Cmm code generator, and
+-- the Cmm parser. It contains the following things:
+--
+-- - A writer monad, collecting:
+-- - code for the current function, in the form of a CmmAGraph.
+-- The function "emit" appends more code to this.
+-- - the top-level CmmDecls accumulated so far
+--
+-- - A state monad with:
+-- - the local bindings in scope
+-- - the current heap usage
+-- - a UniqSupply
+--
+-- - A reader monad, for CgInfoDownwards, containing
+-- - DynFlags,
+-- - the current Module
+-- - the update-frame offset
+-- - the ticky counter label
+-- - the Sequel (the continuation to return to)
+-- - the self-recursive tail call information
+
+--------------------------------------------------------
+
+newtype FCode a = FCode { doFCode :: CgInfoDownwards -> CgState -> (a, CgState) }
+ deriving (Functor)
+
+instance Applicative FCode where
+ pure val = FCode (\_info_down state -> (val, state))
+ {-# INLINE pure #-}
+ (<*>) = ap
+
+instance Monad FCode where
+ FCode m >>= k = FCode $
+ \info_down state ->
+ case m info_down state of
+ (m_result, new_state) ->
+ case k m_result of
+ FCode kcode -> kcode info_down new_state
+ {-# INLINE (>>=) #-}
+
+instance MonadUnique FCode where
+ getUniqueSupplyM = cgs_uniqs <$> getState
+ getUniqueM = FCode $ \_ st ->
+ let (u, us') = takeUniqFromSupply (cgs_uniqs st)
+ in (u, st { cgs_uniqs = us' })
+
+initC :: IO CgState
+initC = do { uniqs <- mkSplitUniqSupply 'c'
+ ; return (initCgState uniqs) }
+
+runC :: DynFlags -> Module -> CgState -> FCode a -> (a,CgState)
+runC dflags mod st fcode = doFCode fcode (initCgInfoDown dflags mod) st
+
+fixC :: (a -> FCode a) -> FCode a
+fixC fcode = FCode $
+ \info_down state -> let (v, s) = doFCode (fcode v) info_down state
+ in (v, s)
+
+--------------------------------------------------------
+-- The code generator environment
+--------------------------------------------------------
+
+-- This monadery has some information that it only passes
+-- *downwards*, as well as some ``state'' which is modified
+-- as we go along.
+
+data CgInfoDownwards -- information only passed *downwards* by the monad
+ = MkCgInfoDown {
+ cgd_dflags :: DynFlags,
+ cgd_mod :: Module, -- Module being compiled
+ cgd_updfr_off :: UpdFrameOffset, -- Size of current update frame
+ cgd_ticky :: CLabel, -- Current destination for ticky counts
+ cgd_sequel :: Sequel, -- What to do at end of basic block
+ cgd_self_loop :: Maybe SelfLoopInfo,-- Which tail calls can be compiled
+ -- as local jumps? See Note
+ -- [Self-recursive tail calls] in
+ -- GHC.StgToCmm.Expr
+ cgd_tick_scope:: CmmTickScope -- Tick scope for new blocks & ticks
+ }
+
+type CgBindings = IdEnv CgIdInfo
+
+data CgIdInfo
+ = CgIdInfo
+ { cg_id :: Id -- Id that this is the info for
+ , cg_lf :: LambdaFormInfo
+ , cg_loc :: CgLoc -- CmmExpr for the *tagged* value
+ }
+
+instance Outputable CgIdInfo where
+ ppr (CgIdInfo { cg_id = id, cg_loc = loc })
+ = ppr id <+> text "-->" <+> ppr loc
+
+-- Sequel tells what to do with the result of this expression
+data Sequel
+ = Return -- Return result(s) to continuation found on the stack.
+
+ | AssignTo
+ [LocalReg] -- Put result(s) in these regs and fall through
+ -- NB: no void arguments here
+ --
+ Bool -- Should we adjust the heap pointer back to
+ -- recover space that's unused on this path?
+ -- We need to do this only if the expression
+ -- may allocate (e.g. it's a foreign call or
+ -- allocating primOp)
+
+instance Outputable Sequel where
+ ppr Return = text "Return"
+ ppr (AssignTo regs b) = text "AssignTo" <+> ppr regs <+> ppr b
+
+-- See Note [sharing continuations] below
+data ReturnKind
+ = AssignedDirectly
+ | ReturnedTo BlockId ByteOff
+
+-- Note [sharing continuations]
+--
+-- ReturnKind says how the expression being compiled returned its
+-- results: either by assigning directly to the registers specified
+-- by the Sequel, or by returning to a continuation that does the
+-- assignments. The point of this is we might be able to re-use the
+-- continuation in a subsequent heap-check. Consider:
+--
+-- case f x of z
+-- True -> <True code>
+-- False -> <False code>
+--
+-- Naively we would generate
+--
+-- R2 = x -- argument to f
+-- Sp[young(L1)] = L1
+-- call f returns to L1
+-- L1:
+-- z = R1
+-- if (z & 1) then Ltrue else Lfalse
+-- Ltrue:
+-- Hp = Hp + 24
+-- if (Hp > HpLim) then L4 else L7
+-- L4:
+-- HpAlloc = 24
+-- goto L5
+-- L5:
+-- R1 = z
+-- Sp[young(L6)] = L6
+-- call stg_gc_unpt_r1 returns to L6
+-- L6:
+-- z = R1
+-- goto L1
+-- L7:
+-- <True code>
+-- Lfalse:
+-- <False code>
+--
+-- We want the gc call in L4 to return to L1, and discard L6. Note
+-- that not only can we share L1 and L6, but the assignment of the
+-- return address in L4 is unnecessary because the return address for
+-- L1 is already on the stack. We used to catch the sharing of L1 and
+-- L6 in the common-block-eliminator, but not the unnecessary return
+-- address assignment.
+--
+-- Since this case is so common I decided to make it more explicit and
+-- robust by programming the sharing directly, rather than relying on
+-- the common-block eliminator to catch it. This makes
+-- common-block-elimination an optional optimisation, and furthermore
+-- generates less code in the first place that we have to subsequently
+-- clean up.
+--
+-- There are some rarer cases of common blocks that we don't catch
+-- this way, but that's ok. Common-block-elimination is still available
+-- to catch them when optimisation is enabled. Some examples are:
+--
+-- - when both the True and False branches do a heap check, we
+-- can share the heap-check failure code L4a and maybe L4
+--
+-- - in a case-of-case, there might be multiple continuations that
+-- we can common up.
+--
+-- It is always safe to use AssignedDirectly. Expressions that jump
+-- to the continuation from multiple places (e.g. case expressions)
+-- fall back to AssignedDirectly.
+--
+
+
+initCgInfoDown :: DynFlags -> Module -> CgInfoDownwards
+initCgInfoDown dflags mod
+ = MkCgInfoDown { cgd_dflags = dflags
+ , cgd_mod = mod
+ , cgd_updfr_off = initUpdFrameOff dflags
+ , cgd_ticky = mkTopTickyCtrLabel
+ , cgd_sequel = initSequel
+ , cgd_self_loop = Nothing
+ , cgd_tick_scope= GlobalScope }
+
+initSequel :: Sequel
+initSequel = Return
+
+initUpdFrameOff :: DynFlags -> UpdFrameOffset
+initUpdFrameOff dflags = widthInBytes (wordWidth dflags) -- space for the RA
+
+
+--------------------------------------------------------
+-- The code generator state
+--------------------------------------------------------
+
+data CgState
+ = MkCgState {
+ cgs_stmts :: CmmAGraph, -- Current procedure
+
+ cgs_tops :: OrdList CmmDecl,
+ -- Other procedures and data blocks in this compilation unit
+ -- Both are ordered only so that we can
+ -- reduce forward references, when it's easy to do so
+
+ cgs_binds :: CgBindings,
+
+ cgs_hp_usg :: HeapUsage,
+
+ cgs_uniqs :: UniqSupply }
+
+data HeapUsage -- See Note [Virtual and real heap pointers]
+ = HeapUsage {
+ virtHp :: VirtualHpOffset, -- Virtual offset of highest-allocated word
+ -- Incremented whenever we allocate
+ realHp :: VirtualHpOffset -- realHp: Virtual offset of real heap ptr
+ -- Used in instruction addressing modes
+ }
+
+type VirtualHpOffset = WordOff
+
+
+{- Note [Virtual and real heap pointers]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The code generator can allocate one or more objects contiguously, performing
+one heap check to cover allocation of all the objects at once. Let's call
+this little chunk of heap space an "allocation chunk". The code generator
+will emit code to
+ * Perform a heap-exhaustion check
+ * Move the heap pointer to the end of the allocation chunk
+ * Allocate multiple objects within the chunk
+
+The code generator uses VirtualHpOffsets to address words within a
+single allocation chunk; these start at one and increase positively.
+The first word of the chunk has VirtualHpOffset=1, the second has
+VirtualHpOffset=2, and so on.
+
+ * The field realHp tracks (the VirtualHpOffset) where the real Hp
+ register is pointing. Typically it'll be pointing to the end of the
+ allocation chunk.
+
+ * The field virtHp gives the VirtualHpOffset of the highest-allocated
+ word so far. It starts at zero (meaning no word has been allocated),
+ and increases whenever an object is allocated.
+
+The difference between realHp and virtHp gives the offset from the
+real Hp register of a particular word in the allocation chunk. This
+is what getHpRelOffset does. Since the returned offset is relative
+to the real Hp register, it is valid only until you change the real
+Hp register. (Changing virtHp doesn't matter.)
+-}
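+
+-- To make the Note above concrete: getHpRelOffset (defined in
+-- GHC.StgToCmm.Heap) is essentially the following sketch (the exact
+-- definition there may differ in detail):
+--
+--   getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
+--   getHpRelOffset virtual_offset = do
+--     dflags <- getDynFlags
+--     hp_usg <- getHpUsage
+--     -- address the chunk word at 'virtual_offset' relative to the
+--     -- real Hp register; valid only until Hp is moved again
+--     return (cmmRegOffW dflags hpReg (virtual_offset - realHp hp_usg))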
+
+
+initCgState :: UniqSupply -> CgState
+initCgState uniqs
+ = MkCgState { cgs_stmts = mkNop
+ , cgs_tops = nilOL
+ , cgs_binds = emptyVarEnv
+ , cgs_hp_usg = initHpUsage
+ , cgs_uniqs = uniqs }
+
+stateIncUsage :: CgState -> CgState -> CgState
+-- @stateIncUsage e1 e2@ incorporates into e1
+-- the heap high water mark found in e2.
+stateIncUsage s1 s2@(MkCgState { cgs_hp_usg = hp_usg })
+ = s1 { cgs_hp_usg = cgs_hp_usg s1 `maxHpHw` virtHp hp_usg }
+ `addCodeBlocksFrom` s2
+
+addCodeBlocksFrom :: CgState -> CgState -> CgState
+-- Add code blocks from the latter to the former
+-- (The cgs_stmts will often be empty, but not always; see codeOnly)
+s1 `addCodeBlocksFrom` s2
+ = s1 { cgs_stmts = cgs_stmts s1 MkGraph.<*> cgs_stmts s2,
+ cgs_tops = cgs_tops s1 `appOL` cgs_tops s2 }
+
+
+-- The heap high water mark is the larger of virtHp and hwHp. The latter
+-- only records the high water marks of forked-off branches, so to find the
+-- heap high water mark you have to take the max of virtHp and hwHp. Remember,
+-- virtHp never retreats!
+--
+-- Note Jan 04: ok, so why do we only look at the virtual Hp??
+
+heapHWM :: HeapUsage -> VirtualHpOffset
+heapHWM = virtHp
+
+initHpUsage :: HeapUsage
+initHpUsage = HeapUsage { virtHp = 0, realHp = 0 }
+
+maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage
+hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw }
+
+--------------------------------------------------------
+-- Operators for getting and setting the state and "info_down".
+--------------------------------------------------------
+
+getState :: FCode CgState
+getState = FCode $ \_info_down state -> (state, state)
+
+setState :: CgState -> FCode ()
+setState state = FCode $ \_info_down _ -> ((), state)
+
+getHpUsage :: FCode HeapUsage
+getHpUsage = do
+ state <- getState
+ return $ cgs_hp_usg state
+
+setHpUsage :: HeapUsage -> FCode ()
+setHpUsage new_hp_usg = do
+ state <- getState
+ setState $ state {cgs_hp_usg = new_hp_usg}
+
+setVirtHp :: VirtualHpOffset -> FCode ()
+setVirtHp new_virtHp
+ = do { hp_usage <- getHpUsage
+ ; setHpUsage (hp_usage {virtHp = new_virtHp}) }
+
+getVirtHp :: FCode VirtualHpOffset
+getVirtHp
+ = do { hp_usage <- getHpUsage
+ ; return (virtHp hp_usage) }
+
+setRealHp :: VirtualHpOffset -> FCode ()
+setRealHp new_realHp
+ = do { hp_usage <- getHpUsage
+ ; setHpUsage (hp_usage {realHp = new_realHp}) }
+
+getBinds :: FCode CgBindings
+getBinds = do
+ state <- getState
+ return $ cgs_binds state
+
+setBinds :: CgBindings -> FCode ()
+setBinds new_binds = do
+ state <- getState
+ setState $ state {cgs_binds = new_binds}
+
+withState :: FCode a -> CgState -> FCode (a,CgState)
+withState (FCode fcode) newstate = FCode $ \info_down state ->
+ case fcode info_down newstate of
+ (retval, state2) -> ((retval,state2), state)
+
+newUniqSupply :: FCode UniqSupply
+newUniqSupply = do
+ state <- getState
+ let (us1, us2) = splitUniqSupply (cgs_uniqs state)
+ setState $ state { cgs_uniqs = us1 }
+ return us2
+
+newUnique :: FCode Unique
+newUnique = do
+ state <- getState
+ let (u,us') = takeUniqFromSupply (cgs_uniqs state)
+ setState $ state { cgs_uniqs = us' }
+ return u
+
+------------------
+getInfoDown :: FCode CgInfoDownwards
+getInfoDown = FCode $ \info_down state -> (info_down,state)
+
+getSelfLoop :: FCode (Maybe SelfLoopInfo)
+getSelfLoop = do
+ info_down <- getInfoDown
+ return $ cgd_self_loop info_down
+
+withSelfLoop :: SelfLoopInfo -> FCode a -> FCode a
+withSelfLoop self_loop code = do
+ info_down <- getInfoDown
+ withInfoDown code (info_down {cgd_self_loop = Just self_loop})
+
+instance HasDynFlags FCode where
+ getDynFlags = liftM cgd_dflags getInfoDown
+
+getThisPackage :: FCode UnitId
+getThisPackage = liftM thisPackage getDynFlags
+
+withInfoDown :: FCode a -> CgInfoDownwards -> FCode a
+withInfoDown (FCode fcode) info_down = FCode $ \_ state -> fcode info_down state
+
+-- ----------------------------------------------------------------------------
+-- Get the current module name
+
+getModuleName :: FCode Module
+getModuleName = do { info <- getInfoDown; return (cgd_mod info) }
+
+-- ----------------------------------------------------------------------------
+-- Get/set the end-of-block info
+
+withSequel :: Sequel -> FCode a -> FCode a
+withSequel sequel code
+ = do { info <- getInfoDown
+ ; withInfoDown code (info {cgd_sequel = sequel, cgd_self_loop = Nothing }) }
+
+getSequel :: FCode Sequel
+getSequel = do { info <- getInfoDown
+ ; return (cgd_sequel info) }
+
+-- ----------------------------------------------------------------------------
+-- Get/set the size of the update frame
+
+-- We keep track of the size of the update frame so that we
+-- can set the stack pointer to the proper address on return
+-- (or tail call) from the closure.
+-- There should be at most one update frame for each closure.
+-- Note: I'm including the size of the original return address
+-- in the size of the update frame -- hence the default case on `get'.
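+--
+-- For instance (cf. initUpdFrameOff above): on entry to a closure the
+-- offset is a single word, for the return address alone; code that
+-- pushes an update frame then compiles its body under withUpdFrameOff
+-- with the offset grown by the size of that frame.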
+
+withUpdFrameOff :: UpdFrameOffset -> FCode a -> FCode a
+withUpdFrameOff size code
+ = do { info <- getInfoDown
+ ; withInfoDown code (info {cgd_updfr_off = size }) }
+
+getUpdFrameOff :: FCode UpdFrameOffset
+getUpdFrameOff
+ = do { info <- getInfoDown
+ ; return $ cgd_updfr_off info }
+
+-- ----------------------------------------------------------------------------
+-- Get/set the current ticky counter label
+
+getTickyCtrLabel :: FCode CLabel
+getTickyCtrLabel = do
+ info <- getInfoDown
+ return (cgd_ticky info)
+
+setTickyCtrLabel :: CLabel -> FCode a -> FCode a
+setTickyCtrLabel ticky code = do
+ info <- getInfoDown
+ withInfoDown code (info {cgd_ticky = ticky})
+
+-- ----------------------------------------------------------------------------
+-- Manage tick scopes
+
+-- | The current tick scope. We will assign this to generated blocks.
+getTickScope :: FCode CmmTickScope
+getTickScope = do
+ info <- getInfoDown
+ return (cgd_tick_scope info)
+
+-- | Places blocks generated by the given code into a fresh
+-- (sub-)scope. This will make sure that Cmm annotations in our scope
+-- will apply to the Cmm blocks generated therein - but not the other
+-- way around.
+tickScope :: FCode a -> FCode a
+tickScope code = do
+ info <- getInfoDown
+ if debugLevel (cgd_dflags info) == 0 then code else do
+ u <- newUnique
+ let scope' = SubScope u (cgd_tick_scope info)
+ withInfoDown code info{ cgd_tick_scope = scope' }
+
+
+--------------------------------------------------------
+-- Forking
+--------------------------------------------------------
+
+forkClosureBody :: FCode () -> FCode ()
+-- forkClosureBody compiles body_code in environment where:
+-- - sequel, update stack frame and self loop info are
+-- set to fresh values
+-- - state is set to a fresh value, except for local bindings
+-- that are passed in unchanged. It's up to the enclosed code to
+-- re-bind the free variables to a field of the closure.
+
+forkClosureBody body_code
+ = do { dflags <- getDynFlags
+ ; info <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let body_info_down = info { cgd_sequel = initSequel
+ , cgd_updfr_off = initUpdFrameOff dflags
+ , cgd_self_loop = Nothing }
+ fork_state_in = (initCgState us) { cgs_binds = cgs_binds state }
+ ((),fork_state_out) = doFCode body_code body_info_down fork_state_in
+ ; setState $ state `addCodeBlocksFrom` fork_state_out }
+
+forkLneBody :: FCode a -> FCode a
+-- 'forkLneBody' takes a body of let-no-escape binding and compiles
+-- it in the *current* environment, returning the graph thus constructed.
+--
+-- The current environment is passed on completely unchanged to
+-- the successor. In particular, any heap usage from the enclosed
+-- code is discarded; it should deal with its own heap consumption.
+forkLneBody body_code
+ = do { info_down <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state }
+ (result, fork_state_out) = doFCode body_code info_down fork_state_in
+ ; setState $ state `addCodeBlocksFrom` fork_state_out
+ ; return result }
+
+codeOnly :: FCode () -> FCode ()
+-- Emit any code from the inner thing into the outer thing
+-- Do not affect anything else in the outer state
+-- Used in almost-circular code to prevent false loop dependencies
+codeOnly body_code
+ = do { info_down <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state
+ , cgs_hp_usg = cgs_hp_usg state }
+ ((), fork_state_out) = doFCode body_code info_down fork_state_in
+ ; setState $ state `addCodeBlocksFrom` fork_state_out }
+
+forkAlts :: [FCode a] -> FCode [a]
+-- (forkAlts bs) takes fcodes 'bs' for the branches of a 'case' and
+-- compiles each in the current environment. The current environment
+-- is passed on unmodified, except that the virtual Hp is moved on to
+-- the worst virtual Hp for the branches.
+
+forkAlts branch_fcodes
+ = do { info_down <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let compile us branch
+ = (us2, doFCode branch info_down branch_state)
+ where
+ (us1,us2) = splitUniqSupply us
+ branch_state = (initCgState us1) {
+ cgs_binds = cgs_binds state
+ , cgs_hp_usg = cgs_hp_usg state }
+ (_us, results) = mapAccumL compile us branch_fcodes
+ (branch_results, branch_out_states) = unzip results
+ ; setState $ foldl' stateIncUsage state branch_out_states
+                -- NB foldl': state is the *left* argument to stateIncUsage
+ ; return branch_results }
+
+forkAltPair :: FCode a -> FCode a -> FCode (a,a)
+-- Most common use of 'forkAlts'; having this helper function avoids
+-- accidental use of fallible pattern-matches in @do@-notation
+forkAltPair x y = do
+ xy' <- forkAlts [x,y]
+ case xy' of
+ [x',y'] -> return (x',y')
+ _ -> panic "forkAltPair"
+
+-- collect the code emitted by an FCode computation
+getCodeR :: FCode a -> FCode (a, CmmAGraph)
+getCodeR fcode
+ = do { state1 <- getState
+ ; (a, state2) <- withState fcode (state1 { cgs_stmts = mkNop })
+ ; setState $ state2 { cgs_stmts = cgs_stmts state1 }
+ ; return (a, cgs_stmts state2) }
+
+getCode :: FCode a -> FCode CmmAGraph
+getCode fcode = do { (_,stmts) <- getCodeR fcode; return stmts }
+
+-- | Generate code into a fresh tick (sub-)scope and gather generated code
+getCodeScoped :: FCode a -> FCode (a, CmmAGraphScoped)
+getCodeScoped fcode
+ = do { state1 <- getState
+ ; ((a, tscope), state2) <-
+ tickScope $
+ flip withState state1 { cgs_stmts = mkNop } $
+ do { a <- fcode
+ ; scp <- getTickScope
+ ; return (a, scp) }
+ ; setState $ state2 { cgs_stmts = cgs_stmts state1 }
+ ; return (a, (cgs_stmts state2, tscope)) }
+
+
+-- 'getHeapUsage' applies a function to the amount of heap that it uses.
+-- It initialises the heap usage to zeros, and passes on an unchanged
+-- heap usage.
+--
+-- It is usually a prelude to performing a GC check, so everything must
+-- be in a tidy and consistent state.
+--
+-- Note the slightly subtle fixed point behaviour needed here
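+-- (laziness ties the knot: 'hp_hw' is defined from the state that
+-- running 'fcode hp_hw' produces, so the code 'fcode' emits may refer
+-- to hp_hw, but the heap usage it records must not depend on it, or
+-- the fixed point would diverge)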
+
+getHeapUsage :: (VirtualHpOffset -> FCode a) -> FCode a
+getHeapUsage fcode
+ = do { info_down <- getInfoDown
+ ; state <- getState
+ ; let fstate_in = state { cgs_hp_usg = initHpUsage }
+ (r, fstate_out) = doFCode (fcode hp_hw) info_down fstate_in
+ hp_hw = heapHWM (cgs_hp_usg fstate_out) -- Loop here!
+
+ ; setState $ fstate_out { cgs_hp_usg = cgs_hp_usg state }
+ ; return r }
+
+-- ----------------------------------------------------------------------------
+-- Combinators for emitting code
+
+emitCgStmt :: CgStmt -> FCode ()
+emitCgStmt stmt
+ = do { state <- getState
+ ; setState $ state { cgs_stmts = cgs_stmts state `snocOL` stmt }
+ }
+
+emitLabel :: BlockId -> FCode ()
+emitLabel id = do tscope <- getTickScope
+ emitCgStmt (CgLabel id tscope)
+
+emitComment :: FastString -> FCode ()
+emitComment s
+ | debugIsOn = emitCgStmt (CgStmt (CmmComment s))
+ | otherwise = return ()
+
+emitTick :: CmmTickish -> FCode ()
+emitTick = emitCgStmt . CgStmt . CmmTick
+
+emitUnwind :: [(GlobalReg, Maybe CmmExpr)] -> FCode ()
+emitUnwind regs = do
+ dflags <- getDynFlags
+ when (debugLevel dflags > 0) $ do
+ emitCgStmt $ CgStmt $ CmmUnwind regs
+
+emitAssign :: CmmReg -> CmmExpr -> FCode ()
+emitAssign l r = emitCgStmt (CgStmt (CmmAssign l r))
+
+emitStore :: CmmExpr -> CmmExpr -> FCode ()
+emitStore l r = emitCgStmt (CgStmt (CmmStore l r))
+
+emit :: CmmAGraph -> FCode ()
+emit ag
+ = do { state <- getState
+ ; setState $ state { cgs_stmts = cgs_stmts state MkGraph.<*> ag } }
+
+emitDecl :: CmmDecl -> FCode ()
+emitDecl decl
+ = do { state <- getState
+ ; setState $ state { cgs_tops = cgs_tops state `snocOL` decl } }
+
+emitOutOfLine :: BlockId -> CmmAGraphScoped -> FCode ()
+emitOutOfLine l (stmts, tscope) = emitCgStmt (CgFork l stmts tscope)
+
+emitProcWithStackFrame
+ :: Convention -- entry convention
+ -> Maybe CmmInfoTable -- info table?
+ -> CLabel -- label for the proc
+ -> [CmmFormal] -- stack frame
+ -> [CmmFormal] -- arguments
+ -> CmmAGraphScoped -- code
+ -> Bool -- do stack layout?
+ -> FCode ()
+
+emitProcWithStackFrame _conv mb_info lbl _stk_args [] blocks False
+ = do { dflags <- getDynFlags
+ ; emitProc mb_info lbl [] blocks (widthInBytes (wordWidth dflags)) False
+ }
+emitProcWithStackFrame conv mb_info lbl stk_args args (graph, tscope) True
+ -- do layout
+ = do { dflags <- getDynFlags
+ ; let (offset, live, entry) = mkCallEntry dflags conv args stk_args
+ graph' = entry MkGraph.<*> graph
+ ; emitProc mb_info lbl live (graph', tscope) offset True
+ }
+emitProcWithStackFrame _ _ _ _ _ _ _ = panic "emitProcWithStackFrame"
+
+emitProcWithConvention :: Convention -> Maybe CmmInfoTable -> CLabel
+ -> [CmmFormal]
+ -> CmmAGraphScoped
+ -> FCode ()
+emitProcWithConvention conv mb_info lbl args blocks
+ = emitProcWithStackFrame conv mb_info lbl [] args blocks True
+
+emitProc :: Maybe CmmInfoTable -> CLabel -> [GlobalReg] -> CmmAGraphScoped
+ -> Int -> Bool -> FCode ()
+emitProc mb_info lbl live blocks offset do_layout
+ = do { dflags <- getDynFlags
+ ; l <- newBlockId
+ ; let
+ blks :: CmmGraph
+ blks = labelAGraph l blocks
+
+ infos | Just info <- mb_info = mapSingleton (g_entry blks) info
+ | otherwise = mapEmpty
+
+ sinfo = StackInfo { arg_space = offset
+ , updfr_space = Just (initUpdFrameOff dflags)
+ , do_layout = do_layout }
+
+ tinfo = TopInfo { info_tbls = infos
+ , stack_info=sinfo}
+
+ proc_block = CmmProc tinfo lbl live blks
+
+ ; state <- getState
+ ; setState $ state { cgs_tops = cgs_tops state `snocOL` proc_block } }
+
+getCmm :: FCode () -> FCode CmmGroup
+-- Get all the CmmTops (there should be no stmts)
+-- Return a single Cmm which may be split from other Cmms by
+-- object splitting (at a later stage)
+getCmm code
+ = do { state1 <- getState
+ ; ((), state2) <- withState code (state1 { cgs_tops = nilOL })
+ ; setState $ state2 { cgs_tops = cgs_tops state1 }
+ ; return (fromOL (cgs_tops state2)) }
+
+
+mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> FCode CmmAGraph
+mkCmmIfThenElse e tbranch fbranch = mkCmmIfThenElse' e tbranch fbranch Nothing
+
+mkCmmIfThenElse' :: CmmExpr -> CmmAGraph -> CmmAGraph
+ -> Maybe Bool -> FCode CmmAGraph
+mkCmmIfThenElse' e tbranch fbranch likely = do
+ tscp <- getTickScope
+ endif <- newBlockId
+ tid <- newBlockId
+ fid <- newBlockId
+
+ let
+ (test, then_, else_, likely') = case likely of
+ Just False | Just e' <- maybeInvertCmmExpr e
+        -- currently the NCG doesn't know about likely
+        -- annotations, so we manually swap the then and
+        -- else branches so that the likely-false branch
+        -- becomes a fall-through.
+ -> (e', fbranch, tbranch, Just True)
+ _ -> (e, tbranch, fbranch, likely)
+
+ return $ catAGraphs [ mkCbranch test tid fid likely'
+ , mkLabel tid tscp, then_, mkBranch endif
+ , mkLabel fid tscp, else_, mkLabel endif tscp ]
+
+mkCmmIfGoto :: CmmExpr -> BlockId -> FCode CmmAGraph
+mkCmmIfGoto e tid = mkCmmIfGoto' e tid Nothing
+
+mkCmmIfGoto' :: CmmExpr -> BlockId -> Maybe Bool -> FCode CmmAGraph
+mkCmmIfGoto' e tid l = do
+ endif <- newBlockId
+ tscp <- getTickScope
+ return $ catAGraphs [ mkCbranch e tid endif l, mkLabel endif tscp ]
+
+mkCmmIfThen :: CmmExpr -> CmmAGraph -> FCode CmmAGraph
+mkCmmIfThen e tbranch = mkCmmIfThen' e tbranch Nothing
+
+mkCmmIfThen' :: CmmExpr -> CmmAGraph -> Maybe Bool -> FCode CmmAGraph
+mkCmmIfThen' e tbranch l = do
+ endif <- newBlockId
+ tid <- newBlockId
+ tscp <- getTickScope
+ return $ catAGraphs [ mkCbranch e tid endif l
+ , mkLabel tid tscp, tbranch, mkLabel endif tscp ]
+
+mkCall :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmExpr]
+ -> UpdFrameOffset -> [CmmExpr] -> FCode CmmAGraph
+mkCall f (callConv, retConv) results actuals updfr_off extra_stack = do
+ dflags <- getDynFlags
+ k <- newBlockId
+ tscp <- getTickScope
+ let area = Young k
+ (off, _, copyin) = copyInOflow dflags retConv area results []
+ copyout = mkCallReturnsTo dflags f callConv actuals k off updfr_off extra_stack
+ return $ catAGraphs [copyout, mkLabel k tscp, copyin]
+
+mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmExpr] -> UpdFrameOffset
+ -> FCode CmmAGraph
+mkCmmCall f results actuals updfr_off
+ = mkCall f (NativeDirectCall, NativeReturn) results actuals updfr_off []
+
+
+-- ----------------------------------------------------------------------------
+-- turn CmmAGraph into CmmGraph, for making a new proc.
+
+aGraphToGraph :: CmmAGraphScoped -> FCode CmmGraph
+aGraphToGraph stmts
+ = do { l <- newBlockId
+ ; return (labelAGraph l stmts) }
diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs
new file mode 100644
index 0000000000..dc69a51916
--- /dev/null
+++ b/compiler/GHC/StgToCmm/Prim.hs
@@ -0,0 +1,2622 @@
+{-# LANGUAGE CPP #-}
+-- emitPrimOp is quite large
+{-# OPTIONS_GHC -fmax-pmcheck-iterations=4000000 #-}
+
+----------------------------------------------------------------------------
+--
+-- Stg to C--: primitive operations
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+module GHC.StgToCmm.Prim (
+ cgOpApp,
+ cgPrimOp, -- internal(ish), used by cgCase to get code for a
+ -- comparison without also turning it into a Bool.
+ shouldInlinePrimOp
+ ) where
+
+#include "HsVersions.h"
+
+import GhcPrelude hiding ((<*>))
+
+import GHC.StgToCmm.Layout
+import GHC.StgToCmm.Foreign
+import GHC.StgToCmm.Env
+import GHC.StgToCmm.Monad
+import GHC.StgToCmm.Utils
+import GHC.StgToCmm.Ticky
+import GHC.StgToCmm.Heap
+import GHC.StgToCmm.Prof ( costCentreFrom )
+
+import DynFlags
+import GHC.Platform
+import BasicTypes
+import BlockId
+import MkGraph
+import StgSyn
+import Cmm
+import Type ( Type, tyConAppTyCon )
+import TyCon
+import CLabel
+import CmmUtils
+import PrimOp
+import SMRep
+import FastString
+import Outputable
+import Util
+import Data.Maybe
+
+import Data.Bits ((.&.), bit)
+import Control.Monad (liftM, when, unless)
+
+------------------------------------------------------------------------
+-- Primitive operations and foreign calls
+------------------------------------------------------------------------
+
+{- Note [Foreign call results]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+A foreign call always returns an unboxed tuple of results, one
+of which is the state token. This seems to happen even for pure
+calls.
+
+Even if we returned a single result for pure calls, it'd still be
+right to wrap it in a singleton unboxed tuple, because the result
+might be a Haskell closure pointer and we don't want to evaluate it. -}
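+
+{- For instance (illustrative only), an IO-level foreign call such as
+
+     foreign import ccall "getchar" c_getchar :: IO Int
+
+   reaches this code generator returning the unboxed pair
+   (# State# RealWorld, Int #); the first component is the state token
+   the Note above refers to. -}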
+
+----------------------------------
+cgOpApp :: StgOp -- The op
+ -> [StgArg] -- Arguments
+ -> Type -- Result type (always an unboxed tuple)
+ -> FCode ReturnKind
+
+-- Foreign calls
+cgOpApp (StgFCallOp fcall ty) stg_args res_ty
+ = cgForeignCall fcall ty stg_args res_ty
+ -- Note [Foreign call results]
+
+-- tagToEnum# is special: we need to pull the constructor
+-- out of the table, and perform an appropriate return.
+
+cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty
+ = ASSERT(isEnumerationTyCon tycon)
+ do { dflags <- getDynFlags
+ ; args' <- getNonVoidArgAmodes [arg]
+ ; let amode = case args' of [amode] -> amode
+ _ -> panic "TagToEnumOp had void arg"
+ ; emitReturn [tagToClosure dflags tycon amode] }
+ where
+ -- If you're reading this code in the attempt to figure
+ -- out why the compiler panic'ed here, it is probably because
+ -- you used tagToEnum# in a non-monomorphic setting, e.g.,
+ -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
+ -- That won't work.
+ tycon = tyConAppTyCon res_ty
+
+cgOpApp (StgPrimOp primop) args res_ty = do
+ dflags <- getDynFlags
+ cmm_args <- getNonVoidArgAmodes args
+ case shouldInlinePrimOp dflags primop cmm_args of
+ Nothing -> do -- out-of-line
+ let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop))
+ emitCall (NativeNodeCall, NativeReturn) fun cmm_args
+
+ Just f -- inline
+ | ReturnsPrim VoidRep <- result_info
+ -> do f []
+ emitReturn []
+
+ | ReturnsPrim rep <- result_info
+ -> do dflags <- getDynFlags
+ res <- newTemp (primRepCmmType dflags rep)
+ f [res]
+ emitReturn [CmmReg (CmmLocal res)]
+
+ | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
+ -> do (regs, _hints) <- newUnboxedTupleRegs res_ty
+ f regs
+ emitReturn (map (CmmReg . CmmLocal) regs)
+
+ | otherwise -> panic "cgPrimop"
+ where
+ result_info = getPrimOpResultInfo primop
+
+cgOpApp (StgPrimCallOp primcall) args _res_ty
+ = do { cmm_args <- getNonVoidArgAmodes args
+ ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall))
+ ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args }
+
+-- | Interpret the argument as an unsigned value, assuming the value
+-- is given in two's-complement form in the given width.
+--
+-- Example: @asUnsigned W64 (-1)@ is 18446744073709551615.
+--
+-- This function is used to work around the fact that many array
+-- primops take Int# arguments, but we interpret them as unsigned
+-- quantities in the code gen. This means that we have to be careful
+-- every time we work on e.g. a CmmInt literal that corresponds to the
+-- array size, as it might contain a negative Integer value if the
+-- user passed a value larger than 2^(wORD_SIZE_IN_BITS-1) as the Int#
+-- literal.
+asUnsigned :: Width -> Integer -> Integer
+asUnsigned w n = n .&. (bit (widthInBits w) - 1)
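+-- e.g. asUnsigned W8 (-1) = (-1) .&. (bit 8 - 1) = 255, the unsigned
+-- reading of the all-ones byte 0xFF.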
+
+-- TODO: Several primop implementations (e.g. 'doNewByteArrayOp') use
+-- ByteOff (or some other fixed width signed type) to represent
+-- array sizes or indices. This means that these will overflow for
+-- large enough sizes.
+
+-- | Decide whether an out-of-line primop should be replaced by an
+-- inline implementation. This might happen e.g. if there's enough
+-- static information, such as statically known arguments, to emit a
+-- more efficient implementation inline.
+--
+-- Returns 'Nothing' if this primop should use its out-of-line
+-- implementation (defined elsewhere) and 'Just' together with a code
+-- generating function that takes the output regs as arguments
+-- otherwise.
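+--
+-- For example, @newByteArray# 64#@ arrives here as
+-- 'NewByteArrayOp_Char' with a literal @CmmInt 64 w@ argument; since
+-- 64 bytes is (by default) below 'maxInlineAllocSize', the first
+-- equation below returns 'Just' a 'doNewByteArrayOp' code generator.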
+shouldInlinePrimOp :: DynFlags
+ -> PrimOp -- ^ The primop
+ -> [CmmExpr] -- ^ The primop arguments
+ -> Maybe ([LocalReg] -> FCode ())
+
+shouldInlinePrimOp dflags NewByteArrayOp_Char [(CmmLit (CmmInt n w))]
+ | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) =
+ Just $ \ [res] -> doNewByteArrayOp res (fromInteger n)
+
+shouldInlinePrimOp dflags NewArrayOp [(CmmLit (CmmInt n w)), init]
+ | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
+ Just $ \ [res] ->
+ doNewArrayOp res (arrPtrsRep dflags (fromInteger n)) mkMAP_DIRTY_infoLabel
+ [ (mkIntExpr dflags (fromInteger n),
+ fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags)
+ , (mkIntExpr dflags (nonHdrSizeW (arrPtrsRep dflags (fromInteger n))),
+ fixedHdrSize dflags + oFFSET_StgMutArrPtrs_size dflags)
+ ]
+ (fromInteger n) init
+
+shouldInlinePrimOp _ CopyArrayOp
+ [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
+ Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
+
+shouldInlinePrimOp _ CopyMutableArrayOp
+ [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
+ Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
+
+shouldInlinePrimOp _ CopyArrayArrayOp
+ [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
+ Just $ \ [] -> doCopyArrayOp src src_off dst dst_off (fromInteger n)
+
+shouldInlinePrimOp _ CopyMutableArrayArrayOp
+ [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
+ Just $ \ [] -> doCopyMutableArrayOp src src_off dst dst_off (fromInteger n)
+
+shouldInlinePrimOp dflags CloneArrayOp [src, src_off, (CmmLit (CmmInt n w))]
+ | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
+ Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
+
+shouldInlinePrimOp dflags CloneMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
+ | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
+ Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
+
+shouldInlinePrimOp dflags FreezeArrayOp [src, src_off, (CmmLit (CmmInt n w))]
+ | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
+ Just $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
+
+shouldInlinePrimOp dflags ThawArrayOp [src, src_off, (CmmLit (CmmInt n w))]
+ | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
+ Just $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n)
+
+shouldInlinePrimOp dflags NewSmallArrayOp [(CmmLit (CmmInt n w)), init]
+ | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
+ Just $ \ [res] ->
+ doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel
+ [ (mkIntExpr dflags (fromInteger n),
+ fixedHdrSize dflags + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
+ ]
+ (fromInteger n) init
+
+shouldInlinePrimOp _ CopySmallArrayOp
+ [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
+ Just $ \ [] -> doCopySmallArrayOp src src_off dst dst_off (fromInteger n)
+
+shouldInlinePrimOp _ CopySmallMutableArrayOp
+ [src, src_off, dst, dst_off, (CmmLit (CmmInt n _))] =
+ Just $ \ [] -> doCopySmallMutableArrayOp src src_off dst dst_off (fromInteger n)
+
+shouldInlinePrimOp dflags CloneSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
+ | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
+ Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
+
+shouldInlinePrimOp dflags CloneSmallMutableArrayOp [src, src_off, (CmmLit (CmmInt n w))]
+ | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
+ Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
+
+shouldInlinePrimOp dflags FreezeSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
+ | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
+ Just $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n)
+
+shouldInlinePrimOp dflags ThawSmallArrayOp [src, src_off, (CmmLit (CmmInt n w))]
+ | wordsToBytes dflags (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) =
+ Just $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n)
+
+shouldInlinePrimOp dflags primop args
+ | primOpOutOfLine primop = Nothing
+ | otherwise = Just $ \ regs -> emitPrimOp dflags regs primop args
+
+-- TODO: Several primops, such as 'copyArray#', only have an inline
+-- implementation (below) but could possibly have both an inline
+-- implementation and an out-of-line implementation, just like
+-- 'newArray#'. This would lower the amount of code generated,
+-- hopefully without a performance impact (needs to be measured).
+
+---------------------------------------------------
+cgPrimOp :: [LocalReg] -- where to put the results
+ -> PrimOp -- the op
+ -> [StgArg] -- arguments
+ -> FCode ()
+
+cgPrimOp results op args
+ = do dflags <- getDynFlags
+ arg_exprs <- getNonVoidArgAmodes args
+ emitPrimOp dflags results op arg_exprs
+
+
+------------------------------------------------------------------------
+-- Emitting code for a primop
+------------------------------------------------------------------------
+
+emitPrimOp :: DynFlags
+ -> [LocalReg] -- where to put the results
+ -> PrimOp -- the op
+ -> [CmmExpr] -- arguments
+ -> FCode ()
+
+-- First we handle various awkward cases specially. The remaining
+-- easy cases are then handled by translateOp, defined below.
+
+emitPrimOp _ [res] ParOp [arg]
+ =
+ -- for now, just implement this in a C function
+ -- later, we might want to inline it.
+ emitCCall
+ [(res,NoHint)]
+ (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
+ [(baseExpr, AddrHint), (arg,AddrHint)]
+
+emitPrimOp dflags [res] SparkOp [arg]
+ = do
+    -- returns the value of arg in res. We're therefore going to
+ -- refer to arg twice (once to pass to newSpark(), and once to
+ -- assign to res), so put it in a temporary.
+ tmp <- assignTemp arg
+ tmp2 <- newTemp (bWord dflags)
+ emitCCall
+ [(tmp2,NoHint)]
+ (CmmLit (CmmLabel (mkForeignLabel (fsLit "newSpark") Nothing ForeignLabelInExternalPackage IsFunction)))
+ [(baseExpr, AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
+ emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
+
+emitPrimOp dflags [res] GetCCSOfOp [arg]
+ = emitAssign (CmmLocal res) val
+ where
+ val
+ | gopt Opt_SccProfilingOn dflags = costCentreFrom dflags (cmmUntag dflags arg)
+ | otherwise = CmmLit (zeroCLit dflags)
+
+emitPrimOp _ [res] GetCurrentCCSOp [_dummy_arg]
+ = emitAssign (CmmLocal res) cccsExpr
+
+emitPrimOp _ [res] MyThreadIdOp []
+ = emitAssign (CmmLocal res) currentTSOExpr
+
+emitPrimOp dflags [res] ReadMutVarOp [mutv]
+ = emitAssign (CmmLocal res) (cmmLoadIndexW dflags mutv (fixedHdrSizeW dflags) (gcWord dflags))
+
+emitPrimOp dflags res@[] WriteMutVarOp [mutv,var]
+ = do -- Without this write barrier, other CPUs may see this pointer before
+ -- the writes for the closure it points to have occurred.
+ emitPrimCall res MO_WriteBarrier []
+ emitStore (cmmOffsetW dflags mutv (fixedHdrSizeW dflags)) var
+ emitCCall
+ [{-no results-}]
+ (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
+ [(baseExpr, AddrHint), (mutv,AddrHint)]
+
+-- #define sizzeofByteArrayzh(r,a) \
+-- r = ((StgArrBytes *)(a))->bytes
+emitPrimOp dflags [res] SizeofByteArrayOp [arg]
+ = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
+
+-- #define sizzeofMutableByteArrayzh(r,a) \
+-- r = ((StgArrBytes *)(a))->bytes
+emitPrimOp dflags [res] SizeofMutableByteArrayOp [arg]
+ = emitPrimOp dflags [res] SizeofByteArrayOp [arg]
+
+-- #define getSizzeofMutableByteArrayzh(r,a) \
+-- r = ((StgArrBytes *)(a))->bytes
+emitPrimOp dflags [res] GetSizeofMutableByteArrayOp [arg]
+ = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
+
+
+-- #define touchzh(o) /* nothing */
+emitPrimOp _ res@[] TouchOp args@[_arg]
+ = do emitPrimCall res MO_Touch args
+
+-- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
+emitPrimOp dflags [res] ByteArrayContents_Char [arg]
+ = emitAssign (CmmLocal res) (cmmOffsetB dflags arg (arrWordsHdrSize dflags))
+
+-- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
+emitPrimOp dflags [res] StableNameToIntOp [arg]
+ = emitAssign (CmmLocal res) (cmmLoadIndexW dflags arg (fixedHdrSizeW dflags) (bWord dflags))
+
+emitPrimOp dflags [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
+ = emitAssign (CmmLocal res) (CmmMachOp (mo_wordEq dflags) [arg1,arg2])
+
+-- #define addrToHValuezh(r,a) r=(P_)a
+emitPrimOp _ [res] AddrToAnyOp [arg]
+ = emitAssign (CmmLocal res) arg
+
+-- #define hvalueToAddrzh(r, a) r=(W_)a
+emitPrimOp _ [res] AnyToAddrOp [arg]
+ = emitAssign (CmmLocal res) arg
+
+{- Freezing arrays-of-ptrs requires changing an info table, for the
+ benefit of the generational collector. It needs to scavenge mutable
+ objects, even if they are in old space. When they become immutable,
+ they can be removed from this scavenge list. -}
+
+-- #define unsafeFreezzeArrayzh(r,a)
+-- {
+-- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN_DIRTY_info);
+-- r = a;
+-- }
+emitPrimOp _ [res] UnsafeFreezeArrayOp [arg]
+ = emit $ catAGraphs
+ [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)),
+ mkAssign (CmmLocal res) arg ]
+emitPrimOp _ [res] UnsafeFreezeArrayArrayOp [arg]
+ = emit $ catAGraphs
+ [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_DIRTY_infoLabel)),
+ mkAssign (CmmLocal res) arg ]
+emitPrimOp _ [res] UnsafeFreezeSmallArrayOp [arg]
+ = emit $ catAGraphs
+ [ setInfo arg (CmmLit (CmmLabel mkSMAP_FROZEN_DIRTY_infoLabel)),
+ mkAssign (CmmLocal res) arg ]
+
+-- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
+emitPrimOp _ [res] UnsafeFreezeByteArrayOp [arg]
+ = emitAssign (CmmLocal res) arg
+
+-- Reading/writing pointer arrays
+
+emitPrimOp _ [res] ReadArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
+emitPrimOp _ [res] IndexArrayOp [obj,ix] = doReadPtrArrayOp res obj ix
+emitPrimOp _ [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v
+
+emitPrimOp _ [res] IndexArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
+emitPrimOp _ [res] IndexArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
+emitPrimOp _ [res] ReadArrayArrayOp_ByteArray [obj,ix] = doReadPtrArrayOp res obj ix
+emitPrimOp _ [res] ReadArrayArrayOp_MutableByteArray [obj,ix] = doReadPtrArrayOp res obj ix
+emitPrimOp _ [res] ReadArrayArrayOp_ArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
+emitPrimOp _ [res] ReadArrayArrayOp_MutableArrayArray [obj,ix] = doReadPtrArrayOp res obj ix
+emitPrimOp _ [] WriteArrayArrayOp_ByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
+emitPrimOp _ [] WriteArrayArrayOp_MutableByteArray [obj,ix,v] = doWritePtrArrayOp obj ix v
+emitPrimOp _ [] WriteArrayArrayOp_ArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
+emitPrimOp _ [] WriteArrayArrayOp_MutableArrayArray [obj,ix,v] = doWritePtrArrayOp obj ix v
+
+emitPrimOp _ [res] ReadSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
+emitPrimOp _ [res] IndexSmallArrayOp [obj,ix] = doReadSmallPtrArrayOp res obj ix
+emitPrimOp _ [] WriteSmallArrayOp [obj,ix,v] = doWriteSmallPtrArrayOp obj ix v
+
+-- Getting the size of pointer arrays
+
+emitPrimOp dflags [res] SizeofArrayOp [arg]
+ = emit $ mkAssign (CmmLocal res) (cmmLoadIndexW dflags arg
+ (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgMutArrPtrs_ptrs dflags))
+ (bWord dflags))
+emitPrimOp dflags [res] SizeofMutableArrayOp [arg]
+ = emitPrimOp dflags [res] SizeofArrayOp [arg]
+emitPrimOp dflags [res] SizeofArrayArrayOp [arg]
+ = emitPrimOp dflags [res] SizeofArrayOp [arg]
+emitPrimOp dflags [res] SizeofMutableArrayArrayOp [arg]
+ = emitPrimOp dflags [res] SizeofArrayOp [arg]
+
+emitPrimOp dflags [res] SizeofSmallArrayOp [arg] =
+ emit $ mkAssign (CmmLocal res)
+ (cmmLoadIndexW dflags arg
+ (fixedHdrSizeW dflags + bytesToWordsRoundUp dflags (oFFSET_StgSmallMutArrPtrs_ptrs dflags))
+ (bWord dflags))
+emitPrimOp dflags [res] SizeofSmallMutableArrayOp [arg] =
+ emitPrimOp dflags [res] SizeofSmallArrayOp [arg]
+
+-- IndexXXXoffAddr
+
+emitPrimOp dflags res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
+emitPrimOp dflags res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
+emitPrimOp dflags res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
+emitPrimOp dflags res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
+emitPrimOp dflags res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
+emitPrimOp _ res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
+emitPrimOp _ res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
+emitPrimOp dflags res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
+emitPrimOp dflags res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
+emitPrimOp dflags res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
+emitPrimOp dflags res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
+emitPrimOp _ res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
+emitPrimOp dflags res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
+emitPrimOp dflags res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
+emitPrimOp dflags res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
+emitPrimOp _ res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
+
+-- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
+
+emitPrimOp dflags res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
+emitPrimOp dflags res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
+emitPrimOp dflags res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing (bWord dflags) res args
+emitPrimOp dflags res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing (bWord dflags) res args
+emitPrimOp dflags res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing (bWord dflags) res args
+emitPrimOp _ res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args
+emitPrimOp _ res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args
+emitPrimOp dflags res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing (bWord dflags) res args
+emitPrimOp dflags res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just (mo_s_8ToWord dflags)) b8 res args
+emitPrimOp dflags res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just (mo_s_16ToWord dflags)) b16 res args
+emitPrimOp dflags res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just (mo_s_32ToWord dflags)) b32 res args
+emitPrimOp _ res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args
+emitPrimOp dflags res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just (mo_u_8ToWord dflags)) b8 res args
+emitPrimOp dflags res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just (mo_u_16ToWord dflags)) b16 res args
+emitPrimOp dflags res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just (mo_u_32ToWord dflags)) b32 res args
+emitPrimOp _ res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args
+
+-- IndexXXXArray
+
+emitPrimOp dflags res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
+emitPrimOp dflags res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
+emitPrimOp dflags res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
+emitPrimOp dflags res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
+emitPrimOp dflags res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
+emitPrimOp _ res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
+emitPrimOp _ res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
+emitPrimOp dflags res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
+emitPrimOp dflags res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
+emitPrimOp dflags res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
+emitPrimOp _ res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
+emitPrimOp dflags res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
+emitPrimOp _ res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
+
+-- ReadXXXArray, identical to IndexXXXArray.
+
+emitPrimOp dflags res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
+emitPrimOp dflags res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
+emitPrimOp dflags res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing (bWord dflags) res args
+emitPrimOp dflags res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing (bWord dflags) res args
+emitPrimOp dflags res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing (bWord dflags) res args
+emitPrimOp _ res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args
+emitPrimOp _ res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args
+emitPrimOp dflags res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing (bWord dflags) res args
+emitPrimOp dflags res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just (mo_s_8ToWord dflags)) b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just (mo_s_16ToWord dflags)) b16 res args
+emitPrimOp dflags res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just (mo_s_32ToWord dflags)) b32 res args
+emitPrimOp _ res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just (mo_u_8ToWord dflags)) b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just (mo_u_16ToWord dflags)) b16 res args
+emitPrimOp dflags res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just (mo_u_32ToWord dflags)) b32 res args
+emitPrimOp _ res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args
+
+-- IndexWord8ArrayAsXXX
+
+emitPrimOp dflags res IndexByteArrayOp_Word8AsChar args = doIndexByteArrayOpAs (Just (mo_u_8ToWord dflags)) b8 b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8AsWideChar args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8AsInt args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8AsWord args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8AsAddr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
+emitPrimOp _ res IndexByteArrayOp_Word8AsFloat args = doIndexByteArrayOpAs Nothing f32 b8 res args
+emitPrimOp _ res IndexByteArrayOp_Word8AsDouble args = doIndexByteArrayOpAs Nothing f64 b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8AsInt16 args = doIndexByteArrayOpAs (Just (mo_s_16ToWord dflags)) b16 b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8AsInt32 args = doIndexByteArrayOpAs (Just (mo_s_32ToWord dflags)) b32 b8 res args
+emitPrimOp _ res IndexByteArrayOp_Word8AsInt64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8AsWord16 args = doIndexByteArrayOpAs (Just (mo_u_16ToWord dflags)) b16 b8 res args
+emitPrimOp dflags res IndexByteArrayOp_Word8AsWord32 args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
+emitPrimOp _ res IndexByteArrayOp_Word8AsWord64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
+
+-- ReadInt8ArrayAsXXX, identical to IndexInt8ArrayAsXXX
+
+emitPrimOp dflags res ReadByteArrayOp_Word8AsChar args = doIndexByteArrayOpAs (Just (mo_u_8ToWord dflags)) b8 b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8AsWideChar args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8AsInt args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8AsWord args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8AsAddr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
+emitPrimOp _ res ReadByteArrayOp_Word8AsFloat args = doIndexByteArrayOpAs Nothing f32 b8 res args
+emitPrimOp _ res ReadByteArrayOp_Word8AsDouble args = doIndexByteArrayOpAs Nothing f64 b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8AsStablePtr args = doIndexByteArrayOpAs Nothing (bWord dflags) b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8AsInt16 args = doIndexByteArrayOpAs (Just (mo_s_16ToWord dflags)) b16 b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8AsInt32 args = doIndexByteArrayOpAs (Just (mo_s_32ToWord dflags)) b32 b8 res args
+emitPrimOp _ res ReadByteArrayOp_Word8AsInt64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8AsWord16 args = doIndexByteArrayOpAs (Just (mo_u_16ToWord dflags)) b16 b8 res args
+emitPrimOp dflags res ReadByteArrayOp_Word8AsWord32 args = doIndexByteArrayOpAs (Just (mo_u_32ToWord dflags)) b32 b8 res args
+emitPrimOp _ res ReadByteArrayOp_Word8AsWord64 args = doIndexByteArrayOpAs Nothing b64 b8 res args
+
+-- WriteXXXoffAddr
+
+emitPrimOp dflags res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
+emitPrimOp dflags res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
+emitPrimOp dflags res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing (bWord dflags) res args
+emitPrimOp dflags res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing (bWord dflags) res args
+emitPrimOp dflags res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing (bWord dflags) res args
+emitPrimOp _ res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing f32 res args
+emitPrimOp _ res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing f64 res args
+emitPrimOp dflags res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing (bWord dflags) res args
+emitPrimOp dflags res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
+emitPrimOp dflags res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
+emitPrimOp dflags res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
+emitPrimOp _ res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing b64 res args
+emitPrimOp dflags res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just (mo_WordTo8 dflags)) b8 res args
+emitPrimOp dflags res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just (mo_WordTo16 dflags)) b16 res args
+emitPrimOp dflags res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just (mo_WordTo32 dflags)) b32 res args
+emitPrimOp _ res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing b64 res args
+
+-- WriteXXXArray
+
+emitPrimOp dflags res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
+emitPrimOp dflags res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
+emitPrimOp dflags res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing (bWord dflags) res args
+emitPrimOp dflags res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing (bWord dflags) res args
+emitPrimOp dflags res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing (bWord dflags) res args
+emitPrimOp _ res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing f32 res args
+emitPrimOp _ res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing f64 res args
+emitPrimOp dflags res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing (bWord dflags) res args
+emitPrimOp dflags res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
+emitPrimOp dflags res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
+emitPrimOp dflags res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
+emitPrimOp _ res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing b64 res args
+emitPrimOp dflags res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
+emitPrimOp dflags res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b16 res args
+emitPrimOp dflags res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b32 res args
+emitPrimOp _ res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing b64 res args
+
+-- WriteInt8ArrayAsXXX
+
+emitPrimOp dflags res WriteByteArrayOp_Word8AsChar args = doWriteByteArrayOp (Just (mo_WordTo8 dflags)) b8 res args
+emitPrimOp dflags res WriteByteArrayOp_Word8AsWideChar args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
+emitPrimOp _ res WriteByteArrayOp_Word8AsInt args = doWriteByteArrayOp Nothing b8 res args
+emitPrimOp _ res WriteByteArrayOp_Word8AsWord args = doWriteByteArrayOp Nothing b8 res args
+emitPrimOp _ res WriteByteArrayOp_Word8AsAddr args = doWriteByteArrayOp Nothing b8 res args
+emitPrimOp _ res WriteByteArrayOp_Word8AsFloat args = doWriteByteArrayOp Nothing b8 res args
+emitPrimOp _ res WriteByteArrayOp_Word8AsDouble args = doWriteByteArrayOp Nothing b8 res args
+emitPrimOp _ res WriteByteArrayOp_Word8AsStablePtr args = doWriteByteArrayOp Nothing b8 res args
+emitPrimOp dflags res WriteByteArrayOp_Word8AsInt16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args
+emitPrimOp dflags res WriteByteArrayOp_Word8AsInt32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
+emitPrimOp _ res WriteByteArrayOp_Word8AsInt64 args = doWriteByteArrayOp Nothing b8 res args
+emitPrimOp dflags res WriteByteArrayOp_Word8AsWord16 args = doWriteByteArrayOp (Just (mo_WordTo16 dflags)) b8 res args
+emitPrimOp dflags res WriteByteArrayOp_Word8AsWord32 args = doWriteByteArrayOp (Just (mo_WordTo32 dflags)) b8 res args
+emitPrimOp _ res WriteByteArrayOp_Word8AsWord64 args = doWriteByteArrayOp Nothing b8 res args
+
+-- Copying and setting byte arrays
+emitPrimOp _ [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
+ doCopyByteArrayOp src src_off dst dst_off n
+emitPrimOp _ [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
+ doCopyMutableByteArrayOp src src_off dst dst_off n
+emitPrimOp _ [] CopyByteArrayToAddrOp [src,src_off,dst,n] =
+ doCopyByteArrayToAddrOp src src_off dst n
+emitPrimOp _ [] CopyMutableByteArrayToAddrOp [src,src_off,dst,n] =
+ doCopyMutableByteArrayToAddrOp src src_off dst n
+emitPrimOp _ [] CopyAddrToByteArrayOp [src,dst,dst_off,n] =
+ doCopyAddrToByteArrayOp src dst dst_off n
+emitPrimOp _ [] SetByteArrayOp [ba,off,len,c] =
+ doSetByteArrayOp ba off len c
+
+-- Comparing byte arrays
+emitPrimOp _ [res] CompareByteArraysOp [ba1,ba1_off,ba2,ba2_off,n] =
+ doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n
+
+emitPrimOp _ [res] BSwap16Op [w] = emitBSwapCall res w W16
+emitPrimOp _ [res] BSwap32Op [w] = emitBSwapCall res w W32
+emitPrimOp _ [res] BSwap64Op [w] = emitBSwapCall res w W64
+emitPrimOp dflags [res] BSwapOp [w] = emitBSwapCall res w (wordWidth dflags)
+
+emitPrimOp _ [res] BRev8Op [w] = emitBRevCall res w W8
+emitPrimOp _ [res] BRev16Op [w] = emitBRevCall res w W16
+emitPrimOp _ [res] BRev32Op [w] = emitBRevCall res w W32
+emitPrimOp _ [res] BRev64Op [w] = emitBRevCall res w W64
+emitPrimOp dflags [res] BRevOp [w] = emitBRevCall res w (wordWidth dflags)
+
+-- Population count
+emitPrimOp _ [res] PopCnt8Op [w] = emitPopCntCall res w W8
+emitPrimOp _ [res] PopCnt16Op [w] = emitPopCntCall res w W16
+emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
+emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
+emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
+
+-- Parallel bit deposit
+emitPrimOp _ [res] Pdep8Op [src, mask] = emitPdepCall res src mask W8
+emitPrimOp _ [res] Pdep16Op [src, mask] = emitPdepCall res src mask W16
+emitPrimOp _ [res] Pdep32Op [src, mask] = emitPdepCall res src mask W32
+emitPrimOp _ [res] Pdep64Op [src, mask] = emitPdepCall res src mask W64
+emitPrimOp dflags [res] PdepOp [src, mask] = emitPdepCall res src mask (wordWidth dflags)
+
+-- Parallel bit extract
+emitPrimOp _ [res] Pext8Op [src, mask] = emitPextCall res src mask W8
+emitPrimOp _ [res] Pext16Op [src, mask] = emitPextCall res src mask W16
+emitPrimOp _ [res] Pext32Op [src, mask] = emitPextCall res src mask W32
+emitPrimOp _ [res] Pext64Op [src, mask] = emitPextCall res src mask W64
+emitPrimOp dflags [res] PextOp [src, mask] = emitPextCall res src mask (wordWidth dflags)
+
+-- count leading zeros
+emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8
+emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16
+emitPrimOp _ [res] Clz32Op [w] = emitClzCall res w W32
+emitPrimOp _ [res] Clz64Op [w] = emitClzCall res w W64
+emitPrimOp dflags [res] ClzOp [w] = emitClzCall res w (wordWidth dflags)
+
+-- count trailing zeros
+emitPrimOp _ [res] Ctz8Op [w] = emitCtzCall res w W8
+emitPrimOp _ [res] Ctz16Op [w] = emitCtzCall res w W16
+emitPrimOp _ [res] Ctz32Op [w] = emitCtzCall res w W32
+emitPrimOp _ [res] Ctz64Op [w] = emitCtzCall res w W64
+emitPrimOp dflags [res] CtzOp [w] = emitCtzCall res w (wordWidth dflags)
+
+-- Unsigned int to floating point conversions
+emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
+ (MO_UF_Conv W32) [w]
+emitPrimOp _ [res] Word2DoubleOp [w] = emitPrimCall [res]
+ (MO_UF_Conv W64) [w]
+
+-- SIMD primops
+emitPrimOp dflags [res] (VecBroadcastOp vcat n w) [e] = do
+ checkVecCompatibility dflags vcat n w
+ doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros (replicate n e) res
+ where
+ zeros :: CmmExpr
+ zeros = CmmLit $ CmmVec (replicate n zero)
+
+ zero :: CmmLit
+ zero = case vcat of
+ IntVec -> CmmInt 0 w
+ WordVec -> CmmInt 0 w
+ FloatVec -> CmmFloat 0 w
+
+ ty :: CmmType
+ ty = vecVmmType vcat n w
+
+emitPrimOp dflags [res] (VecPackOp vcat n w) es = do
+ checkVecCompatibility dflags vcat n w
+ when (es `lengthIsNot` n) $
+ panic "emitPrimOp: VecPackOp has wrong number of arguments"
+ doVecPackOp (vecElemInjectCast dflags vcat w) ty zeros es res
+ where
+ zeros :: CmmExpr
+ zeros = CmmLit $ CmmVec (replicate n zero)
+
+ zero :: CmmLit
+ zero = case vcat of
+ IntVec -> CmmInt 0 w
+ WordVec -> CmmInt 0 w
+ FloatVec -> CmmFloat 0 w
+
+ ty :: CmmType
+ ty = vecVmmType vcat n w
+
+emitPrimOp dflags res (VecUnpackOp vcat n w) [arg] = do
+ checkVecCompatibility dflags vcat n w
+ when (res `lengthIsNot` n) $
+ panic "emitPrimOp: VecUnpackOp has wrong number of results"
+ doVecUnpackOp (vecElemProjectCast dflags vcat w) ty arg res
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
+
+emitPrimOp dflags [res] (VecInsertOp vcat n w) [v,e,i] = do
+ checkVecCompatibility dflags vcat n w
+ doVecInsertOp (vecElemInjectCast dflags vcat w) ty v e i res
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
+
+emitPrimOp dflags res (VecIndexByteArrayOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doIndexByteArrayOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
+
+emitPrimOp dflags res (VecReadByteArrayOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doIndexByteArrayOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
+
+emitPrimOp dflags res (VecWriteByteArrayOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doWriteByteArrayOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
+
+emitPrimOp dflags res (VecIndexOffAddrOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doIndexOffAddrOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
+
+emitPrimOp dflags res (VecReadOffAddrOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doIndexOffAddrOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
+
+emitPrimOp dflags res (VecWriteOffAddrOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doWriteOffAddrOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecVmmType vcat n w
+
+emitPrimOp dflags res (VecIndexScalarByteArrayOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doIndexByteArrayOpAs Nothing vecty ty res args
+ where
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w
+
+ ty :: CmmType
+ ty = vecCmmCat vcat w
+
+emitPrimOp dflags res (VecReadScalarByteArrayOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doIndexByteArrayOpAs Nothing vecty ty res args
+ where
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w
+
+ ty :: CmmType
+ ty = vecCmmCat vcat w
+
+emitPrimOp dflags res (VecWriteScalarByteArrayOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doWriteByteArrayOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecCmmCat vcat w
+
+emitPrimOp dflags res (VecIndexScalarOffAddrOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doIndexOffAddrOpAs Nothing vecty ty res args
+ where
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w
+
+ ty :: CmmType
+ ty = vecCmmCat vcat w
+
+emitPrimOp dflags res (VecReadScalarOffAddrOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doIndexOffAddrOpAs Nothing vecty ty res args
+ where
+ vecty :: CmmType
+ vecty = vecVmmType vcat n w
+
+ ty :: CmmType
+ ty = vecCmmCat vcat w
+
+emitPrimOp dflags res (VecWriteScalarOffAddrOp vcat n w) args = do
+ checkVecCompatibility dflags vcat n w
+ doWriteOffAddrOp Nothing ty res args
+ where
+ ty :: CmmType
+ ty = vecCmmCat vcat w
+
+-- Prefetch
+emitPrimOp _ [] PrefetchByteArrayOp3 args = doPrefetchByteArrayOp 3 args
+emitPrimOp _ [] PrefetchMutableByteArrayOp3 args = doPrefetchMutableByteArrayOp 3 args
+emitPrimOp _ [] PrefetchAddrOp3 args = doPrefetchAddrOp 3 args
+emitPrimOp _ [] PrefetchValueOp3 args = doPrefetchValueOp 3 args
+
+emitPrimOp _ [] PrefetchByteArrayOp2 args = doPrefetchByteArrayOp 2 args
+emitPrimOp _ [] PrefetchMutableByteArrayOp2 args = doPrefetchMutableByteArrayOp 2 args
+emitPrimOp _ [] PrefetchAddrOp2 args = doPrefetchAddrOp 2 args
+emitPrimOp _ [] PrefetchValueOp2 args = doPrefetchValueOp 2 args
+
+emitPrimOp _ [] PrefetchByteArrayOp1 args = doPrefetchByteArrayOp 1 args
+emitPrimOp _ [] PrefetchMutableByteArrayOp1 args = doPrefetchMutableByteArrayOp 1 args
+emitPrimOp _ [] PrefetchAddrOp1 args = doPrefetchAddrOp 1 args
+emitPrimOp _ [] PrefetchValueOp1 args = doPrefetchValueOp 1 args
+
+emitPrimOp _ [] PrefetchByteArrayOp0 args = doPrefetchByteArrayOp 0 args
+emitPrimOp _ [] PrefetchMutableByteArrayOp0 args = doPrefetchMutableByteArrayOp 0 args
+emitPrimOp _ [] PrefetchAddrOp0 args = doPrefetchAddrOp 0 args
+emitPrimOp _ [] PrefetchValueOp0 args = doPrefetchValueOp 0 args
+
+-- Atomic read-modify-write
+emitPrimOp dflags [res] FetchAddByteArrayOp_Int [mba, ix, n] =
+ doAtomicRMW res AMO_Add mba ix (bWord dflags) n
+emitPrimOp dflags [res] FetchSubByteArrayOp_Int [mba, ix, n] =
+ doAtomicRMW res AMO_Sub mba ix (bWord dflags) n
+emitPrimOp dflags [res] FetchAndByteArrayOp_Int [mba, ix, n] =
+ doAtomicRMW res AMO_And mba ix (bWord dflags) n
+emitPrimOp dflags [res] FetchNandByteArrayOp_Int [mba, ix, n] =
+ doAtomicRMW res AMO_Nand mba ix (bWord dflags) n
+emitPrimOp dflags [res] FetchOrByteArrayOp_Int [mba, ix, n] =
+ doAtomicRMW res AMO_Or mba ix (bWord dflags) n
+emitPrimOp dflags [res] FetchXorByteArrayOp_Int [mba, ix, n] =
+ doAtomicRMW res AMO_Xor mba ix (bWord dflags) n
+emitPrimOp dflags [res] AtomicReadByteArrayOp_Int [mba, ix] =
+ doAtomicReadByteArray res mba ix (bWord dflags)
+emitPrimOp dflags [] AtomicWriteByteArrayOp_Int [mba, ix, val] =
+ doAtomicWriteByteArray mba ix (bWord dflags) val
+emitPrimOp dflags [res] CasByteArrayOp_Int [mba, ix, old, new] =
+ doCasByteArray res mba ix (bWord dflags) old new
+
+-- The rest just translate straightforwardly
+emitPrimOp dflags [res] op [arg]
+ | nopOp op
+ = emitAssign (CmmLocal res) arg
+
+ | Just (mop,rep) <- narrowOp op
+ = emitAssign (CmmLocal res) $
+ CmmMachOp (mop rep (wordWidth dflags)) [CmmMachOp (mop (wordWidth dflags) rep) [arg]]
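+    -- E.g. narrow8Int# becomes a truncating conversion to W8 followed by a
+    -- sign-extending conversion back to the word width, so the upper bits
+    -- end up as copies of bit 7.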
+
+emitPrimOp dflags r@[res] op args
+ | Just prim <- callishOp op
+ = do emitPrimCall r prim args
+
+ | Just mop <- translateOp dflags op
+ = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in
+ emit stmt
+
+emitPrimOp dflags results op args
+ = case callishPrimOpSupported dflags op args of
+ Left op -> emit $ mkUnsafeCall (PrimTarget op) results args
+ Right gen -> gen results args
+
+-- Note [QuotRem optimization]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- `quot` and `rem` with constant divisor can be implemented with fast bit-ops
+-- (shift, .&.).
+--
+-- Currently we only support optimization (performed in CmmOpt) when the
+-- constant is a power of 2. #9041 tracks the implementation of the general
+-- optimization.
+--
+-- `quotRem` can be optimized in the same way. However as it returns two values,
+-- it is implemented as a "callish" primop which is harder to match and
+-- to transform later on. For simplicity, the current implementation detects cases
+-- that can be optimized (see `quotRemCanBeOptimized`) and converts STG quotRem
+-- primop into two CMM quot and rem primops.
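+--
+-- For the unsigned case the rewrite is (an illustrative sketch, not the
+-- actual CmmOpt code):
+--
+-- > w `quot` (2^k)  ==  w `shiftR` k
+-- > w `rem`  (2^k)  ==  w .&. (2^k - 1)
+--
+-- The signed case additionally needs a rounding adjustment before the
+-- shift, because quot rounds towards zero while an arithmetic shift
+-- rounds towards negative infinity.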
+
+type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
+
+callishPrimOpSupported :: DynFlags -> PrimOp -> [CmmExpr] -> Either CallishMachOp GenericOp
+callishPrimOpSupported dflags op args
+ = case op of
+ IntQuotRemOp | ncg && (x86ish || ppc)
+ , not quotRemCanBeOptimized
+ -> Left (MO_S_QuotRem (wordWidth dflags))
+ | otherwise
+ -> Right (genericIntQuotRemOp (wordWidth dflags))
+
+ Int8QuotRemOp | ncg && (x86ish || ppc)
+ , not quotRemCanBeOptimized
+ -> Left (MO_S_QuotRem W8)
+ | otherwise -> Right (genericIntQuotRemOp W8)
+
+ Int16QuotRemOp | ncg && (x86ish || ppc)
+ , not quotRemCanBeOptimized
+ -> Left (MO_S_QuotRem W16)
+ | otherwise -> Right (genericIntQuotRemOp W16)
+
+ WordQuotRemOp | ncg && (x86ish || ppc)
+ , not quotRemCanBeOptimized
+ -> Left (MO_U_QuotRem (wordWidth dflags))
+ | otherwise
+ -> Right (genericWordQuotRemOp (wordWidth dflags))
+
+ WordQuotRem2Op | (ncg && (x86ish || ppc))
+ || llvm -> Left (MO_U_QuotRem2 (wordWidth dflags))
+ | otherwise -> Right (genericWordQuotRem2Op dflags)
+
+ Word8QuotRemOp | ncg && (x86ish || ppc)
+ , not quotRemCanBeOptimized
+ -> Left (MO_U_QuotRem W8)
+ | otherwise -> Right (genericWordQuotRemOp W8)
+
+ Word16QuotRemOp| ncg && (x86ish || ppc)
+ , not quotRemCanBeOptimized
+ -> Left (MO_U_QuotRem W16)
+ | otherwise -> Right (genericWordQuotRemOp W16)
+
+ WordAdd2Op | (ncg && (x86ish || ppc))
+ || llvm -> Left (MO_Add2 (wordWidth dflags))
+ | otherwise -> Right genericWordAdd2Op
+
+ WordAddCOp | (ncg && (x86ish || ppc))
+ || llvm -> Left (MO_AddWordC (wordWidth dflags))
+ | otherwise -> Right genericWordAddCOp
+
+ WordSubCOp | (ncg && (x86ish || ppc))
+ || llvm -> Left (MO_SubWordC (wordWidth dflags))
+ | otherwise -> Right genericWordSubCOp
+
+ IntAddCOp | (ncg && (x86ish || ppc))
+ || llvm -> Left (MO_AddIntC (wordWidth dflags))
+ | otherwise -> Right genericIntAddCOp
+
+ IntSubCOp | (ncg && (x86ish || ppc))
+ || llvm -> Left (MO_SubIntC (wordWidth dflags))
+ | otherwise -> Right genericIntSubCOp
+
+      WordMul2Op | (ncg && (x86ish || ppc))
+                    || llvm     -> Left (MO_U_Mul2     (wordWidth dflags))
+                 | otherwise    -> Right genericWordMul2Op
+      FloatFabsOp | (ncg && (x86ish || ppc))
+                      || llvm      -> Left MO_F32_Fabs
+                  | otherwise   -> Right $ genericFabsOp W32
+      DoubleFabsOp | (ncg && (x86ish || ppc))
+                      || llvm      -> Left MO_F64_Fabs
+                  | otherwise   -> Right $ genericFabsOp W64
+
+ _ -> pprPanic "emitPrimOp: can't translate PrimOp " (ppr op)
+ where
+ -- See Note [QuotRem optimization]
+ quotRemCanBeOptimized = case args of
+ [_, CmmLit (CmmInt n _) ] -> isJust (exactLog2 n)
+ _ -> False
+
+ ncg = case hscTarget dflags of
+ HscAsm -> True
+ _ -> False
+ llvm = case hscTarget dflags of
+ HscLlvm -> True
+ _ -> False
+ x86ish = case platformArch (targetPlatform dflags) of
+ ArchX86 -> True
+ ArchX86_64 -> True
+ _ -> False
+ ppc = case platformArch (targetPlatform dflags) of
+ ArchPPC -> True
+ ArchPPC_64 _ -> True
+ _ -> False
+
+genericIntQuotRemOp :: Width -> GenericOp
+genericIntQuotRemOp width [res_q, res_r] [arg_x, arg_y]
+ = emit $ mkAssign (CmmLocal res_q)
+ (CmmMachOp (MO_S_Quot width) [arg_x, arg_y]) <*>
+ mkAssign (CmmLocal res_r)
+ (CmmMachOp (MO_S_Rem width) [arg_x, arg_y])
+genericIntQuotRemOp _ _ _ = panic "genericIntQuotRemOp"
+
+genericWordQuotRemOp :: Width -> GenericOp
+genericWordQuotRemOp width [res_q, res_r] [arg_x, arg_y]
+ = emit $ mkAssign (CmmLocal res_q)
+ (CmmMachOp (MO_U_Quot width) [arg_x, arg_y]) <*>
+ mkAssign (CmmLocal res_r)
+ (CmmMachOp (MO_U_Rem width) [arg_x, arg_y])
+genericWordQuotRemOp _ _ _ = panic "genericWordQuotRemOp"
+
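+-- | Generic shift-and-subtract (restoring) division of a two-word dividend
+-- by a one-word divisor, one unrolled step per bit of the word. As an
+-- illustrative sketch, one step in plain Haskell (where @bits@ is the word
+-- size and @y@ the divisor; the code below emits the same steps as Cmm
+-- assignments):
+--
+-- > step (acc, high, low) =
+-- >   let overflowed = high `shiftR` (bits - 1)
+-- >       high'      = (high `shiftL` 1) .|. (low `shiftR` (bits - 1))
+-- >       low'       = low `shiftL` 1
+-- >       isge       = if overflowed /= 0 || high' >= y then 1 else 0
+-- >   in ((acc `shiftL` 1) .|. isge, high' - y * isge, low')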
+genericWordQuotRem2Op :: DynFlags -> GenericOp
+genericWordQuotRem2Op dflags [res_q, res_r] [arg_x_high, arg_x_low, arg_y]
+ = emit =<< f (widthInBits (wordWidth dflags)) zero arg_x_high arg_x_low
+ where ty = cmmExprType dflags arg_x_high
+ shl x i = CmmMachOp (MO_Shl (wordWidth dflags)) [x, i]
+ shr x i = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, i]
+ or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
+ ge x y = CmmMachOp (MO_U_Ge (wordWidth dflags)) [x, y]
+ ne x y = CmmMachOp (MO_Ne (wordWidth dflags)) [x, y]
+ minus x y = CmmMachOp (MO_Sub (wordWidth dflags)) [x, y]
+ times x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
+ zero = lit 0
+ one = lit 1
+ negone = lit (fromIntegral (widthInBits (wordWidth dflags)) - 1)
+ lit i = CmmLit (CmmInt i (wordWidth dflags))
+
+ f :: Int -> CmmExpr -> CmmExpr -> CmmExpr -> FCode CmmAGraph
+ f 0 acc high _ = return (mkAssign (CmmLocal res_q) acc <*>
+ mkAssign (CmmLocal res_r) high)
+ f i acc high low =
+ do roverflowedBit <- newTemp ty
+ rhigh' <- newTemp ty
+ rhigh'' <- newTemp ty
+ rlow' <- newTemp ty
+ risge <- newTemp ty
+ racc' <- newTemp ty
+ let high' = CmmReg (CmmLocal rhigh')
+ isge = CmmReg (CmmLocal risge)
+ overflowedBit = CmmReg (CmmLocal roverflowedBit)
+ let this = catAGraphs
+ [mkAssign (CmmLocal roverflowedBit)
+ (shr high negone),
+ mkAssign (CmmLocal rhigh')
+ (or (shl high one) (shr low negone)),
+ mkAssign (CmmLocal rlow')
+ (shl low one),
+ mkAssign (CmmLocal risge)
+ (or (overflowedBit `ne` zero)
+ (high' `ge` arg_y)),
+ mkAssign (CmmLocal rhigh'')
+ (high' `minus` (arg_y `times` isge)),
+ mkAssign (CmmLocal racc')
+ (or (shl acc one) isge)]
+ rest <- f (i - 1) (CmmReg (CmmLocal racc'))
+ (CmmReg (CmmLocal rhigh''))
+ (CmmReg (CmmLocal rlow'))
+ return (this <*> rest)
+genericWordQuotRem2Op _ _ _ = panic "genericWordQuotRem2Op"
+
+genericWordAdd2Op :: GenericOp
+genericWordAdd2Op [res_h, res_l] [arg_x, arg_y]
+ = do dflags <- getDynFlags
+ r1 <- newTemp (cmmExprType dflags arg_x)
+ r2 <- newTemp (cmmExprType dflags arg_x)
+ let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
+ toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
+ bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
+ add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
+ or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
+ hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
+ (wordWidth dflags))
+ hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
+ emit $ catAGraphs
+ [mkAssign (CmmLocal r1)
+ (add (bottomHalf arg_x) (bottomHalf arg_y)),
+ mkAssign (CmmLocal r2)
+ (add (topHalf (CmmReg (CmmLocal r1)))
+ (add (topHalf arg_x) (topHalf arg_y))),
+ mkAssign (CmmLocal res_h)
+ (topHalf (CmmReg (CmmLocal r2))),
+ mkAssign (CmmLocal res_l)
+ (or (toTopHalf (CmmReg (CmmLocal r2)))
+ (bottomHalf (CmmReg (CmmLocal r1))))]
+genericWordAdd2Op _ _ = panic "genericWordAdd2Op"
+
+-- | Implements branchless recovery of the carry flag @c@ by checking the
+-- leftmost bits of both inputs @a@ and @b@ and result @r = a + b@:
+--
+-- @
+-- c = a&b | (a|b)&~r
+-- @
+--
+-- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/
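+--
+-- For example, on 4-bit words: @a = 1100@, @b = 0110@ gives @r = 0010@
+-- with a carry out; then @a&b = 0100@ and @(a|b)&~r = 1100@, so the top
+-- bit of their OR is set, and shifting it down yields @c = 1@.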
+genericWordAddCOp :: GenericOp
+genericWordAddCOp [res_r, res_c] [aa, bb]
+ = do dflags <- getDynFlags
+ emit $ catAGraphs [
+ mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
+ mkAssign (CmmLocal res_c) $
+ CmmMachOp (mo_wordUShr dflags) [
+ CmmMachOp (mo_wordOr dflags) [
+ CmmMachOp (mo_wordAnd dflags) [aa,bb],
+ CmmMachOp (mo_wordAnd dflags) [
+ CmmMachOp (mo_wordOr dflags) [aa,bb],
+ CmmMachOp (mo_wordNot dflags) [CmmReg (CmmLocal res_r)]
+ ]
+ ],
+ mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
+ ]
+ ]
+genericWordAddCOp _ _ = panic "genericWordAddCOp"
+
+-- | Implements branchless recovery of the carry flag @c@ by checking the
+-- leftmost bits of both inputs @a@ and @b@ and result @r = a - b@:
+--
+-- @
+-- c = ~a&b | (~a|b)&r
+-- @
+--
+-- https://brodowsky.it-sky.net/2015/04/02/how-to-recover-the-carry-bit/
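+--
+-- For example, on 4-bit words: @a = 0010@, @b = 0110@ gives @r = 1100@
+-- with a borrow; then @~a&b = 0100@ and @(~a|b)&r = 1100@, so the top
+-- bit of their OR is set, and shifting it down yields @c = 1@.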
+genericWordSubCOp :: GenericOp
+genericWordSubCOp [res_r, res_c] [aa, bb]
+ = do dflags <- getDynFlags
+ emit $ catAGraphs [
+ mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
+ mkAssign (CmmLocal res_c) $
+ CmmMachOp (mo_wordUShr dflags) [
+ CmmMachOp (mo_wordOr dflags) [
+ CmmMachOp (mo_wordAnd dflags) [
+ CmmMachOp (mo_wordNot dflags) [aa],
+ bb
+ ],
+ CmmMachOp (mo_wordAnd dflags) [
+ CmmMachOp (mo_wordOr dflags) [
+ CmmMachOp (mo_wordNot dflags) [aa],
+ bb
+ ],
+ CmmReg (CmmLocal res_r)
+ ]
+ ],
+ mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
+ ]
+ ]
+genericWordSubCOp _ _ = panic "genericWordSubCOp"
+
+genericIntAddCOp :: GenericOp
+genericIntAddCOp [res_r, res_c] [aa, bb]
+{-
+ With some bit-twiddling, we can define int{Add,Sub}Czh portably in
+ C, and without needing any comparisons. This may not be the
+ fastest way to do it - if you have better code, please send it! --SDM
+
+ Return : r = a + b, c = 0 if no overflow, 1 on overflow.
+
+  We currently don't make use of the r value if c is != 0 (i.e.
+  overflow); we just convert to big integers and try again.  This
+ could be improved by making r and c the correct values for
+ plugging into a new J#.
+
+ { r = ((I_)(a)) + ((I_)(b)); \
+ c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
+ >> (BITS_IN (I_) - 1); \
+ }
+ Wading through the mass of bracketry, it seems to reduce to:
+ c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
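+
+  For example, on 4-bit words: a = 0111 (7), b = 0001 (1) gives r = 1000
+  (-8, i.e. overflow); ~(a^b) = 1001 and a^r = 1111, so their AND has the
+  top bit set and the unsigned shift yields c = 1.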
+
+-}
+ = do dflags <- getDynFlags
+ emit $ catAGraphs [
+ mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordAdd dflags) [aa,bb]),
+ mkAssign (CmmLocal res_c) $
+ CmmMachOp (mo_wordUShr dflags) [
+ CmmMachOp (mo_wordAnd dflags) [
+ CmmMachOp (mo_wordNot dflags) [CmmMachOp (mo_wordXor dflags) [aa,bb]],
+ CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
+ ],
+ mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
+ ]
+ ]
+genericIntAddCOp _ _ = panic "genericIntAddCOp"
+
+genericIntSubCOp :: GenericOp
+genericIntSubCOp [res_r, res_c] [aa, bb]
+{- Similarly:
+ #define subIntCzh(r,c,a,b) \
+ { r = ((I_)(a)) - ((I_)(b)); \
+ c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
+ >> (BITS_IN (I_) - 1); \
+ }
+
+ c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
+-}
+ = do dflags <- getDynFlags
+ emit $ catAGraphs [
+ mkAssign (CmmLocal res_r) (CmmMachOp (mo_wordSub dflags) [aa,bb]),
+ mkAssign (CmmLocal res_c) $
+ CmmMachOp (mo_wordUShr dflags) [
+ CmmMachOp (mo_wordAnd dflags) [
+ CmmMachOp (mo_wordXor dflags) [aa,bb],
+ CmmMachOp (mo_wordXor dflags) [aa, CmmReg (CmmLocal res_r)]
+ ],
+ mkIntExpr dflags (wORD_SIZE_IN_BITS dflags - 1)
+ ]
+ ]
+genericIntSubCOp _ _ = panic "genericIntSubCOp"
+
+genericWordMul2Op :: GenericOp
+genericWordMul2Op [res_h, res_l] [arg_x, arg_y]
+ = do dflags <- getDynFlags
+ let t = cmmExprType dflags arg_x
+ xlyl <- liftM CmmLocal $ newTemp t
+ xlyh <- liftM CmmLocal $ newTemp t
+ xhyl <- liftM CmmLocal $ newTemp t
+ r <- liftM CmmLocal $ newTemp t
+ -- This generic implementation is very simple and slow. We might
+ -- well be able to do better, but for now this at least works.
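+         -- Schoolbook decomposition on half-words: with h the number of
+         -- bits in a half-word, x = xh*2^h + xl and y = yh*2^h + yl, so
+         --   x*y = xh*yh*2^(2h) + (xh*yl + xl*yh)*2^h + xl*yl
+         -- r accumulates the top half of xlyl together with the bottom
+         -- halves of the cross terms; its top half then carries into the
+         -- high result word.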
+ let topHalf x = CmmMachOp (MO_U_Shr (wordWidth dflags)) [x, hww]
+ toTopHalf x = CmmMachOp (MO_Shl (wordWidth dflags)) [x, hww]
+ bottomHalf x = CmmMachOp (MO_And (wordWidth dflags)) [x, hwm]
+ add x y = CmmMachOp (MO_Add (wordWidth dflags)) [x, y]
+ sum = foldl1 add
+ mul x y = CmmMachOp (MO_Mul (wordWidth dflags)) [x, y]
+ or x y = CmmMachOp (MO_Or (wordWidth dflags)) [x, y]
+ hww = CmmLit (CmmInt (fromIntegral (widthInBits (halfWordWidth dflags)))
+ (wordWidth dflags))
+ hwm = CmmLit (CmmInt (halfWordMask dflags) (wordWidth dflags))
+ emit $ catAGraphs
+ [mkAssign xlyl
+ (mul (bottomHalf arg_x) (bottomHalf arg_y)),
+ mkAssign xlyh
+ (mul (bottomHalf arg_x) (topHalf arg_y)),
+ mkAssign xhyl
+ (mul (topHalf arg_x) (bottomHalf arg_y)),
+ mkAssign r
+ (sum [topHalf (CmmReg xlyl),
+ bottomHalf (CmmReg xhyl),
+ bottomHalf (CmmReg xlyh)]),
+ mkAssign (CmmLocal res_l)
+ (or (bottomHalf (CmmReg xlyl))
+ (toTopHalf (CmmReg r))),
+ mkAssign (CmmLocal res_h)
+ (sum [mul (topHalf arg_x) (topHalf arg_y),
+ topHalf (CmmReg xhyl),
+ topHalf (CmmReg xlyh),
+ topHalf (CmmReg r)])]
+genericWordMul2Op _ _ = panic "genericWordMul2Op"
+
+-- This replicates what we had in libraries/base/GHC/Float.hs:
+--
+-- abs x | x == 0 = 0 -- handles (-0.0)
+-- | x > 0 = x
+-- | otherwise = negateFloat x
+genericFabsOp :: Width -> GenericOp
+genericFabsOp w [res_r] [aa]
+ = do dflags <- getDynFlags
+ let zero = CmmLit (CmmFloat 0 w)
+
+ eq x y = CmmMachOp (MO_F_Eq w) [x, y]
+ gt x y = CmmMachOp (MO_F_Gt w) [x, y]
+
+ neg x = CmmMachOp (MO_F_Neg w) [x]
+
+ g1 = catAGraphs [mkAssign (CmmLocal res_r) zero]
+ g2 = catAGraphs [mkAssign (CmmLocal res_r) aa]
+
+ res_t <- CmmLocal <$> newTemp (cmmExprType dflags aa)
+ let g3 = catAGraphs [mkAssign res_t aa,
+ mkAssign (CmmLocal res_r) (neg (CmmReg res_t))]
+
+ g4 <- mkCmmIfThenElse (gt aa zero) g2 g3
+
+ emit =<< mkCmmIfThenElse (eq aa zero) g1 g4
+
+genericFabsOp _ _ _ = panic "genericFabsOp"
+
+-- These PrimOps are NOPs in Cmm
+
+nopOp :: PrimOp -> Bool
+nopOp Int2WordOp = True
+nopOp Word2IntOp = True
+nopOp Int2AddrOp = True
+nopOp Addr2IntOp = True
+nopOp ChrOp = True -- Int# and Char# are rep'd the same
+nopOp OrdOp = True
+nopOp _ = False
+
+-- These PrimOps turn into double casts
+
+narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width)
+narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8)
+narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16)
+narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32)
+narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8)
+narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16)
+narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32)
+narrowOp _ = Nothing
+
+-- Native word signless ops
+
+translateOp :: DynFlags -> PrimOp -> Maybe MachOp
+translateOp dflags IntAddOp = Just (mo_wordAdd dflags)
+translateOp dflags IntSubOp = Just (mo_wordSub dflags)
+translateOp dflags WordAddOp = Just (mo_wordAdd dflags)
+translateOp dflags WordSubOp = Just (mo_wordSub dflags)
+translateOp dflags AddrAddOp = Just (mo_wordAdd dflags)
+translateOp dflags AddrSubOp = Just (mo_wordSub dflags)
+
+translateOp dflags IntEqOp = Just (mo_wordEq dflags)
+translateOp dflags IntNeOp = Just (mo_wordNe dflags)
+translateOp dflags WordEqOp = Just (mo_wordEq dflags)
+translateOp dflags WordNeOp = Just (mo_wordNe dflags)
+translateOp dflags AddrEqOp = Just (mo_wordEq dflags)
+translateOp dflags AddrNeOp = Just (mo_wordNe dflags)
+
+translateOp dflags AndOp = Just (mo_wordAnd dflags)
+translateOp dflags OrOp = Just (mo_wordOr dflags)
+translateOp dflags XorOp = Just (mo_wordXor dflags)
+translateOp dflags NotOp = Just (mo_wordNot dflags)
+translateOp dflags SllOp = Just (mo_wordShl dflags)
+translateOp dflags SrlOp = Just (mo_wordUShr dflags)
+
+translateOp dflags AddrRemOp = Just (mo_wordURem dflags)
+
+-- Native word signed ops
+
+translateOp dflags IntMulOp = Just (mo_wordMul dflags)
+translateOp dflags IntMulMayOfloOp = Just (MO_S_MulMayOflo (wordWidth dflags))
+translateOp dflags IntQuotOp = Just (mo_wordSQuot dflags)
+translateOp dflags IntRemOp = Just (mo_wordSRem dflags)
+translateOp dflags IntNegOp = Just (mo_wordSNeg dflags)
+
+translateOp dflags IntGeOp = Just (mo_wordSGe dflags)
+translateOp dflags IntLeOp = Just (mo_wordSLe dflags)
+translateOp dflags IntGtOp = Just (mo_wordSGt dflags)
+translateOp dflags IntLtOp = Just (mo_wordSLt dflags)
+
+translateOp dflags AndIOp = Just (mo_wordAnd dflags)
+translateOp dflags OrIOp = Just (mo_wordOr dflags)
+translateOp dflags XorIOp = Just (mo_wordXor dflags)
+translateOp dflags NotIOp = Just (mo_wordNot dflags)
+translateOp dflags ISllOp = Just (mo_wordShl dflags)
+translateOp dflags ISraOp = Just (mo_wordSShr dflags)
+translateOp dflags ISrlOp = Just (mo_wordUShr dflags)
+
+-- Native word unsigned ops
+
+translateOp dflags WordGeOp = Just (mo_wordUGe dflags)
+translateOp dflags WordLeOp = Just (mo_wordULe dflags)
+translateOp dflags WordGtOp = Just (mo_wordUGt dflags)
+translateOp dflags WordLtOp = Just (mo_wordULt dflags)
+
+translateOp dflags WordMulOp = Just (mo_wordMul dflags)
+translateOp dflags WordQuotOp = Just (mo_wordUQuot dflags)
+translateOp dflags WordRemOp = Just (mo_wordURem dflags)
+
+translateOp dflags AddrGeOp = Just (mo_wordUGe dflags)
+translateOp dflags AddrLeOp = Just (mo_wordULe dflags)
+translateOp dflags AddrGtOp = Just (mo_wordUGt dflags)
+translateOp dflags AddrLtOp = Just (mo_wordULt dflags)
+
+-- Int8# signed ops
+
+translateOp dflags Int8Extend = Just (MO_SS_Conv W8 (wordWidth dflags))
+translateOp dflags Int8Narrow = Just (MO_SS_Conv (wordWidth dflags) W8)
+translateOp _ Int8NegOp = Just (MO_S_Neg W8)
+translateOp _ Int8AddOp = Just (MO_Add W8)
+translateOp _ Int8SubOp = Just (MO_Sub W8)
+translateOp _ Int8MulOp = Just (MO_Mul W8)
+translateOp _ Int8QuotOp = Just (MO_S_Quot W8)
+translateOp _ Int8RemOp = Just (MO_S_Rem W8)
+
+translateOp _ Int8EqOp = Just (MO_Eq W8)
+translateOp _ Int8GeOp = Just (MO_S_Ge W8)
+translateOp _ Int8GtOp = Just (MO_S_Gt W8)
+translateOp _ Int8LeOp = Just (MO_S_Le W8)
+translateOp _ Int8LtOp = Just (MO_S_Lt W8)
+translateOp _ Int8NeOp = Just (MO_Ne W8)
+
+-- Word8# unsigned ops
+
+translateOp dflags Word8Extend = Just (MO_UU_Conv W8 (wordWidth dflags))
+translateOp dflags Word8Narrow = Just (MO_UU_Conv (wordWidth dflags) W8)
+translateOp _ Word8NotOp = Just (MO_Not W8)
+translateOp _ Word8AddOp = Just (MO_Add W8)
+translateOp _ Word8SubOp = Just (MO_Sub W8)
+translateOp _ Word8MulOp = Just (MO_Mul W8)
+translateOp _ Word8QuotOp = Just (MO_U_Quot W8)
+translateOp _ Word8RemOp = Just (MO_U_Rem W8)
+
+translateOp _ Word8EqOp = Just (MO_Eq W8)
+translateOp _ Word8GeOp = Just (MO_U_Ge W8)
+translateOp _ Word8GtOp = Just (MO_U_Gt W8)
+translateOp _ Word8LeOp = Just (MO_U_Le W8)
+translateOp _ Word8LtOp = Just (MO_U_Lt W8)
+translateOp _ Word8NeOp = Just (MO_Ne W8)
+
+-- Int16# signed ops
+
+translateOp dflags Int16Extend = Just (MO_SS_Conv W16 (wordWidth dflags))
+translateOp dflags Int16Narrow = Just (MO_SS_Conv (wordWidth dflags) W16)
+translateOp _ Int16NegOp = Just (MO_S_Neg W16)
+translateOp _ Int16AddOp = Just (MO_Add W16)
+translateOp _ Int16SubOp = Just (MO_Sub W16)
+translateOp _ Int16MulOp = Just (MO_Mul W16)
+translateOp _ Int16QuotOp = Just (MO_S_Quot W16)
+translateOp _ Int16RemOp = Just (MO_S_Rem W16)
+
+translateOp _ Int16EqOp = Just (MO_Eq W16)
+translateOp _ Int16GeOp = Just (MO_S_Ge W16)
+translateOp _ Int16GtOp = Just (MO_S_Gt W16)
+translateOp _ Int16LeOp = Just (MO_S_Le W16)
+translateOp _ Int16LtOp = Just (MO_S_Lt W16)
+translateOp _ Int16NeOp = Just (MO_Ne W16)
+
+-- Word16# unsigned ops
+
+translateOp dflags Word16Extend = Just (MO_UU_Conv W16 (wordWidth dflags))
+translateOp dflags Word16Narrow = Just (MO_UU_Conv (wordWidth dflags) W16)
+translateOp _ Word16NotOp = Just (MO_Not W16)
+translateOp _ Word16AddOp = Just (MO_Add W16)
+translateOp _ Word16SubOp = Just (MO_Sub W16)
+translateOp _ Word16MulOp = Just (MO_Mul W16)
+translateOp _ Word16QuotOp = Just (MO_U_Quot W16)
+translateOp _ Word16RemOp = Just (MO_U_Rem W16)
+
+translateOp _ Word16EqOp = Just (MO_Eq W16)
+translateOp _ Word16GeOp = Just (MO_U_Ge W16)
+translateOp _ Word16GtOp = Just (MO_U_Gt W16)
+translateOp _ Word16LeOp = Just (MO_U_Le W16)
+translateOp _ Word16LtOp = Just (MO_U_Lt W16)
+translateOp _ Word16NeOp = Just (MO_Ne W16)
+
+-- Char# ops
+
+translateOp dflags CharEqOp = Just (MO_Eq (wordWidth dflags))
+translateOp dflags CharNeOp = Just (MO_Ne (wordWidth dflags))
+translateOp dflags CharGeOp = Just (MO_U_Ge (wordWidth dflags))
+translateOp dflags CharLeOp = Just (MO_U_Le (wordWidth dflags))
+translateOp dflags CharGtOp = Just (MO_U_Gt (wordWidth dflags))
+translateOp dflags CharLtOp = Just (MO_U_Lt (wordWidth dflags))
+
+-- Double ops
+
+translateOp _ DoubleEqOp = Just (MO_F_Eq W64)
+translateOp _ DoubleNeOp = Just (MO_F_Ne W64)
+translateOp _ DoubleGeOp = Just (MO_F_Ge W64)
+translateOp _ DoubleLeOp = Just (MO_F_Le W64)
+translateOp _ DoubleGtOp = Just (MO_F_Gt W64)
+translateOp _ DoubleLtOp = Just (MO_F_Lt W64)
+
+translateOp _ DoubleAddOp = Just (MO_F_Add W64)
+translateOp _ DoubleSubOp = Just (MO_F_Sub W64)
+translateOp _ DoubleMulOp = Just (MO_F_Mul W64)
+translateOp _ DoubleDivOp = Just (MO_F_Quot W64)
+translateOp _ DoubleNegOp = Just (MO_F_Neg W64)
+
+-- Float ops
+
+translateOp _ FloatEqOp = Just (MO_F_Eq W32)
+translateOp _ FloatNeOp = Just (MO_F_Ne W32)
+translateOp _ FloatGeOp = Just (MO_F_Ge W32)
+translateOp _ FloatLeOp = Just (MO_F_Le W32)
+translateOp _ FloatGtOp = Just (MO_F_Gt W32)
+translateOp _ FloatLtOp = Just (MO_F_Lt W32)
+
+translateOp _ FloatAddOp = Just (MO_F_Add W32)
+translateOp _ FloatSubOp = Just (MO_F_Sub W32)
+translateOp _ FloatMulOp = Just (MO_F_Mul W32)
+translateOp _ FloatDivOp = Just (MO_F_Quot W32)
+translateOp _ FloatNegOp = Just (MO_F_Neg W32)
+
+-- Vector ops
+
+translateOp _ (VecAddOp FloatVec n w) = Just (MO_VF_Add n w)
+translateOp _ (VecSubOp FloatVec n w) = Just (MO_VF_Sub n w)
+translateOp _ (VecMulOp FloatVec n w) = Just (MO_VF_Mul n w)
+translateOp _ (VecDivOp FloatVec n w) = Just (MO_VF_Quot n w)
+translateOp _ (VecNegOp FloatVec n w) = Just (MO_VF_Neg n w)
+
+translateOp _ (VecAddOp IntVec n w) = Just (MO_V_Add n w)
+translateOp _ (VecSubOp IntVec n w) = Just (MO_V_Sub n w)
+translateOp _ (VecMulOp IntVec n w) = Just (MO_V_Mul n w)
+translateOp _ (VecQuotOp IntVec n w) = Just (MO_VS_Quot n w)
+translateOp _ (VecRemOp IntVec n w) = Just (MO_VS_Rem n w)
+translateOp _ (VecNegOp IntVec n w) = Just (MO_VS_Neg n w)
+
+translateOp _ (VecAddOp WordVec n w) = Just (MO_V_Add n w)
+translateOp _ (VecSubOp WordVec n w) = Just (MO_V_Sub n w)
+translateOp _ (VecMulOp WordVec n w) = Just (MO_V_Mul n w)
+translateOp _ (VecQuotOp WordVec n w) = Just (MO_VU_Quot n w)
+translateOp _ (VecRemOp WordVec n w) = Just (MO_VU_Rem n w)
+
+-- Conversions
+
+translateOp dflags Int2DoubleOp = Just (MO_SF_Conv (wordWidth dflags) W64)
+translateOp dflags Double2IntOp = Just (MO_FS_Conv W64 (wordWidth dflags))
+
+translateOp dflags Int2FloatOp = Just (MO_SF_Conv (wordWidth dflags) W32)
+translateOp dflags Float2IntOp = Just (MO_FS_Conv W32 (wordWidth dflags))
+
+translateOp _ Float2DoubleOp = Just (MO_FF_Conv W32 W64)
+translateOp _ Double2FloatOp = Just (MO_FF_Conv W64 W32)
+
+-- Word comparisons masquerading as more exotic things.
+
+translateOp dflags SameMutVarOp = Just (mo_wordEq dflags)
+translateOp dflags SameMVarOp = Just (mo_wordEq dflags)
+translateOp dflags SameMutableArrayOp = Just (mo_wordEq dflags)
+translateOp dflags SameMutableByteArrayOp = Just (mo_wordEq dflags)
+translateOp dflags SameMutableArrayArrayOp = Just (mo_wordEq dflags)
+translateOp dflags SameSmallMutableArrayOp = Just (mo_wordEq dflags)
+translateOp dflags SameTVarOp = Just (mo_wordEq dflags)
+translateOp dflags EqStablePtrOp = Just (mo_wordEq dflags)
+-- See Note [Comparing stable names]
+translateOp dflags EqStableNameOp = Just (mo_wordEq dflags)
+
+translateOp _ _ = Nothing
+
+-- Note [Comparing stable names]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- A StableName# is actually a pointer to a stable name object (SNO)
+-- containing an index into the stable name table (SNT). We
+-- used to compare StableName#s by following the pointers to the
+-- SNOs and checking whether they held the same SNT indices. However,
+-- this is not necessary: there is a one-to-one correspondence
+-- between SNOs and entries in the SNT, so simple pointer equality
+-- does the trick.
+
+-- These primops are implemented by CallishMachOps, because they sometimes
+-- turn into foreign calls depending on the backend.
+
+callishOp :: PrimOp -> Maybe CallishMachOp
+callishOp DoublePowerOp = Just MO_F64_Pwr
+callishOp DoubleSinOp = Just MO_F64_Sin
+callishOp DoubleCosOp = Just MO_F64_Cos
+callishOp DoubleTanOp = Just MO_F64_Tan
+callishOp DoubleSinhOp = Just MO_F64_Sinh
+callishOp DoubleCoshOp = Just MO_F64_Cosh
+callishOp DoubleTanhOp = Just MO_F64_Tanh
+callishOp DoubleAsinOp = Just MO_F64_Asin
+callishOp DoubleAcosOp = Just MO_F64_Acos
+callishOp DoubleAtanOp = Just MO_F64_Atan
+callishOp DoubleAsinhOp = Just MO_F64_Asinh
+callishOp DoubleAcoshOp = Just MO_F64_Acosh
+callishOp DoubleAtanhOp = Just MO_F64_Atanh
+callishOp DoubleLogOp = Just MO_F64_Log
+callishOp DoubleLog1POp = Just MO_F64_Log1P
+callishOp DoubleExpOp = Just MO_F64_Exp
+callishOp DoubleExpM1Op = Just MO_F64_ExpM1
+callishOp DoubleSqrtOp = Just MO_F64_Sqrt
+
+callishOp FloatPowerOp = Just MO_F32_Pwr
+callishOp FloatSinOp = Just MO_F32_Sin
+callishOp FloatCosOp = Just MO_F32_Cos
+callishOp FloatTanOp = Just MO_F32_Tan
+callishOp FloatSinhOp = Just MO_F32_Sinh
+callishOp FloatCoshOp = Just MO_F32_Cosh
+callishOp FloatTanhOp = Just MO_F32_Tanh
+callishOp FloatAsinOp = Just MO_F32_Asin
+callishOp FloatAcosOp = Just MO_F32_Acos
+callishOp FloatAtanOp = Just MO_F32_Atan
+callishOp FloatAsinhOp = Just MO_F32_Asinh
+callishOp FloatAcoshOp = Just MO_F32_Acosh
+callishOp FloatAtanhOp = Just MO_F32_Atanh
+callishOp FloatLogOp = Just MO_F32_Log
+callishOp FloatLog1POp = Just MO_F32_Log1P
+callishOp FloatExpOp = Just MO_F32_Exp
+callishOp FloatExpM1Op = Just MO_F32_ExpM1
+callishOp FloatSqrtOp = Just MO_F32_Sqrt
+
+callishOp _ = Nothing
+
+------------------------------------------------------------------------------
+-- Helpers for translating various minor variants of array indexing.
+
+doIndexOffAddrOp :: Maybe MachOp
+ -> CmmType
+ -> [LocalReg]
+ -> [CmmExpr]
+ -> FCode ()
+doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
+ = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr rep idx
+doIndexOffAddrOp _ _ _ _
+ = panic "GHC.StgToCmm.Prim: doIndexOffAddrOp"
+
+doIndexOffAddrOpAs :: Maybe MachOp
+ -> CmmType
+ -> CmmType
+ -> [LocalReg]
+ -> [CmmExpr]
+ -> FCode ()
+doIndexOffAddrOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
+ = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx_rep idx
+doIndexOffAddrOpAs _ _ _ _ _
+ = panic "GHC.StgToCmm.Prim: doIndexOffAddrOpAs"
+
+doIndexByteArrayOp :: Maybe MachOp
+ -> CmmType
+ -> [LocalReg]
+ -> [CmmExpr]
+ -> FCode ()
+doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
+ = do dflags <- getDynFlags
+ mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr rep idx
+doIndexByteArrayOp _ _ _ _
+ = panic "GHC.StgToCmm.Prim: doIndexByteArrayOp"
+
+doIndexByteArrayOpAs :: Maybe MachOp
+ -> CmmType
+ -> CmmType
+ -> [LocalReg]
+ -> [CmmExpr]
+ -> FCode ()
+doIndexByteArrayOpAs maybe_post_read_cast rep idx_rep [res] [addr,idx]
+ = do dflags <- getDynFlags
+ mkBasicIndexedRead (arrWordsHdrSize dflags) maybe_post_read_cast rep res addr idx_rep idx
+doIndexByteArrayOpAs _ _ _ _ _
+ = panic "GHC.StgToCmm.Prim: doIndexByteArrayOpAs"
+
+doReadPtrArrayOp :: LocalReg
+ -> CmmExpr
+ -> CmmExpr
+ -> FCode ()
+doReadPtrArrayOp res addr idx
+ = do dflags <- getDynFlags
+ mkBasicIndexedRead (arrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr (gcWord dflags) idx
+
+doWriteOffAddrOp :: Maybe MachOp
+ -> CmmType
+ -> [LocalReg]
+ -> [CmmExpr]
+ -> FCode ()
+doWriteOffAddrOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
+ = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx_ty idx val
+doWriteOffAddrOp _ _ _ _
+ = panic "GHC.StgToCmm.Prim: doWriteOffAddrOp"
+
+doWriteByteArrayOp :: Maybe MachOp
+ -> CmmType
+ -> [LocalReg]
+ -> [CmmExpr]
+ -> FCode ()
+doWriteByteArrayOp maybe_pre_write_cast idx_ty [] [addr,idx,val]
+ = do dflags <- getDynFlags
+ mkBasicIndexedWrite (arrWordsHdrSize dflags) maybe_pre_write_cast addr idx_ty idx val
+doWriteByteArrayOp _ _ _ _
+ = panic "GHC.StgToCmm.Prim: doWriteByteArrayOp"
+
+doWritePtrArrayOp :: CmmExpr
+ -> CmmExpr
+ -> CmmExpr
+ -> FCode ()
+doWritePtrArrayOp addr idx val
+ = do dflags <- getDynFlags
+ let ty = cmmExprType dflags val
+ -- This write barrier is to ensure that the heap writes to the object
+ -- referred to by val have happened before we write val into the array.
+ -- See #12469 for details.
+ emitPrimCall [] MO_WriteBarrier []
+ mkBasicIndexedWrite (arrPtrsHdrSize dflags) Nothing addr ty idx val
+ emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
+       -- Mark the card table, which serves as the GC write barrier for the
+       -- array. We must write a byte into the mark table:
+ -- bits8[a + header_size + StgMutArrPtrs_size(a) + x >> N]
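+       -- where N is mUT_ARR_PTRS_CARD_BITS (the log2 of the number of
+       -- elements covered by one card); the card bytes live just after
+       -- the element payload, which is why the store below offsets past
+       -- StgMutArrPtrs_size(a) words.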
+ emit $ mkStore (
+ cmmOffsetExpr dflags
+ (cmmOffsetExprW dflags (cmmOffsetB dflags addr (arrPtrsHdrSize dflags))
+ (loadArrPtrsSize dflags addr))
+ (CmmMachOp (mo_wordUShr dflags) [idx,
+ mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags)])
+ ) (CmmLit (CmmInt 1 W8))
+
+loadArrPtrsSize :: DynFlags -> CmmExpr -> CmmExpr
+loadArrPtrsSize dflags addr = CmmLoad (cmmOffsetB dflags addr off) (bWord dflags)
+ where off = fixedHdrSize dflags + oFFSET_StgMutArrPtrs_ptrs dflags
+
+mkBasicIndexedRead :: ByteOff -- Initial offset in bytes
+ -> Maybe MachOp -- Optional result cast
+ -> CmmType -- Type of element we are accessing
+ -> LocalReg -- Destination
+ -> CmmExpr -- Base address
+ -> CmmType -- Type of element by which we are indexing
+ -> CmmExpr -- Index
+ -> FCode ()
+mkBasicIndexedRead off Nothing ty res base idx_ty idx
+ = do dflags <- getDynFlags
+ emitAssign (CmmLocal res) (cmmLoadIndexOffExpr dflags off ty base idx_ty idx)
+mkBasicIndexedRead off (Just cast) ty res base idx_ty idx
+ = do dflags <- getDynFlags
+ emitAssign (CmmLocal res) (CmmMachOp cast [
+ cmmLoadIndexOffExpr dflags off ty base idx_ty idx])
+
+mkBasicIndexedWrite :: ByteOff -- Initial offset in bytes
+ -> Maybe MachOp -- Optional value cast
+ -> CmmExpr -- Base address
+ -> CmmType -- Type of element by which we are indexing
+ -> CmmExpr -- Index
+ -> CmmExpr -- Value to write
+ -> FCode ()
+mkBasicIndexedWrite off Nothing base idx_ty idx val
+ = do dflags <- getDynFlags
+ emitStore (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) val
+mkBasicIndexedWrite off (Just cast) base idx_ty idx val
+ = mkBasicIndexedWrite off Nothing base idx_ty idx (CmmMachOp cast [val])
+
+-- ----------------------------------------------------------------------------
+-- Misc utils
+
+cmmIndexOffExpr :: DynFlags
+ -> ByteOff -- Initial offset in bytes
+ -> Width -- Width of element by which we are indexing
+ -> CmmExpr -- Base address
+ -> CmmExpr -- Index
+ -> CmmExpr
+cmmIndexOffExpr dflags off width base idx
+ = cmmIndexExpr dflags width (cmmOffsetB dflags base off) idx
+
+cmmLoadIndexOffExpr :: DynFlags
+ -> ByteOff -- Initial offset in bytes
+ -> CmmType -- Type of element we are accessing
+ -> CmmExpr -- Base address
+ -> CmmType -- Type of element by which we are indexing
+ -> CmmExpr -- Index
+ -> CmmExpr
+cmmLoadIndexOffExpr dflags off ty base idx_ty idx
+ = CmmLoad (cmmIndexOffExpr dflags off (typeWidth idx_ty) base idx) ty
+
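+-- | Overwrite the first word of a closure, i.e. its info-table pointer.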
+setInfo :: CmmExpr -> CmmExpr -> CmmAGraph
+setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr
+
+------------------------------------------------------------------------------
+-- Helpers for translating vector primops.
+
+vecVmmType :: PrimOpVecCat -> Length -> Width -> CmmType
+vecVmmType pocat n w = vec n (vecCmmCat pocat w)
+
+vecCmmCat :: PrimOpVecCat -> Width -> CmmType
+vecCmmCat IntVec = cmmBits
+vecCmmCat WordVec = cmmBits
+vecCmmCat FloatVec = cmmFloat
+
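+-- | Optional cast applied to a scalar before it is inserted into a vector:
+-- narrow the word-sized Int#/Word# argument to the element width. Float
+-- elements and full-word-sized elements need no cast.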
+vecElemInjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
+vecElemInjectCast _ FloatVec _ = Nothing
+vecElemInjectCast dflags IntVec W8 = Just (mo_WordTo8 dflags)
+vecElemInjectCast dflags IntVec W16 = Just (mo_WordTo16 dflags)
+vecElemInjectCast dflags IntVec W32 = Just (mo_WordTo32 dflags)
+vecElemInjectCast _ IntVec W64 = Nothing
+vecElemInjectCast dflags WordVec W8 = Just (mo_WordTo8 dflags)
+vecElemInjectCast dflags WordVec W16 = Just (mo_WordTo16 dflags)
+vecElemInjectCast dflags WordVec W32 = Just (mo_WordTo32 dflags)
+vecElemInjectCast _ WordVec W64 = Nothing
+vecElemInjectCast _ _ _ = Nothing
+
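+-- | Optional cast applied to a scalar just extracted from a vector: widen
+-- it back to the word size, sign-extending for IntVec elements and
+-- zero-extending for WordVec elements.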
+vecElemProjectCast :: DynFlags -> PrimOpVecCat -> Width -> Maybe MachOp
+vecElemProjectCast _ FloatVec _ = Nothing
+vecElemProjectCast dflags IntVec W8 = Just (mo_s_8ToWord dflags)
+vecElemProjectCast dflags IntVec W16 = Just (mo_s_16ToWord dflags)
+vecElemProjectCast dflags IntVec W32 = Just (mo_s_32ToWord dflags)
+vecElemProjectCast _ IntVec W64 = Nothing
+vecElemProjectCast dflags WordVec W8 = Just (mo_u_8ToWord dflags)
+vecElemProjectCast dflags WordVec W16 = Just (mo_u_16ToWord dflags)
+vecElemProjectCast dflags WordVec W32 = Just (mo_u_32ToWord dflags)
+vecElemProjectCast _ WordVec W64 = Nothing
+vecElemProjectCast _ _ _ = Nothing
+
+
+-- NOTE [SIMD Design for the future]
+-- Check to make sure that we can generate code for the specified vector type
+-- given the current set of dynamic flags.
+-- Currently these checks are specific to x86 and x86_64 architecture.
+-- This should be fixed!
+-- In particular,
+-- 1) Add better support for other architectures! (this may require a redesign)
+-- 2) Decouple design choices from LLVM's pseudo SIMD model!
+-- The high-level LLVM naive rep makes per-CPU-family SIMD generation its own
+-- optimization problem, and hides important differences in e.g. ARM vs x86_64 SIMD
+-- 3) Depending on the architecture, the SIMD registers may also support general
+-- computations on Float/Double/Word/Int scalars, but currently, on
+-- for example x86_64, we always put Word/Int (or sized) values in GPR
+-- (general purpose) registers. Would relaxing that allow for
+-- useful optimization opportunities?
+-- Phrased differently, it is worth experimenting with supporting
+-- different register mapping strategies than we currently have, especially if
+-- someday we want SIMD to be a first class denizen in GHC along with scalar
+-- values!
+-- The current design with respect to register mapping of scalars could
+-- very well be the best, but exploring the design space and doing careful
+-- measurements is the only way to validate that.
+-- In some next-generation CPU ISAs, notably RISC-V, the SIMD extension
+-- includes support for a sort of run time CPU dependent vectorization parameter,
+-- where a loop may act upon a single scalar each iteration OR some 2,4,8 ...
+-- element chunk! Time will tell if that direction sees wide adoption,
+-- but it is from that context that unifying our handling of SIMD and scalars
+-- may benefit. It is not likely to benefit current architectures, though
+-- it may very well be a design perspective that helps guide improving the NCG.
+
+
+checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode ()
+checkVecCompatibility dflags vcat l w = do
+ when (hscTarget dflags /= HscLlvm) $ do
+ sorry $ unlines ["SIMD vector instructions require the LLVM back-end."
+ ,"Please use -fllvm."]
+ check vecWidth vcat l w
+ where
+ check :: Width -> PrimOpVecCat -> Length -> Width -> FCode ()
+ check W128 FloatVec 4 W32 | not (isSseEnabled dflags) =
+ sorry $ "128-bit wide single-precision floating point " ++
+ "SIMD vector instructions require at least -msse."
+ check W128 _ _ _ | not (isSse2Enabled dflags) =
+ sorry $ "128-bit wide integer and double precision " ++
+ "SIMD vector instructions require at least -msse2."
+ check W256 FloatVec _ _ | not (isAvxEnabled dflags) =
+ sorry $ "256-bit wide floating point " ++
+ "SIMD vector instructions require at least -mavx."
+ check W256 _ _ _ | not (isAvx2Enabled dflags) =
+ sorry $ "256-bit wide integer " ++
+ "SIMD vector instructions require at least -mavx2."
+ check W512 _ _ _ | not (isAvx512fEnabled dflags) =
+ sorry $ "512-bit wide " ++
+ "SIMD vector instructions require -mavx512f."
+ check _ _ _ _ = return ()
+
+ vecWidth = typeWidth (vecVmmType vcat l w)
+
+------------------------------------------------------------------------------
+-- Helpers for translating vector packing and unpacking.
+
+doVecPackOp :: Maybe MachOp -- Cast from element to vector component
+ -> CmmType -- Type of vector
+ -> CmmExpr -- Initial vector
+ -> [CmmExpr] -- Elements
+ -> CmmFormal -- Destination for result
+ -> FCode ()
+doVecPackOp maybe_pre_write_cast ty z es res = do
+ dst <- newTemp ty
+ emitAssign (CmmLocal dst) z
+ vecPack dst es 0
+ where
+ vecPack :: CmmFormal -> [CmmExpr] -> Int -> FCode ()
+ vecPack src [] _ =
+ emitAssign (CmmLocal res) (CmmReg (CmmLocal src))
+
+ vecPack src (e : es) i = do
+ dst <- newTemp ty
+ if isFloatType (vecElemType ty)
+ then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
+ [CmmReg (CmmLocal src), cast e, iLit])
+ else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
+ [CmmReg (CmmLocal src), cast e, iLit])
+ vecPack dst es (i + 1)
+ where
+ -- vector indices are always 32-bits
+ iLit = CmmLit (CmmInt (toInteger i) W32)
+
+ cast :: CmmExpr -> CmmExpr
+ cast val = case maybe_pre_write_cast of
+ Nothing -> val
+ Just cast -> CmmMachOp cast [val]
+
+ len :: Length
+ len = vecLength ty
+
+ wid :: Width
+ wid = typeWidth (vecElemType ty)
+
+doVecUnpackOp :: Maybe MachOp -- Cast from vector component to element result
+ -> CmmType -- Type of vector
+ -> CmmExpr -- Vector
+ -> [CmmFormal] -- Element results
+ -> FCode ()
+doVecUnpackOp maybe_post_read_cast ty e res =
+ vecUnpack res 0
+ where
+ vecUnpack :: [CmmFormal] -> Int -> FCode ()
+ vecUnpack [] _ =
+ return ()
+
+ vecUnpack (r : rs) i = do
+ if isFloatType (vecElemType ty)
+ then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
+ [e, iLit]))
+ else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
+ [e, iLit]))
+ vecUnpack rs (i + 1)
+ where
+ -- vector indices are always 32-bits
+ iLit = CmmLit (CmmInt (toInteger i) W32)
+
+ cast :: CmmExpr -> CmmExpr
+ cast val = case maybe_post_read_cast of
+ Nothing -> val
+ Just cast -> CmmMachOp cast [val]
+
+ len :: Length
+ len = vecLength ty
+
+ wid :: Width
+ wid = typeWidth (vecElemType ty)
+
+doVecInsertOp :: Maybe MachOp -- Cast from element to vector component
+ -> CmmType -- Vector type
+ -> CmmExpr -- Source vector
+ -> CmmExpr -- Element
+ -> CmmExpr -- Index at which to insert element
+ -> CmmFormal -- Destination for result
+ -> FCode ()
+doVecInsertOp maybe_pre_write_cast ty src e idx res = do
+ dflags <- getDynFlags
+ -- vector indices are always 32-bits
+ let idx' :: CmmExpr
+ idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
+ if isFloatType (vecElemType ty)
+ then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
+ else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
+ where
+ cast :: CmmExpr -> CmmExpr
+ cast val = case maybe_pre_write_cast of
+ Nothing -> val
+ Just cast -> CmmMachOp cast [val]
+
+ len :: Length
+ len = vecLength ty
+
+ wid :: Width
+ wid = typeWidth (vecElemType ty)
+
+------------------------------------------------------------------------------
+-- Helpers for translating prefetching.
+
+
+-- | Translate byte array prefetch operations into proper primcalls.
+doPrefetchByteArrayOp :: Int
+ -> [CmmExpr]
+ -> FCode ()
+doPrefetchByteArrayOp locality [addr,idx]
+ = do dflags <- getDynFlags
+ mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
+doPrefetchByteArrayOp _ _
+ = panic "GHC.StgToCmm.Prim: doPrefetchByteArrayOp"
+
+-- | Translate mutable byte array prefetch operations into proper primcalls.
+doPrefetchMutableByteArrayOp :: Int
+ -> [CmmExpr]
+ -> FCode ()
+doPrefetchMutableByteArrayOp locality [addr,idx]
+ = do dflags <- getDynFlags
+ mkBasicPrefetch locality (arrWordsHdrSize dflags) addr idx
+doPrefetchMutableByteArrayOp _ _
+ = panic "GHC.StgToCmm.Prim: doPrefetchByteArrayOp"
+
+-- | Translate address prefetch operations into proper primcalls.
+doPrefetchAddrOp :: Int
+ -> [CmmExpr]
+ -> FCode ()
+doPrefetchAddrOp locality [addr,idx]
+ = mkBasicPrefetch locality 0 addr idx
+doPrefetchAddrOp _ _
+ = panic "GHC.StgToCmm.Prim: doPrefetchAddrOp"
+
+-- | Translate value prefetch operations into proper primcalls.
+doPrefetchValueOp :: Int
+ -> [CmmExpr]
+ -> FCode ()
+doPrefetchValueOp locality [addr]
+ = do dflags <- getDynFlags
+ mkBasicPrefetch locality 0 addr (CmmLit (CmmInt 0 (wordWidth dflags)))
+doPrefetchValueOp _ _
+ = panic "GHC.StgToCmm.Prim: doPrefetchValueOp"
+
+-- | helper to generate prefetch primcalls
+mkBasicPrefetch :: Int -- Locality level 0-3
+ -> ByteOff -- Initial offset in bytes
+ -> CmmExpr -- Base address
+ -> CmmExpr -- Index
+ -> FCode ()
+mkBasicPrefetch locality off base idx
+ = do dflags <- getDynFlags
+ emitPrimCall [] (MO_Prefetch_Data locality) [cmmIndexExpr dflags W8 (cmmOffsetB dflags base off) idx]
+ return ()
+
+-- ----------------------------------------------------------------------------
+-- Allocating byte arrays
+
+-- | Takes a register to return the newly allocated array in and the
+-- size of the new array in bytes. Allocates a new
+-- 'MutableByteArray#'.
+doNewByteArrayOp :: CmmFormal -> ByteOff -> FCode ()
+doNewByteArrayOp res_r n = do
+ dflags <- getDynFlags
+
+ let info_ptr = mkLblExpr mkArrWords_infoLabel
+ rep = arrWordsRep dflags n
+
+ tickyAllocPrim (mkIntExpr dflags (arrWordsHdrSize dflags))
+ (mkIntExpr dflags (nonHdrSize dflags rep))
+ (zeroExpr dflags)
+
+ let hdr_size = fixedHdrSize dflags
+
+ base <- allocHeapClosure rep info_ptr cccsExpr
+ [ (mkIntExpr dflags n,
+ hdr_size + oFFSET_StgArrBytes_bytes dflags)
+ ]
+
+ emit $ mkAssign (CmmLocal res_r) base
+
+-- ----------------------------------------------------------------------------
+-- Comparing byte arrays
+
+doCompareByteArraysOp :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
+ -> FCode ()
+doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n = do
+ dflags <- getDynFlags
+ ba1_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba1 (arrWordsHdrSize dflags)) ba1_off
+ ba2_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba2 (arrWordsHdrSize dflags)) ba2_off
+
+ -- short-cut in case of equal pointers avoiding a costly
+ -- subroutine call to the memcmp(3) routine; the Cmm logic below
+ -- results in assembly code being generated for
+ --
+ -- cmpPrefix10 :: ByteArray# -> ByteArray# -> Int#
+ -- cmpPrefix10 ba1 ba2 = compareByteArrays# ba1 0# ba2 0# 10#
+ --
+ -- that looks like
+ --
+ -- leaq 16(%r14),%rax
+ -- leaq 16(%rsi),%rbx
+ -- xorl %ecx,%ecx
+ -- cmpq %rbx,%rax
+ -- je l_ptr_eq
+ --
+    --      ; NB: the common case (unequal pointers) falls through
+ -- ; the conditional jump, and therefore matches the
+ -- ; usual static branch prediction convention of modern cpus
+ --
+ -- subq $8,%rsp
+ -- movq %rbx,%rsi
+ -- movq %rax,%rdi
+ -- movl $10,%edx
+ -- xorl %eax,%eax
+ -- call memcmp
+ -- addq $8,%rsp
+ -- movslq %eax,%rax
+ -- movq %rax,%rcx
+ -- l_ptr_eq:
+ -- movq %rcx,%rbx
+ -- jmp *(%rbp)
+
+ l_ptr_eq <- newBlockId
+ l_ptr_ne <- newBlockId
+
+ emit (mkAssign (CmmLocal res) (zeroExpr dflags))
+ emit (mkCbranch (cmmEqWord dflags ba1_p ba2_p)
+ l_ptr_eq l_ptr_ne (Just False))
+
+ emitLabel l_ptr_ne
+ emitMemcmpCall res ba1_p ba2_p n 1
+
+ emitLabel l_ptr_eq
+
+-- ----------------------------------------------------------------------------
+-- Copying byte arrays
+
+-- | Takes a source 'ByteArray#', an offset in the source array, a
+-- destination 'MutableByteArray#', an offset into the destination
+-- array, and the number of bytes to copy. Copies the given number of
+-- bytes from the source array to the destination array.
+doCopyByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
+ -> FCode ()
+doCopyByteArrayOp = emitCopyByteArray copy
+ where
+ -- Copy data (we assume the arrays aren't overlapping since
+ -- they're of different types)
+ copy _src _dst dst_p src_p bytes align =
+ emitMemcpyCall dst_p src_p bytes align
+
+-- | Takes a source 'MutableByteArray#', an offset in the source
+-- array, a destination 'MutableByteArray#', an offset into the
+-- destination array, and the number of bytes to copy. Copies the
+-- given number of bytes from the source array to the destination
+-- array.
+doCopyMutableByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
+ -> FCode ()
+doCopyMutableByteArrayOp = emitCopyByteArray copy
+ where
+ -- The only time the memory might overlap is when the two arrays
+ -- we were provided are the same array!
+ -- TODO: Optimize branch for common case of no aliasing.
+ copy src dst dst_p src_p bytes align = do
+ dflags <- getDynFlags
+ (moveCall, cpyCall) <- forkAltPair
+ (getCode $ emitMemmoveCall dst_p src_p bytes align)
+ (getCode $ emitMemcpyCall dst_p src_p bytes align)
+ emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
+
+emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
+ -> Alignment -> FCode ())
+ -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
+ -> FCode ()
+emitCopyByteArray copy src src_off dst dst_off n = do
+ dflags <- getDynFlags
+ let byteArrayAlignment = wordAlignment dflags
+ srcOffAlignment = cmmExprAlignment src_off
+ dstOffAlignment = cmmExprAlignment dst_off
+ align = minimum [byteArrayAlignment, srcOffAlignment, dstOffAlignment]
+ dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
+ src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
+ copy src dst dst_p src_p n align
+
+-- | Takes a source 'ByteArray#', an offset in the source array, a
+-- destination 'Addr#', and the number of bytes to copy. Copies the given
+-- number of bytes from the source array to the destination memory region.
+doCopyByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
+doCopyByteArrayToAddrOp src src_off dst_p bytes = do
+ -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
+ dflags <- getDynFlags
+ src_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags src (arrWordsHdrSize dflags)) src_off
+ emitMemcpyCall dst_p src_p bytes (mkAlignment 1)
+
+-- | Takes a source 'MutableByteArray#', an offset in the source array, a
+-- destination 'Addr#', and the number of bytes to copy. Copies the given
+-- number of bytes from the source array to the destination memory region.
+doCopyMutableByteArrayToAddrOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
+ -> FCode ()
+doCopyMutableByteArrayToAddrOp = doCopyByteArrayToAddrOp
+
+-- | Takes a source 'Addr#', a destination 'MutableByteArray#', an offset into
+-- the destination array, and the number of bytes to copy. Copies the given
+-- number of bytes from the source memory region to the destination array.
+doCopyAddrToByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
+doCopyAddrToByteArrayOp src_p dst dst_off bytes = do
+ -- Use memcpy (we are allowed to assume the arrays aren't overlapping)
+ dflags <- getDynFlags
+ dst_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags dst (arrWordsHdrSize dflags)) dst_off
+ emitMemcpyCall dst_p src_p bytes (mkAlignment 1)
+
+
+-- ----------------------------------------------------------------------------
+-- Setting byte arrays
+
+-- | Takes a 'MutableByteArray#', an offset into the array, a length,
+-- and a byte, and sets each of the selected bytes in the array to the
+-- character.
+doSetByteArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
+ -> FCode ()
+doSetByteArrayOp ba off len c = do
+ dflags <- getDynFlags
+
+ let byteArrayAlignment = wordAlignment dflags -- known since BA is allocated on heap
+ offsetAlignment = cmmExprAlignment off
+ align = min byteArrayAlignment offsetAlignment
+
+ p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba (arrWordsHdrSize dflags)) off
+ emitMemsetCall p c len align
+
+-- ----------------------------------------------------------------------------
+-- Allocating arrays
+
+-- | Allocate a new array.
+doNewArrayOp :: CmmFormal -- ^ return register
+ -> SMRep -- ^ representation of the array
+ -> CLabel -- ^ info pointer
+ -> [(CmmExpr, ByteOff)] -- ^ header payload
+ -> WordOff -- ^ array size
+ -> CmmExpr -- ^ initial element
+ -> FCode ()
+doNewArrayOp res_r rep info payload n init = do
+ dflags <- getDynFlags
+
+ let info_ptr = mkLblExpr info
+
+ tickyAllocPrim (mkIntExpr dflags (hdrSize dflags rep))
+ (mkIntExpr dflags (nonHdrSize dflags rep))
+ (zeroExpr dflags)
+
+ base <- allocHeapClosure rep info_ptr cccsExpr payload
+
+ arr <- CmmLocal `fmap` newTemp (bWord dflags)
+ emit $ mkAssign arr base
+
+ -- Initialise all elements of the array
+ let mkOff off = cmmOffsetW dflags (CmmReg arr) (hdrSizeW dflags rep + off)
+ initialization = [ mkStore (mkOff off) init | off <- [0.. n - 1] ]
+ emit (catAGraphs initialization)
+
+ emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
+
+-- ----------------------------------------------------------------------------
+-- Copying pointer arrays
+
+-- EZY: This code has an unusually high number of assignTemp calls, seen
+-- nowhere else in the code generator. This is mostly because these
+-- "primitive" ops result in a surprisingly large amount of code. It
+-- will likely be worthwhile to optimize what is emitted here, so that
+-- our optimization passes don't waste time repeatedly optimizing the
+-- same bits of code.
+
+-- More closely imitates 'assignTemp' from the old code generator, which
+-- returns a CmmExpr rather than a LocalReg.
+assignTempE :: CmmExpr -> FCode CmmExpr
+assignTempE e = do
+ t <- assignTemp e
+ return (CmmReg (CmmLocal t))
+
+-- | Takes a source 'Array#', an offset in the source array, a
+-- destination 'MutableArray#', an offset into the destination array,
+-- and the number of elements to copy. Copies the given number of
+-- elements from the source array to the destination array.
+doCopyArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
+ -> FCode ()
+doCopyArrayOp = emitCopyArray copy
+ where
+ -- Copy data (we assume the arrays aren't overlapping since
+ -- they're of different types)
+ copy _src _dst dst_p src_p bytes =
+ do dflags <- getDynFlags
+ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
+ (wordAlignment dflags)
+
+
+-- | Takes a source 'MutableArray#', an offset in the source array, a
+-- destination 'MutableArray#', an offset into the destination array,
+-- and the number of elements to copy. Copies the given number of
+-- elements from the source array to the destination array.
+doCopyMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
+ -> FCode ()
+doCopyMutableArrayOp = emitCopyArray copy
+ where
+ -- The only time the memory might overlap is when the two arrays
+ -- we were provided are the same array!
+ -- TODO: Optimize branch for common case of no aliasing.
+ copy src dst dst_p src_p bytes = do
+ dflags <- getDynFlags
+ (moveCall, cpyCall) <- forkAltPair
+ (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
+ (wordAlignment dflags))
+ (getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
+ (wordAlignment dflags))
+ emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
+
+emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
+ -> FCode ()) -- ^ copy function
+ -> CmmExpr -- ^ source array
+ -> CmmExpr -- ^ offset in source array
+ -> CmmExpr -- ^ destination array
+ -> CmmExpr -- ^ offset in destination array
+ -> WordOff -- ^ number of elements to copy
+ -> FCode ()
+emitCopyArray copy src0 src_off dst0 dst_off0 n =
+ when (n /= 0) $ do
+ dflags <- getDynFlags
+
+ -- Passed as arguments (be careful)
+ src <- assignTempE src0
+ dst <- assignTempE dst0
+ dst_off <- assignTempE dst_off0
+
+ -- Set the dirty bit in the header.
+ emit (setInfo dst (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
+
+ dst_elems_p <- assignTempE $ cmmOffsetB dflags dst
+ (arrPtrsHdrSize dflags)
+ dst_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p dst_off
+ src_p <- assignTempE $ cmmOffsetExprW dflags
+ (cmmOffsetB dflags src (arrPtrsHdrSize dflags)) src_off
+ let bytes = wordsToBytes dflags n
+
+ copy src dst dst_p src_p bytes
+
+ -- The base address of the destination card table
+ dst_cards_p <- assignTempE $ cmmOffsetExprW dflags dst_elems_p
+ (loadArrPtrsSize dflags dst)
+
+ emitSetCards dst_off dst_cards_p n
+
+doCopySmallArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
+ -> FCode ()
+doCopySmallArrayOp = emitCopySmallArray copy
+ where
+ -- Copy data (we assume the arrays aren't overlapping since
+ -- they're of different types)
+ copy _src _dst dst_p src_p bytes =
+ do dflags <- getDynFlags
+ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
+ (wordAlignment dflags)
+
+
+doCopySmallMutableArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> WordOff
+ -> FCode ()
+doCopySmallMutableArrayOp = emitCopySmallArray copy
+ where
+ -- The only time the memory might overlap is when the two arrays
+ -- we were provided are the same array!
+ -- TODO: Optimize branch for common case of no aliasing.
+ copy src dst dst_p src_p bytes = do
+ dflags <- getDynFlags
+ (moveCall, cpyCall) <- forkAltPair
+ (getCode $ emitMemmoveCall dst_p src_p (mkIntExpr dflags bytes)
+ (wordAlignment dflags))
+ (getCode $ emitMemcpyCall dst_p src_p (mkIntExpr dflags bytes)
+ (wordAlignment dflags))
+ emit =<< mkCmmIfThenElse (cmmEqWord dflags src dst) moveCall cpyCall
+
+emitCopySmallArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> ByteOff
+ -> FCode ()) -- ^ copy function
+ -> CmmExpr -- ^ source array
+ -> CmmExpr -- ^ offset in source array
+ -> CmmExpr -- ^ destination array
+ -> CmmExpr -- ^ offset in destination array
+ -> WordOff -- ^ number of elements to copy
+ -> FCode ()
+emitCopySmallArray copy src0 src_off dst0 dst_off n =
+ when (n /= 0) $ do
+ dflags <- getDynFlags
+
+ -- Passed as arguments (be careful)
+ src <- assignTempE src0
+ dst <- assignTempE dst0
+
+ -- Set the dirty bit in the header.
+ emit (setInfo dst (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
+
+ dst_p <- assignTempE $ cmmOffsetExprW dflags
+ (cmmOffsetB dflags dst (smallArrPtrsHdrSize dflags)) dst_off
+ src_p <- assignTempE $ cmmOffsetExprW dflags
+ (cmmOffsetB dflags src (smallArrPtrsHdrSize dflags)) src_off
+ let bytes = wordsToBytes dflags n
+
+ copy src dst dst_p src_p bytes
+
+-- | Takes an info table label, a register to return the newly
+-- allocated array in, a source array, an offset in the source array,
+-- and the number of elements to copy. Allocates a new array and
+-- initializes it from the source array.
+emitCloneArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
+ -> FCode ()
+emitCloneArray info_p res_r src src_off n = do
+ dflags <- getDynFlags
+
+ let info_ptr = mkLblExpr info_p
+ rep = arrPtrsRep dflags n
+
+ tickyAllocPrim (mkIntExpr dflags (arrPtrsHdrSize dflags))
+ (mkIntExpr dflags (nonHdrSize dflags rep))
+ (zeroExpr dflags)
+
+ let hdr_size = fixedHdrSize dflags
+
+ base <- allocHeapClosure rep info_ptr cccsExpr
+ [ (mkIntExpr dflags n,
+ hdr_size + oFFSET_StgMutArrPtrs_ptrs dflags)
+ , (mkIntExpr dflags (nonHdrSizeW rep),
+ hdr_size + oFFSET_StgMutArrPtrs_size dflags)
+ ]
+
+ arr <- CmmLocal `fmap` newTemp (bWord dflags)
+ emit $ mkAssign arr base
+
+ dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
+ (arrPtrsHdrSize dflags)
+ src_p <- assignTempE $ cmmOffsetExprW dflags src
+ (cmmAddWord dflags
+ (mkIntExpr dflags (arrPtrsHdrSizeW dflags)) src_off)
+
+ emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
+ (wordAlignment dflags)
+
+ emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
+
+-- | Takes an info table label, a register to return the newly
+-- allocated array in, a source array, an offset in the source array,
+-- and the number of elements to copy. Allocates a new array and
+-- initializes it from the source array.
+emitCloneSmallArray :: CLabel -> CmmFormal -> CmmExpr -> CmmExpr -> WordOff
+ -> FCode ()
+emitCloneSmallArray info_p res_r src src_off n = do
+ dflags <- getDynFlags
+
+ let info_ptr = mkLblExpr info_p
+ rep = smallArrPtrsRep n
+
+ tickyAllocPrim (mkIntExpr dflags (smallArrPtrsHdrSize dflags))
+ (mkIntExpr dflags (nonHdrSize dflags rep))
+ (zeroExpr dflags)
+
+ let hdr_size = fixedHdrSize dflags
+
+ base <- allocHeapClosure rep info_ptr cccsExpr
+ [ (mkIntExpr dflags n,
+ hdr_size + oFFSET_StgSmallMutArrPtrs_ptrs dflags)
+ ]
+
+ arr <- CmmLocal `fmap` newTemp (bWord dflags)
+ emit $ mkAssign arr base
+
+ dst_p <- assignTempE $ cmmOffsetB dflags (CmmReg arr)
+ (smallArrPtrsHdrSize dflags)
+ src_p <- assignTempE $ cmmOffsetExprW dflags src
+ (cmmAddWord dflags
+ (mkIntExpr dflags (smallArrPtrsHdrSizeW dflags)) src_off)
+
+ emitMemcpyCall dst_p src_p (mkIntExpr dflags (wordsToBytes dflags n))
+ (wordAlignment dflags)
+
+ emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
+
+-- | Takes an offset into the destination array, the base address of
+-- the card table, and the number of elements affected (*not* the
+-- number of cards). The number of elements may not be zero.
+-- Marks the relevant cards as dirty.
+emitSetCards :: CmmExpr -> CmmExpr -> WordOff -> FCode ()
+emitSetCards dst_start dst_cards_start n = do
+ dflags <- getDynFlags
+ start_card <- assignTempE $ cardCmm dflags dst_start
+ let end_card = cardCmm dflags
+ (cmmSubWord dflags
+ (cmmAddWord dflags dst_start (mkIntExpr dflags n))
+ (mkIntExpr dflags 1))
+ emitMemsetCall (cmmAddWord dflags dst_cards_start start_card)
+ (mkIntExpr dflags 1)
+ (cmmAddWord dflags (cmmSubWord dflags end_card start_card) (mkIntExpr dflags 1))
+ (mkAlignment 1) -- no alignment (1 byte)
+
+-- Convert an element index to a card index
+cardCmm :: DynFlags -> CmmExpr -> CmmExpr
+cardCmm dflags i =
+ cmmUShrWord dflags i (mkIntExpr dflags (mUT_ARR_PTRS_CARD_BITS dflags))
+
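+-- As an illustration (the card size is an RTS configuration constant):
+-- with mUT_ARR_PTRS_CARD_BITS == 7, each card covers 2^7 = 128
+-- elements, so elements 0..127 map to card 0, 128..255 to card 1, and
+-- so on; 'emitSetCards' then memsets one byte per affected card.
+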
+------------------------------------------------------------------------------
+-- SmallArray PrimOp implementations
+
+doReadSmallPtrArrayOp :: LocalReg
+ -> CmmExpr
+ -> CmmExpr
+ -> FCode ()
+doReadSmallPtrArrayOp res addr idx = do
+ dflags <- getDynFlags
+ mkBasicIndexedRead (smallArrPtrsHdrSize dflags) Nothing (gcWord dflags) res addr
+ (gcWord dflags) idx
+
+doWriteSmallPtrArrayOp :: CmmExpr
+ -> CmmExpr
+ -> CmmExpr
+ -> FCode ()
+doWriteSmallPtrArrayOp addr idx val = do
+ dflags <- getDynFlags
+ let ty = cmmExprType dflags val
+ emitPrimCall [] MO_WriteBarrier [] -- #12469
+ mkBasicIndexedWrite (smallArrPtrsHdrSize dflags) Nothing addr ty idx val
+ emit (setInfo addr (CmmLit (CmmLabel mkSMAP_DIRTY_infoLabel)))
+
+------------------------------------------------------------------------------
+-- Atomic read-modify-write
+
+-- | Emit an atomic modification to a byte array element. The result
+-- reg contains the previous value of the element. Implies a full
+-- memory barrier.
+doAtomicRMW :: LocalReg -- ^ Result reg
+ -> AtomicMachOp -- ^ Atomic op (e.g. add)
+ -> CmmExpr -- ^ MutableByteArray#
+ -> CmmExpr -- ^ Index
+ -> CmmType -- ^ Type of element by which we are indexing
+ -> CmmExpr -- ^ Op argument (e.g. amount to add)
+ -> FCode ()
+doAtomicRMW res amop mba idx idx_ty n = do
+ dflags <- getDynFlags
+ let width = typeWidth idx_ty
+ addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
+ width mba idx
+ emitPrimCall
+ [ res ]
+ (MO_AtomicRMW width amop)
+ [ addr, n ]
+
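+-- A sketch of how this is reached (the dispatch on primops lives
+-- earlier in this module): @fetchAddIntArray# arr i x@ would arrive
+-- here roughly as
+--
+--   doAtomicRMW res AMO_Add arr i (bWord dflags) x
+--
+-- and compile to a single MO_AtomicRMW on the element's address.
+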
+-- | Emit an atomic read to a byte array that acts as a memory barrier.
+doAtomicReadByteArray
+ :: LocalReg -- ^ Result reg
+ -> CmmExpr -- ^ MutableByteArray#
+ -> CmmExpr -- ^ Index
+ -> CmmType -- ^ Type of element by which we are indexing
+ -> FCode ()
+doAtomicReadByteArray res mba idx idx_ty = do
+ dflags <- getDynFlags
+ let width = typeWidth idx_ty
+ addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
+ width mba idx
+ emitPrimCall
+ [ res ]
+ (MO_AtomicRead width)
+ [ addr ]
+
+-- | Emit an atomic write to a byte array that acts as a memory barrier.
+doAtomicWriteByteArray
+ :: CmmExpr -- ^ MutableByteArray#
+ -> CmmExpr -- ^ Index
+ -> CmmType -- ^ Type of element by which we are indexing
+ -> CmmExpr -- ^ Value to write
+ -> FCode ()
+doAtomicWriteByteArray mba idx idx_ty val = do
+ dflags <- getDynFlags
+ let width = typeWidth idx_ty
+ addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
+ width mba idx
+ emitPrimCall
+ [ {- no results -} ]
+ (MO_AtomicWrite width)
+ [ addr, val ]
+
+doCasByteArray
+ :: LocalReg -- ^ Result reg
+ -> CmmExpr -- ^ MutableByteArray#
+ -> CmmExpr -- ^ Index
+ -> CmmType -- ^ Type of element by which we are indexing
+ -> CmmExpr -- ^ Old value
+ -> CmmExpr -- ^ New value
+ -> FCode ()
+doCasByteArray res mba idx idx_ty old new = do
+ dflags <- getDynFlags
+ let width = (typeWidth idx_ty)
+ addr = cmmIndexOffExpr dflags (arrWordsHdrSize dflags)
+ width mba idx
+ emitPrimCall
+ [ res ]
+ (MO_Cmpxchg width)
+ [ addr, old, new ]
+
+------------------------------------------------------------------------------
+-- Helpers for emitting function calls
+
+-- | Emit a call to @memcpy@.
+emitMemcpyCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
+emitMemcpyCall dst src n align = do
+ emitPrimCall
+ [ {-no results-} ]
+ (MO_Memcpy (alignmentBytes align))
+ [ dst, src, n ]
+
+-- | Emit a call to @memmove@.
+emitMemmoveCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
+emitMemmoveCall dst src n align = do
+ emitPrimCall
+ [ {- no results -} ]
+ (MO_Memmove (alignmentBytes align))
+ [ dst, src, n ]
+
+-- | Emit a call to @memset@. The second argument must fit inside an
+-- unsigned char.
+emitMemsetCall :: CmmExpr -> CmmExpr -> CmmExpr -> Alignment -> FCode ()
+emitMemsetCall dst c n align = do
+ emitPrimCall
+ [ {- no results -} ]
+ (MO_Memset (alignmentBytes align))
+ [ dst, c, n ]
+
+emitMemcmpCall :: LocalReg -> CmmExpr -> CmmExpr -> CmmExpr -> Int -> FCode ()
+emitMemcmpCall res ptr1 ptr2 n align = do
+ -- 'MO_Memcmp' is assumed to return a 32-bit 'CInt' because all
+ -- code-gens currently call out to the @memcmp(3)@ C function.
+ -- This was easier than moving the sign-extensions into
+ -- all the code-gens.
+ dflags <- getDynFlags
+ let is32Bit = typeWidth (localRegType res) == W32
+
+ cres <- if is32Bit
+ then return res
+ else newTemp b32
+
+ emitPrimCall
+ [ cres ]
+ (MO_Memcmp align)
+ [ ptr1, ptr2, n ]
+
+ unless is32Bit $ do
+ emit $ mkAssign (CmmLocal res)
+ (CmmMachOp
+ (mo_s_32ToWord dflags)
+ [(CmmReg (CmmLocal cres))])
+
+emitBSwapCall :: LocalReg -> CmmExpr -> Width -> FCode ()
+emitBSwapCall res x width = do
+ emitPrimCall
+ [ res ]
+ (MO_BSwap width)
+ [ x ]
+
+emitBRevCall :: LocalReg -> CmmExpr -> Width -> FCode ()
+emitBRevCall res x width = do
+ emitPrimCall
+ [ res ]
+ (MO_BRev width)
+ [ x ]
+
+emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
+emitPopCntCall res x width = do
+ emitPrimCall
+ [ res ]
+ (MO_PopCnt width)
+ [ x ]
+
+emitPdepCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
+emitPdepCall res x y width = do
+ emitPrimCall
+ [ res ]
+ (MO_Pdep width)
+ [ x, y ]
+
+emitPextCall :: LocalReg -> CmmExpr -> CmmExpr -> Width -> FCode ()
+emitPextCall res x y width = do
+ emitPrimCall
+ [ res ]
+ (MO_Pext width)
+ [ x, y ]
+
+emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
+emitClzCall res x width = do
+ emitPrimCall
+ [ res ]
+ (MO_Clz width)
+ [ x ]
+
+emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
+emitCtzCall res x width = do
+ emitPrimCall
+ [ res ]
+ (MO_Ctz width)
+ [ x ]
diff --git a/compiler/GHC/StgToCmm/Prof.hs b/compiler/GHC/StgToCmm/Prof.hs
new file mode 100644
index 0000000000..ce8ef61f17
--- /dev/null
+++ b/compiler/GHC/StgToCmm/Prof.hs
@@ -0,0 +1,360 @@
+-----------------------------------------------------------------------------
+--
+-- Code generation for profiling
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+module GHC.StgToCmm.Prof (
+ initCostCentres, ccType, ccsType,
+ mkCCostCentre, mkCCostCentreStack,
+
+ -- Cost-centre Profiling
+ dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
+ enterCostCentreThunk, enterCostCentreFun,
+ costCentreFrom,
+ storeCurCCS,
+ emitSetCCC,
+
+ saveCurrentCostCentre, restoreCurrentCostCentre,
+
+ -- Lag/drag/void stuff
+ ldvEnter, ldvEnterClosure, ldvRecordCreate
+ ) where
+
+import GhcPrelude
+
+import GHC.StgToCmm.Closure
+import GHC.StgToCmm.Utils
+import GHC.StgToCmm.Monad
+import SMRep
+
+import MkGraph
+import Cmm
+import CmmUtils
+import CLabel
+
+import CostCentre
+import DynFlags
+import FastString
+import Module
+import Outputable
+
+import Control.Monad
+import Data.Char (ord)
+
+-----------------------------------------------------------------------------
+--
+-- Cost-centre-stack Profiling
+--
+-----------------------------------------------------------------------------
+
+-- Expression representing the current cost centre stack
+ccsType :: DynFlags -> CmmType -- Type of a cost-centre stack
+ccsType = bWord
+
+ccType :: DynFlags -> CmmType -- Type of a cost centre
+ccType = bWord
+
+storeCurCCS :: CmmExpr -> CmmAGraph
+storeCurCCS e = mkAssign cccsReg e
+
+mkCCostCentre :: CostCentre -> CmmLit
+mkCCostCentre cc = CmmLabel (mkCCLabel cc)
+
+mkCCostCentreStack :: CostCentreStack -> CmmLit
+mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs)
+
+costCentreFrom :: DynFlags
+ -> CmmExpr -- A closure pointer
+ -> CmmExpr -- The cost centre from that closure
+costCentreFrom dflags cl = CmmLoad (cmmOffsetB dflags cl (oFFSET_StgHeader_ccs dflags)) (ccsType dflags)
+
+-- | The profiling header words in a static closure
+staticProfHdr :: DynFlags -> CostCentreStack -> [CmmLit]
+staticProfHdr dflags ccs
+ = ifProfilingL dflags [mkCCostCentreStack ccs, staticLdvInit dflags]
+
+-- | Profiling header words in a dynamic closure
+dynProfHdr :: DynFlags -> CmmExpr -> [CmmExpr]
+dynProfHdr dflags ccs = ifProfilingL dflags [ccs, dynLdvInit dflags]
+
+-- | Initialise the profiling field of an update frame
+initUpdFrameProf :: CmmExpr -> FCode ()
+initUpdFrameProf frame
+ = ifProfiling $ -- frame->header.prof.ccs = CCCS
+ do dflags <- getDynFlags
+ emitStore (cmmOffset dflags frame (oFFSET_StgHeader_ccs dflags)) cccsExpr
+ -- frame->header.prof.hp.rs = NULL (or frame->header.prof.hp.ldvw = 0)
+ -- is unnecessary because it is not used anyhow.
+
+---------------------------------------------------------------------------
+-- Saving and restoring the current cost centre
+---------------------------------------------------------------------------
+
+{- Note [Saving the current cost centre]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The current cost centre is like a global register. Like other
+global registers, it's a caller-saves one. But consider
+ case (f x) of (p,q) -> rhs
+Since 'f' may set the cost centre, we must restore it
+before resuming rhs. So we want code like this:
+ local_cc = CCC -- save
+ r = f( x )
+ CCC = local_cc -- restore
+That is, we explicitly "save" the current cost centre in
+a LocalReg, local_cc; and restore it after the call. The
+C-- infrastructure will arrange to save local_cc across the
+call.
+
+The same goes for join points;
+ let j x = join-stuff
+ in blah-blah
+We want this kind of code:
+ local_cc = CCC -- save
+ blah-blah
+ J:
+ CCC = local_cc -- restore
+-}
+
+saveCurrentCostCentre :: FCode (Maybe LocalReg)
+ -- Returns Nothing if profiling is off
+saveCurrentCostCentre
+ = do dflags <- getDynFlags
+ if not (gopt Opt_SccProfilingOn dflags)
+ then return Nothing
+ else do local_cc <- newTemp (ccType dflags)
+ emitAssign (CmmLocal local_cc) cccsExpr
+ return (Just local_cc)
+
+restoreCurrentCostCentre :: Maybe LocalReg -> FCode ()
+restoreCurrentCostCentre Nothing
+ = return ()
+restoreCurrentCostCentre (Just local_cc)
+ = emit (storeCurCCS (CmmReg (CmmLocal local_cc)))
+
+
+-------------------------------------------------------------------------------
+-- Recording allocation in a cost centre
+-------------------------------------------------------------------------------
+
+-- | Record the allocation of a closure. The CmmExpr is the cost
+-- centre stack to which to attribute the allocation.
+profDynAlloc :: SMRep -> CmmExpr -> FCode ()
+profDynAlloc rep ccs
+ = ifProfiling $
+ do dflags <- getDynFlags
+ profAlloc (mkIntExpr dflags (heapClosureSizeW dflags rep)) ccs
+
+-- | Record the allocation of a closure (size is given by a CmmExpr)
+-- The size must be in words, because the allocation counter in a CCS counts
+-- in words.
+profAlloc :: CmmExpr -> CmmExpr -> FCode ()
+profAlloc words ccs
+ = ifProfiling $
+ do dflags <- getDynFlags
+ let alloc_rep = rEP_CostCentreStack_mem_alloc dflags
+ emit (addToMemE alloc_rep
+ (cmmOffsetB dflags ccs (oFFSET_CostCentreStack_mem_alloc dflags))
+ (CmmMachOp (MO_UU_Conv (wordWidth dflags) (typeWidth alloc_rep)) $
+ [CmmMachOp (mo_wordSub dflags) [words,
+ mkIntExpr dflags (profHdrSize dflags)]]))
+ -- subtract the "profiling overhead", which is the
+ -- profiling header in a closure.
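+ -- For instance (assuming a two-word profiling header):
+ -- allocating a 5-word closure records 5 - 2 = 3 words
+ -- against the cost-centre stack.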
+
+-- -----------------------------------------------------------------------
+-- Setting the current cost centre on entry to a closure
+
+enterCostCentreThunk :: CmmExpr -> FCode ()
+enterCostCentreThunk closure =
+ ifProfiling $ do
+ dflags <- getDynFlags
+ emit $ storeCurCCS (costCentreFrom dflags closure)
+
+enterCostCentreFun :: CostCentreStack -> CmmExpr -> FCode ()
+enterCostCentreFun ccs closure =
+ ifProfiling $ do
+ if isCurrentCCS ccs
+ then do dflags <- getDynFlags
+ emitRtsCall rtsUnitId (fsLit "enterFunCCS")
+ [(baseExpr, AddrHint),
+ (costCentreFrom dflags closure, AddrHint)] False
+ else return () -- top-level function, nothing to do
+
+ifProfiling :: FCode () -> FCode ()
+ifProfiling code
+ = do dflags <- getDynFlags
+ if gopt Opt_SccProfilingOn dflags
+ then code
+ else return ()
+
+ifProfilingL :: DynFlags -> [a] -> [a]
+ifProfilingL dflags xs
+ | gopt Opt_SccProfilingOn dflags = xs
+ | otherwise = []
+
+
+---------------------------------------------------------------
+-- Initialising Cost Centres & CCSs
+---------------------------------------------------------------
+
+initCostCentres :: CollectedCCs -> FCode ()
+-- Emit the declarations
+initCostCentres (local_CCs, singleton_CCSs)
+ = do dflags <- getDynFlags
+ when (gopt Opt_SccProfilingOn dflags) $
+ do mapM_ emitCostCentreDecl local_CCs
+ mapM_ emitCostCentreStackDecl singleton_CCSs
+
+
+emitCostCentreDecl :: CostCentre -> FCode ()
+emitCostCentreDecl cc = do
+ { dflags <- getDynFlags
+ ; let is_caf | isCafCC cc = mkIntCLit dflags (ord 'c') -- 'c' == is a CAF
+ | otherwise = zero dflags
+ -- NB. bytesFS: we want the UTF-8 bytes here (#5559)
+ ; label <- newByteStringCLit (bytesFS $ costCentreUserNameFS cc)
+ ; modl <- newByteStringCLit (bytesFS $ Module.moduleNameFS
+ $ Module.moduleName
+ $ cc_mod cc)
+ ; loc <- newByteStringCLit $ bytesFS $ mkFastString $
+ showPpr dflags (costCentreSrcSpan cc)
+ -- XXX going via FastString to get UTF-8 encoding is silly
+ ; let
+ lits = [ zero dflags, -- StgInt ccID,
+ label, -- char *label,
+ modl, -- char *module,
+ loc, -- char *srcloc,
+ zero64, -- StgWord64 mem_alloc
+ zero dflags, -- StgWord time_ticks
+ is_caf, -- StgInt is_caf
+ zero dflags -- struct _CostCentre *link
+ ]
+ ; emitDataLits (mkCCLabel cc) lits
+ }
+
+emitCostCentreStackDecl :: CostCentreStack -> FCode ()
+emitCostCentreStackDecl ccs
+ = case maybeSingletonCCS ccs of
+ Just cc ->
+ do dflags <- getDynFlags
+ let mk_lits cc = zero dflags :
+ mkCCostCentre cc :
+ replicate (sizeof_ccs_words dflags - 2) (zero dflags)
+ -- Note: to avoid making any assumptions about how the
+ -- C compiler (that compiles the RTS, in particular) does
+ -- layouts of structs containing long-longs, simply
+ -- pad out the struct with zero words until we hit the
+ -- size of the overall struct (which we get via DerivedConstants.h)
+ emitDataLits (mkCCSLabel ccs) (mk_lits cc)
+ Nothing -> pprPanic "emitCostCentreStackDecl" (ppr ccs)
+
+zero :: DynFlags -> CmmLit
+zero dflags = mkIntCLit dflags 0
+zero64 :: CmmLit
+zero64 = CmmInt 0 W64
+
+sizeof_ccs_words :: DynFlags -> Int
+sizeof_ccs_words dflags
+ -- round up to the next word.
+ | ms == 0 = ws
+ | otherwise = ws + 1
+ where
+ (ws,ms) = sIZEOF_CostCentreStack dflags `divMod` wORD_SIZE dflags
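+
+-- e.g. (hypothetical sizes) if sIZEOF_CostCentreStack is 92 bytes on
+-- an 8-byte-word target, 92 `divMod` 8 == (11,4), so we pad to 12 words.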
+
+-- ---------------------------------------------------------------------------
+-- Set the current cost centre stack
+
+emitSetCCC :: CostCentre -> Bool -> Bool -> FCode ()
+emitSetCCC cc tick push
+ = do dflags <- getDynFlags
+ if not (gopt Opt_SccProfilingOn dflags)
+ then return ()
+ else do tmp <- newTemp (ccsType dflags)
+ pushCostCentre tmp cccsExpr cc
+ when tick $ emit (bumpSccCount dflags (CmmReg (CmmLocal tmp)))
+ when push $ emit (storeCurCCS (CmmReg (CmmLocal tmp)))
+
+pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode ()
+pushCostCentre result ccs cc
+ = emitRtsCallWithResult result AddrHint
+ rtsUnitId
+ (fsLit "pushCostCentre") [(ccs,AddrHint),
+ (CmmLit (mkCCostCentre cc), AddrHint)]
+ False
+
+bumpSccCount :: DynFlags -> CmmExpr -> CmmAGraph
+bumpSccCount dflags ccs
+ = addToMem (rEP_CostCentreStack_scc_count dflags)
+ (cmmOffsetB dflags ccs (oFFSET_CostCentreStack_scc_count dflags)) 1
+
+-----------------------------------------------------------------------------
+--
+-- Lag/drag/void stuff
+--
+-----------------------------------------------------------------------------
+
+--
+-- Initial value for the LDV field in a static closure
+--
+staticLdvInit :: DynFlags -> CmmLit
+staticLdvInit = zeroCLit
+
+--
+-- Initial value of the LDV field in a dynamic closure
+--
+dynLdvInit :: DynFlags -> CmmExpr
+dynLdvInit dflags = -- (era << LDV_SHIFT) | LDV_STATE_CREATE
+ CmmMachOp (mo_wordOr dflags) [
+ CmmMachOp (mo_wordShl dflags) [loadEra dflags, mkIntExpr dflags (lDV_SHIFT dflags)],
+ CmmLit (mkWordCLit dflags (iLDV_STATE_CREATE dflags))
+ ]
+
+--
+-- Initialise the LDV word of a new closure
+--
+ldvRecordCreate :: CmmExpr -> FCode ()
+ldvRecordCreate closure = do
+ dflags <- getDynFlags
+ emit $ mkStore (ldvWord dflags closure) (dynLdvInit dflags)
+
+--
+-- | Called when a closure is entered; marks the closure as having
+-- been "used". The closure is not an "inherently used" one, nor an
+-- @IND@, because @IND@s are not considered for LDV profiling.
+--
+ldvEnterClosure :: ClosureInfo -> CmmReg -> FCode ()
+ldvEnterClosure closure_info node_reg = do
+ dflags <- getDynFlags
+ let tag = funTag dflags closure_info
+ -- don't forget to subtract node's tag
+ ldvEnter (cmmOffsetB dflags (CmmReg node_reg) (-tag))
+
+ldvEnter :: CmmExpr -> FCode ()
+-- Argument is a closure pointer
+ldvEnter cl_ptr = do
+ dflags <- getDynFlags
+ let -- don't forget to subtract node's tag
+ ldv_wd = ldvWord dflags cl_ptr
+ new_ldv_wd = cmmOrWord dflags (cmmAndWord dflags (CmmLoad ldv_wd (bWord dflags))
+ (CmmLit (mkWordCLit dflags (iLDV_CREATE_MASK dflags))))
+ (cmmOrWord dflags (loadEra dflags) (CmmLit (mkWordCLit dflags (iLDV_STATE_USE dflags))))
+ ifProfiling $
+ -- if (era > 0) {
+ -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) |
+ -- era | LDV_STATE_USE }
+ emit =<< mkCmmIfThenElse (CmmMachOp (mo_wordUGt dflags) [loadEra dflags, CmmLit (zeroCLit dflags)])
+ (mkStore ldv_wd new_ldv_wd)
+ mkNop
+
+loadEra :: DynFlags -> CmmExpr
+loadEra dflags = CmmMachOp (MO_UU_Conv (cIntWidth dflags) (wordWidth dflags))
+ [CmmLoad (mkLblExpr (mkCmmDataLabel rtsUnitId (fsLit "era")))
+ (cInt dflags)]
+
+ldvWord :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes the address of a closure, and returns
+-- the address of the LDV word in the closure
+ldvWord dflags closure_ptr
+ = cmmOffsetB dflags closure_ptr (oFFSET_StgHeader_ldvw dflags)
diff --git a/compiler/GHC/StgToCmm/Ticky.hs b/compiler/GHC/StgToCmm/Ticky.hs
new file mode 100644
index 0000000000..06ef520c0d
--- /dev/null
+++ b/compiler/GHC/StgToCmm/Ticky.hs
@@ -0,0 +1,682 @@
+{-# LANGUAGE BangPatterns #-}
+
+-----------------------------------------------------------------------------
+--
+-- Code generation for ticky-ticky profiling
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+{- OVERVIEW: ticky ticky profiling
+
+Please see
+https://gitlab.haskell.org/ghc/ghc/wikis/debugging/ticky-ticky and also
+edit it and the rest of this comment to keep them up-to-date if you
+change ticky-ticky. Thanks!
+
+ *** All allocation ticky numbers are in bytes. ***
+
+Some of the relevant source files:
+
+ ***not necessarily an exhaustive list***
+
+ * some codeGen/ modules import this one
+
+ * this module imports cmm/CLabel.hs to manage labels
+
+ * cmm/CmmParse.y expands some macros using generators defined in
+ this module
+
+ * includes/stg/Ticky.h declares all of the global counters
+
+ * includes/rts/Ticky.h declares the C data type for an
+ STG-declaration's counters
+
+ * some macros defined in includes/Cmm.h (and used within the RTS's
+ CMM code) update the global ticky counters
+
+ * at the end of execution rts/Ticky.c generates the final report
+ +RTS -r<report-file> -RTS
+
+The rts/Ticky.c function that generates the report includes an
+STG-declaration's ticky counters if
+
+ * that declaration was entered, or
+
+ * it was allocated (if -ticky-allocd)
+
+On either of those events, the counter is "registered" by adding it to
+a linked list; cf the CMM generated by registerTickyCtr.
+
+Ticky-ticky profiling has evolved over many years. Many of the
+counters from its most sophisticated days are no longer
+active/accurate. As the RTS has changed, sometimes the ticky code for
+relevant counters was not accordingly updated. Unfortunately, neither
+were the comments.
+
+As of March 2013, there still exist deprecated code and comments in
+the code generator as well as the RTS because:
+
+ * I don't know what is out-of-date versus merely commented out for
+ momentary convenience, and
+
+ * someone else might know how to repair it!
+
+-}
+
+module GHC.StgToCmm.Ticky (
+ withNewTickyCounterFun,
+ withNewTickyCounterLNE,
+ withNewTickyCounterThunk,
+ withNewTickyCounterStdThunk,
+ withNewTickyCounterCon,
+
+ tickyDynAlloc,
+ tickyAllocHeap,
+
+ tickyAllocPrim,
+ tickyAllocThunk,
+ tickyAllocPAP,
+ tickyHeapCheck,
+ tickyStackCheck,
+
+ tickyUnknownCall, tickyDirectCall,
+
+ tickyPushUpdateFrame,
+ tickyUpdateFrameOmitted,
+
+ tickyEnterDynCon,
+ tickyEnterStaticCon,
+ tickyEnterViaNode,
+
+ tickyEnterFun,
+ tickyEnterThunk, tickyEnterStdThunk, -- dynamic non-value
+ -- thunks only
+ tickyEnterLNE,
+
+ tickyUpdateBhCaf,
+ tickyBlackHole,
+ tickyUnboxedTupleReturn,
+ tickyReturnOldCon, tickyReturnNewCon,
+
+ tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs,
+ tickySlowCall, tickySlowCallPat,
+ ) where
+
+import GhcPrelude
+
+import GHC.StgToCmm.ArgRep ( slowCallPattern , toArgRep , argRepString )
+import GHC.StgToCmm.Closure
+import GHC.StgToCmm.Utils
+import GHC.StgToCmm.Monad
+
+import StgSyn
+import CmmExpr
+import MkGraph
+import CmmUtils
+import CLabel
+import SMRep
+
+import Module
+import Name
+import Id
+import BasicTypes
+import FastString
+import Outputable
+import Util
+
+import DynFlags
+
+-- Turgid imports for showTypeCategory
+import PrelNames
+import TcType
+import Type
+import TyCon
+
+import Data.Maybe
+import qualified Data.Char
+import Control.Monad ( when )
+
+-----------------------------------------------------------------------------
+--
+-- Ticky-ticky profiling
+--
+-----------------------------------------------------------------------------
+
+data TickyClosureType
+ = TickyFun
+ Bool -- True <-> single entry
+ | TickyCon
+ | TickyThunk
+ Bool -- True <-> updateable
+ Bool -- True <-> standard thunk (AP or selector), has no entry counter
+ | TickyLNE
+
+withNewTickyCounterFun :: Bool -> Name -> [NonVoid Id] -> FCode a -> FCode a
+withNewTickyCounterFun single_entry = withNewTickyCounter (TickyFun single_entry)
+
+withNewTickyCounterLNE :: Name -> [NonVoid Id] -> FCode a -> FCode a
+withNewTickyCounterLNE nm args code = do
+ b <- tickyLNEIsOn
+ if not b then code else withNewTickyCounter TickyLNE nm args code
+
+thunkHasCounter :: Bool -> FCode Bool
+thunkHasCounter isStatic = do
+ b <- tickyDynThunkIsOn
+ pure (not isStatic && b)
+
+withNewTickyCounterThunk
+ :: Bool -- ^ static
+ -> Bool -- ^ updateable
+ -> Name
+ -> FCode a
+ -> FCode a
+withNewTickyCounterThunk isStatic isUpdatable name code = do
+ has_ctr <- thunkHasCounter isStatic
+ if not has_ctr
+ then code
+ else withNewTickyCounter (TickyThunk isUpdatable False) name [] code
+
+withNewTickyCounterStdThunk
+ :: Bool -- ^ updateable
+ -> Name
+ -> FCode a
+ -> FCode a
+withNewTickyCounterStdThunk isUpdatable name code = do
+ has_ctr <- thunkHasCounter False
+ if not has_ctr
+ then code
+ else withNewTickyCounter (TickyThunk isUpdatable True) name [] code
+
+withNewTickyCounterCon
+ :: Name
+ -> FCode a
+ -> FCode a
+withNewTickyCounterCon name code = do
+ has_ctr <- thunkHasCounter False
+ if not has_ctr
+ then code
+ else withNewTickyCounter TickyCon name [] code
+
+-- args does not include the void arguments
+withNewTickyCounter :: TickyClosureType -> Name -> [NonVoid Id] -> FCode a -> FCode a
+withNewTickyCounter cloType name args m = do
+ lbl <- emitTickyCounter cloType name args
+ setTickyCtrLabel lbl m
+
+emitTickyCounter :: TickyClosureType -> Name -> [NonVoid Id] -> FCode CLabel
+emitTickyCounter cloType name args
+ = let ctr_lbl = mkRednCountsLabel name in
+ (>> return ctr_lbl) $
+ ifTicky $ do
+ { dflags <- getDynFlags
+ ; parent <- getTickyCtrLabel
+ ; mod_name <- getModuleName
+
+ -- When printing the name of a thing in a ticky file, we
+ -- want to give the module name even for *local* things. We
+ -- print just "x (M)" rather than "M.x" to distinguish them
+ -- from the global kind.
+ ; let ppr_for_ticky_name :: SDoc
+ ppr_for_ticky_name =
+ let n = ppr name
+ ext = case cloType of
+ TickyFun single_entry -> parens $ hcat $ punctuate comma $
+ [text "fun"] ++ [text "se"|single_entry]
+ TickyCon -> parens (text "con")
+ TickyThunk upd std -> parens $ hcat $ punctuate comma $
+ [text "thk"] ++ [text "se"|not upd] ++ [text "std"|std]
+ TickyLNE | isInternalName name -> parens (text "LNE")
+ | otherwise -> panic "emitTickyCounter: how is this an external LNE?"
+ p = case hasHaskellName parent of
+ -- NB the default "top" ticky ctr does not
+ -- have a Haskell name
+ Just pname -> text "in" <+> ppr (nameUnique pname)
+ _ -> empty
+ in if isInternalName name
+ then n <+> parens (ppr mod_name) <+> ext <+> p
+ else n <+> ext <+> p
+
+ ; fun_descr_lit <- newStringCLit $ showSDocDebug dflags ppr_for_ticky_name
+ ; arg_descr_lit <- newStringCLit $ map (showTypeCategory . idType . fromNonVoid) args
+ ; emitDataLits ctr_lbl
+ -- Must match layout of includes/rts/Ticky.h's StgEntCounter
+ --
+ -- krc: note that all the fields are I32 now; some were I16
+ -- before, but the code generator wasn't handling that
+ -- properly and it led to chaos, panic and disorder.
+ [ mkIntCLit dflags 0, -- registered?
+ mkIntCLit dflags (length args), -- Arity
+ mkIntCLit dflags 0, -- Heap allocated for this thing
+ fun_descr_lit,
+ arg_descr_lit,
+ zeroCLit dflags, -- Entries into this thing
+ zeroCLit dflags, -- Heap allocated by this thing
+ zeroCLit dflags -- Link to next StgEntCounter
+ ]
+ }
+
+-- -----------------------------------------------------------------------------
+-- Ticky stack frames
+
+tickyPushUpdateFrame, tickyUpdateFrameOmitted :: FCode ()
+tickyPushUpdateFrame = ifTicky $ bumpTickyCounter (fsLit "UPDF_PUSHED_ctr")
+tickyUpdateFrameOmitted = ifTicky $ bumpTickyCounter (fsLit "UPDF_OMITTED_ctr")
+
+-- -----------------------------------------------------------------------------
+-- Ticky entries
+
+-- NB the name-specific entries are only available for names that have
+-- dedicated Cmm code. As far as I know, this just rules out
+-- constructor thunks. For them, there is no CMM code block to put the
+-- bump of the name-specific ticky counter into. On the other hand, we
+-- can still track their allocation.
+
+tickyEnterDynCon, tickyEnterStaticCon, tickyEnterViaNode :: FCode ()
+tickyEnterDynCon = ifTicky $ bumpTickyCounter (fsLit "ENT_DYN_CON_ctr")
+tickyEnterStaticCon = ifTicky $ bumpTickyCounter (fsLit "ENT_STATIC_CON_ctr")
+tickyEnterViaNode = ifTicky $ bumpTickyCounter (fsLit "ENT_VIA_NODE_ctr")
+
+tickyEnterThunk :: ClosureInfo -> FCode ()
+tickyEnterThunk cl_info
+ = ifTicky $ do
+ { bumpTickyCounter ctr
+ ; has_ctr <- thunkHasCounter static
+ ; when has_ctr $ do
+ ticky_ctr_lbl <- getTickyCtrLabel
+ registerTickyCtrAtEntryDyn ticky_ctr_lbl
+ bumpTickyEntryCount ticky_ctr_lbl }
+ where
+ updatable = closureSingleEntry cl_info
+ static = isStaticClosure cl_info
+
+ ctr | static = if updatable then fsLit "ENT_STATIC_THK_SINGLE_ctr"
+ else fsLit "ENT_STATIC_THK_MANY_ctr"
+ | otherwise = if updatable then fsLit "ENT_DYN_THK_SINGLE_ctr"
+ else fsLit "ENT_DYN_THK_MANY_ctr"
+
+tickyEnterStdThunk :: ClosureInfo -> FCode ()
+tickyEnterStdThunk = tickyEnterThunk
+
+tickyBlackHole :: Bool{-updatable-} -> FCode ()
+tickyBlackHole updatable
+ = ifTicky (bumpTickyCounter ctr)
+ where
+ ctr | updatable = (fsLit "UPD_BH_SINGLE_ENTRY_ctr")
+ | otherwise = (fsLit "UPD_BH_UPDATABLE_ctr")
+
+tickyUpdateBhCaf :: ClosureInfo -> FCode ()
+tickyUpdateBhCaf cl_info
+ = ifTicky (bumpTickyCounter ctr)
+ where
+ ctr | closureUpdReqd cl_info = (fsLit "UPD_CAF_BH_SINGLE_ENTRY_ctr")
+ | otherwise = (fsLit "UPD_CAF_BH_UPDATABLE_ctr")
+
+tickyEnterFun :: ClosureInfo -> FCode ()
+tickyEnterFun cl_info = ifTicky $ do
+ ctr_lbl <- getTickyCtrLabel
+
+ if isStaticClosure cl_info
+ then do bumpTickyCounter (fsLit "ENT_STATIC_FUN_DIRECT_ctr")
+ registerTickyCtr ctr_lbl
+ else do bumpTickyCounter (fsLit "ENT_DYN_FUN_DIRECT_ctr")
+ registerTickyCtrAtEntryDyn ctr_lbl
+
+ bumpTickyEntryCount ctr_lbl
+
+tickyEnterLNE :: FCode ()
+tickyEnterLNE = ifTicky $ do
+ bumpTickyCounter (fsLit "ENT_LNE_ctr")
+ ifTickyLNE $ do
+ ctr_lbl <- getTickyCtrLabel
+ registerTickyCtr ctr_lbl
+ bumpTickyEntryCount ctr_lbl
+
+-- needn't register a counter upon entry if
+--
+-- 1) it's for a dynamic closure, and
+--
+-- 2) -ticky-allocd is on
+--
+-- since the counter was registered already upon being alloc'd
+registerTickyCtrAtEntryDyn :: CLabel -> FCode ()
+registerTickyCtrAtEntryDyn ctr_lbl = do
+ already_registered <- tickyAllocdIsOn
+ when (not already_registered) $ registerTickyCtr ctr_lbl
+
+registerTickyCtr :: CLabel -> FCode ()
+-- Register a ticky counter
+-- if ( ! f_ct.registeredp ) {
+-- f_ct.link = ticky_entry_ctrs; /* hook this one onto the front of the list */
+-- ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */
+-- f_ct.registeredp = 1 }
+registerTickyCtr ctr_lbl = do
+ dflags <- getDynFlags
+ let
+ -- krc: code generator doesn't handle Not, so we test for Eq 0 instead
+ test = CmmMachOp (MO_Eq (wordWidth dflags))
+ [CmmLoad (CmmLit (cmmLabelOffB ctr_lbl
+ (oFFSET_StgEntCounter_registeredp dflags))) (bWord dflags),
+ zeroExpr dflags]
+ register_stmts
+ = [ mkStore (CmmLit (cmmLabelOffB ctr_lbl (oFFSET_StgEntCounter_link dflags)))
+ (CmmLoad ticky_entry_ctrs (bWord dflags))
+ , mkStore ticky_entry_ctrs (mkLblExpr ctr_lbl)
+ , mkStore (CmmLit (cmmLabelOffB ctr_lbl
+ (oFFSET_StgEntCounter_registeredp dflags)))
+ (mkIntExpr dflags 1) ]
+ ticky_entry_ctrs = mkLblExpr (mkCmmDataLabel rtsUnitId (fsLit "ticky_entry_ctrs"))
+ emit =<< mkCmmIfThen test (catAGraphs register_stmts)
+
+tickyReturnOldCon, tickyReturnNewCon :: RepArity -> FCode ()
+tickyReturnOldCon arity
+ = ifTicky $ do { bumpTickyCounter (fsLit "RET_OLD_ctr")
+ ; bumpHistogram (fsLit "RET_OLD_hst") arity }
+tickyReturnNewCon arity
+ = ifTicky $ do { bumpTickyCounter (fsLit "RET_NEW_ctr")
+ ; bumpHistogram (fsLit "RET_NEW_hst") arity }
+
+tickyUnboxedTupleReturn :: RepArity -> FCode ()
+tickyUnboxedTupleReturn arity
+ = ifTicky $ do { bumpTickyCounter (fsLit "RET_UNBOXED_TUP_ctr")
+ ; bumpHistogram (fsLit "RET_UNBOXED_TUP_hst") arity }
+
+-- -----------------------------------------------------------------------------
+-- Ticky calls
+
+-- Ticks at a *call site*:
+tickyDirectCall :: RepArity -> [StgArg] -> FCode ()
+tickyDirectCall arity args
+ | args `lengthIs` arity = tickyKnownCallExact
+ | otherwise = do tickyKnownCallExtraArgs
+ tickySlowCallPat (map argPrimRep (drop arity args))
+
+tickyKnownCallTooFewArgs :: FCode ()
+tickyKnownCallTooFewArgs = ifTicky $ bumpTickyCounter (fsLit "KNOWN_CALL_TOO_FEW_ARGS_ctr")
+
+tickyKnownCallExact :: FCode ()
+tickyKnownCallExact = ifTicky $ bumpTickyCounter (fsLit "KNOWN_CALL_ctr")
+
+tickyKnownCallExtraArgs :: FCode ()
+tickyKnownCallExtraArgs = ifTicky $ bumpTickyCounter (fsLit "KNOWN_CALL_EXTRA_ARGS_ctr")
+
+tickyUnknownCall :: FCode ()
+tickyUnknownCall = ifTicky $ bumpTickyCounter (fsLit "UNKNOWN_CALL_ctr")
+
+-- Tick for the call pattern at slow call site (i.e. in addition to
+-- tickyUnknownCall, tickyKnownCallExtraArgs, etc.)
+tickySlowCall :: LambdaFormInfo -> [StgArg] -> FCode ()
+tickySlowCall _ [] = return ()
+tickySlowCall lf_info args = do
+ -- see Note [Ticky for slow calls]
+ if isKnownFun lf_info
+ then tickyKnownCallTooFewArgs
+ else tickyUnknownCall
+ tickySlowCallPat (map argPrimRep args)
+
+tickySlowCallPat :: [PrimRep] -> FCode ()
+tickySlowCallPat args = ifTicky $
+ let argReps = map toArgRep args
+ (_, n_matched) = slowCallPattern argReps
+ in if n_matched > 0 && args `lengthIs` n_matched
+ then bumpTickyLbl $ mkRtsSlowFastTickyCtrLabel $ concatMap (map Data.Char.toLower . argRepString) argReps
+ else bumpTickyCounter $ fsLit "VERY_SLOW_CALL_ctr"
+
+{-
+
+Note [Ticky for slow calls]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Terminology is unfortunately a bit mixed up for these calls. codeGen
+uses "slow call" to refer to unknown calls and under-saturated known
+calls.
+
+Nowadays, though (ie as of the eval/apply paper), the significantly
+slower calls are actually just a subset of these: the ones with no
+built-in argument pattern (cf GHC.StgToCmm.ArgRep.slowCallPattern)
+
+So for ticky profiling, we split slow calls into
+"SLOW_CALL_fast_<pattern>_ctr" (those matching a built-in pattern) and
+VERY_SLOW_CALL_ctr (those without a built-in pattern; these are very
+bad for both space and time).
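+
+To make the split concrete: a slow call whose argument reps match a
+built-in pattern, say (P,P), bumps SLOW_CALL_fast_pp_ctr, while one
+matching no pattern bumps VERY_SLOW_CALL_ctr (cf tickySlowCallPat).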
+
+-}
+
+-- -----------------------------------------------------------------------------
+-- Ticky allocation
+
+tickyDynAlloc :: Maybe Id -> SMRep -> LambdaFormInfo -> FCode ()
+-- Called when doing a dynamic heap allocation; the LambdaFormInfo
+-- used to distinguish between closure types
+--
+-- TODO what else to count while we're here?
+tickyDynAlloc mb_id rep lf = ifTicky $ getDynFlags >>= \dflags ->
+ let bytes = wORD_SIZE dflags * heapClosureSizeW dflags rep
+
+ countGlobal tot ctr = do
+ bumpTickyCounterBy tot bytes
+ bumpTickyCounter ctr
+ countSpecific = ifTickyAllocd $ case mb_id of
+ Nothing -> return ()
+ Just id -> do
+ let ctr_lbl = mkRednCountsLabel (idName id)
+ registerTickyCtr ctr_lbl
+ bumpTickyAllocd ctr_lbl bytes
+
+ -- TODO are we still tracking "good stuff" (_gds) versus
+ -- administrative (_adm) versus slop (_slp)? I'm going with all _gds
+ -- for now, since I don't currently know neither if we do nor how to
+ -- for now, since I currently know neither whether we do nor how to
+
+ in case () of
+ _ | isConRep rep ->
+ ifTickyDynThunk countSpecific >>
+ countGlobal (fsLit "ALLOC_CON_gds") (fsLit "ALLOC_CON_ctr")
+ | isThunkRep rep ->
+ ifTickyDynThunk countSpecific >>
+ if lfUpdatable lf
+ then countGlobal (fsLit "ALLOC_THK_gds") (fsLit "ALLOC_UP_THK_ctr")
+ else countGlobal (fsLit "ALLOC_THK_gds") (fsLit "ALLOC_SE_THK_ctr")
+ | isFunRep rep ->
+ countSpecific >>
+ countGlobal (fsLit "ALLOC_FUN_gds") (fsLit "ALLOC_FUN_ctr")
+ | otherwise -> panic "How is this heap object not a con, thunk, or fun?"
+
+
+
+tickyAllocHeap ::
+ Bool -> -- is this a genuine allocation? As opposed to
+ -- GHC.StgToCmm.Layout.adjustHpBackwards
+ VirtualHpOffset -> FCode ()
+-- Called when doing a heap check [TICK_ALLOC_HEAP]
+-- Must be lazy in the amount of allocation!
+tickyAllocHeap genuine hp
+ = ifTicky $
+ do { dflags <- getDynFlags
+ ; ticky_ctr <- getTickyCtrLabel
+ ; emit $ catAGraphs $
+ -- only test hp from within the emit so that the monadic
+ -- computation itself is not strict in hp (cf knot in
+ -- GHC.StgToCmm.Monad.getHeapUsage)
+ if hp == 0 then []
+ else let !bytes = wORD_SIZE dflags * hp in [
+ -- Bump the allocation total in the closure's StgEntCounter
+ addToMem (rEP_StgEntCounter_allocs dflags)
+ (CmmLit (cmmLabelOffB ticky_ctr (oFFSET_StgEntCounter_allocs dflags)))
+ bytes,
+ -- Bump the global allocation total ALLOC_HEAP_tot
+ addToMemLbl (bWord dflags)
+ (mkCmmDataLabel rtsUnitId (fsLit "ALLOC_HEAP_tot"))
+ bytes,
+ -- Bump the global allocation counter ALLOC_HEAP_ctr
+ if not genuine then mkNop
+ else addToMemLbl (bWord dflags)
+ (mkCmmDataLabel rtsUnitId (fsLit "ALLOC_HEAP_ctr"))
+ 1
+ ]}
+
+
+--------------------------------------------------------------------------------
+-- these three are only called from CmmParse.y (ie ultimately from the RTS)
+
+-- the units are bytes
+
+tickyAllocPrim :: CmmExpr -- ^ size of the full header, in bytes
+ -> CmmExpr -- ^ size of the payload, in bytes
+ -> CmmExpr -> FCode ()
+tickyAllocPrim _hdr _goods _slop = ifTicky $ do
+ bumpTickyCounter (fsLit "ALLOC_PRIM_ctr")
+ bumpTickyCounterByE (fsLit "ALLOC_PRIM_adm") _hdr
+ bumpTickyCounterByE (fsLit "ALLOC_PRIM_gds") _goods
+ bumpTickyCounterByE (fsLit "ALLOC_PRIM_slp") _slop
+
+tickyAllocThunk :: CmmExpr -> CmmExpr -> FCode ()
+tickyAllocThunk _goods _slop = ifTicky $ do
+ -- TODO is it ever called with a Single-Entry thunk?
+ bumpTickyCounter (fsLit "ALLOC_UP_THK_ctr")
+ bumpTickyCounterByE (fsLit "ALLOC_THK_gds") _goods
+ bumpTickyCounterByE (fsLit "ALLOC_THK_slp") _slop
+
+tickyAllocPAP :: CmmExpr -> CmmExpr -> FCode ()
+tickyAllocPAP _goods _slop = ifTicky $ do
+ bumpTickyCounter (fsLit "ALLOC_PAP_ctr")
+ bumpTickyCounterByE (fsLit "ALLOC_PAP_gds") _goods
+ bumpTickyCounterByE (fsLit "ALLOC_PAP_slp") _slop
+
+tickyHeapCheck :: FCode ()
+tickyHeapCheck = ifTicky $ bumpTickyCounter (fsLit "HEAP_CHK_ctr")
+
+tickyStackCheck :: FCode ()
+tickyStackCheck = ifTicky $ bumpTickyCounter (fsLit "STK_CHK_ctr")
+
+-- -----------------------------------------------------------------------------
+-- Ticky utils
+
+ifTicky :: FCode () -> FCode ()
+ifTicky code =
+ getDynFlags >>= \dflags -> when (gopt Opt_Ticky dflags) code
+
+tickyAllocdIsOn :: FCode Bool
+tickyAllocdIsOn = gopt Opt_Ticky_Allocd `fmap` getDynFlags
+
+tickyLNEIsOn :: FCode Bool
+tickyLNEIsOn = gopt Opt_Ticky_LNE `fmap` getDynFlags
+
+tickyDynThunkIsOn :: FCode Bool
+tickyDynThunkIsOn = gopt Opt_Ticky_Dyn_Thunk `fmap` getDynFlags
+
+ifTickyAllocd :: FCode () -> FCode ()
+ifTickyAllocd code = tickyAllocdIsOn >>= \b -> when b code
+
+ifTickyLNE :: FCode () -> FCode ()
+ifTickyLNE code = tickyLNEIsOn >>= \b -> when b code
+
+ifTickyDynThunk :: FCode () -> FCode ()
+ifTickyDynThunk code = tickyDynThunkIsOn >>= \b -> when b code
+
+bumpTickyCounter :: FastString -> FCode ()
+bumpTickyCounter lbl = bumpTickyLbl (mkCmmDataLabel rtsUnitId lbl)
+
+bumpTickyCounterBy :: FastString -> Int -> FCode ()
+bumpTickyCounterBy lbl = bumpTickyLblBy (mkCmmDataLabel rtsUnitId lbl)
+
+bumpTickyCounterByE :: FastString -> CmmExpr -> FCode ()
+bumpTickyCounterByE lbl = bumpTickyLblByE (mkCmmDataLabel rtsUnitId lbl)
+
+bumpTickyEntryCount :: CLabel -> FCode ()
+bumpTickyEntryCount lbl = do
+ dflags <- getDynFlags
+ bumpTickyLit (cmmLabelOffB lbl (oFFSET_StgEntCounter_entry_count dflags))
+
+bumpTickyAllocd :: CLabel -> Int -> FCode ()
+bumpTickyAllocd lbl bytes = do
+ dflags <- getDynFlags
+ bumpTickyLitBy (cmmLabelOffB lbl (oFFSET_StgEntCounter_allocd dflags)) bytes
+
+bumpTickyLbl :: CLabel -> FCode ()
+bumpTickyLbl lhs = bumpTickyLitBy (cmmLabelOffB lhs 0) 1
+
+bumpTickyLblBy :: CLabel -> Int -> FCode ()
+bumpTickyLblBy lhs = bumpTickyLitBy (cmmLabelOffB lhs 0)
+
+bumpTickyLblByE :: CLabel -> CmmExpr -> FCode ()
+bumpTickyLblByE lhs = bumpTickyLitByE (cmmLabelOffB lhs 0)
+
+bumpTickyLit :: CmmLit -> FCode ()
+bumpTickyLit lhs = bumpTickyLitBy lhs 1
+
+bumpTickyLitBy :: CmmLit -> Int -> FCode ()
+bumpTickyLitBy lhs n = do
+ dflags <- getDynFlags
+ emit (addToMem (bWord dflags) (CmmLit lhs) n)
+
+bumpTickyLitByE :: CmmLit -> CmmExpr -> FCode ()
+bumpTickyLitByE lhs e = do
+ dflags <- getDynFlags
+ emit (addToMemE (bWord dflags) (CmmLit lhs) e)
+
+bumpHistogram :: FastString -> Int -> FCode ()
+bumpHistogram lbl n = do
+ dflags <- getDynFlags
+ let offset = n `min` (tICKY_BIN_COUNT dflags - 1)
+ emit (addToMem (bWord dflags)
+ (cmmIndexExpr dflags
+ (wordWidth dflags)
+ (CmmLit (CmmLabel (mkCmmDataLabel rtsUnitId lbl)))
+ (CmmLit (CmmInt (fromIntegral offset) (wordWidth dflags))))
+ 1)
+
+------------------------------------------------------------------
+-- Showing the "type category" for ticky-ticky profiling
+
+showTypeCategory :: Type -> Char
+ {-
+ + dictionary
+
+ > function
+
+ {C,I,F,D,W} char, int, float, double, word
+ {c,i,f,d,w} unboxed ditto
+
+ T tuple
+
+ P other primitive type
+ p unboxed ditto
+
+ L list
+ E enumeration type
+ S other single-constructor type
+ M other multi-constructor data-con type
+
+ . other type
+
+ - reserved for others to mark as "uninteresting"
+
+ Accurate as of Mar 2013, but I eliminated the Array category instead
+ of updating it, for simplicity. It's in P/p, I think --NSF
+
+ -}
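+-- A few concrete readings of the table above: Int maps to 'I', Int#
+-- to 'i' (unlifted, hence lower-cased), [a] to 'L', and a function
+-- type to '>'.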
+showTypeCategory ty
+ | isDictTy ty = '+'
+ | otherwise = case tcSplitTyConApp_maybe ty of
+ Nothing -> '.'
+ Just (tycon, _) ->
+ (if isUnliftedTyCon tycon then Data.Char.toLower else id) $
+ let anyOf us = getUnique tycon `elem` us in
+ case () of
+ _ | anyOf [funTyConKey] -> '>'
+ | anyOf [charPrimTyConKey, charTyConKey] -> 'C'
+ | anyOf [doublePrimTyConKey, doubleTyConKey] -> 'D'
+ | anyOf [floatPrimTyConKey, floatTyConKey] -> 'F'
+ | anyOf [intPrimTyConKey, int32PrimTyConKey, int64PrimTyConKey,
+ intTyConKey, int8TyConKey, int16TyConKey, int32TyConKey, int64TyConKey
+ ] -> 'I'
+ | anyOf [wordPrimTyConKey, word32PrimTyConKey, word64PrimTyConKey, wordTyConKey,
+ word8TyConKey, word16TyConKey, word32TyConKey, word64TyConKey
+ ] -> 'W'
+ | anyOf [listTyConKey] -> 'L'
+ | isTupleTyCon tycon -> 'T'
+ | isPrimTyCon tycon -> 'P'
+ | isEnumerationTyCon tycon -> 'E'
+ | isJust (tyConSingleDataCon_maybe tycon) -> 'S'
+ | otherwise -> 'M' -- oh, well...
diff --git a/compiler/GHC/StgToCmm/Utils.hs b/compiler/GHC/StgToCmm/Utils.hs
new file mode 100644
index 0000000000..30e37bb930
--- /dev/null
+++ b/compiler/GHC/StgToCmm/Utils.hs
@@ -0,0 +1,578 @@
+{-# LANGUAGE CPP #-}
+
+-----------------------------------------------------------------------------
+--
+-- Code generator utilities; mostly monadic
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+module GHC.StgToCmm.Utils (
+ cgLit, mkSimpleLit,
+ emitDataLits, mkDataLits,
+ emitRODataLits, mkRODataLits,
+ emitRtsCall, emitRtsCallWithResult, emitRtsCallGen,
+ assignTemp, newTemp,
+
+ newUnboxedTupleRegs,
+
+ emitMultiAssign, emitCmmLitSwitch, emitSwitch,
+
+ tagToClosure, mkTaggedObjectLoad,
+
+ callerSaves, callerSaveVolatileRegs, get_GlobalReg_addr,
+
+ cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord,
+ cmmUGtWord, cmmSubWord, cmmMulWord, cmmAddWord, cmmUShrWord,
+ cmmOffsetExprW, cmmOffsetExprB,
+ cmmRegOffW, cmmRegOffB,
+ cmmLabelOffW, cmmLabelOffB,
+ cmmOffsetW, cmmOffsetB,
+ cmmOffsetLitW, cmmOffsetLitB,
+ cmmLoadIndexW,
+ cmmConstrTag1,
+
+ cmmUntag, cmmIsTagged,
+
+ addToMem, addToMemE, addToMemLblE, addToMemLbl,
+ mkWordCLit,
+ newStringCLit, newByteStringCLit,
+ blankWord,
+ ) where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import GHC.StgToCmm.Monad
+import GHC.StgToCmm.Closure
+import Cmm
+import BlockId
+import MkGraph
+import GHC.Platform.Regs
+import CLabel
+import CmmUtils
+import CmmSwitch
+import GHC.StgToCmm.CgUtils
+
+import ForeignCall
+import IdInfo
+import Type
+import TyCon
+import SMRep
+import Module
+import Literal
+import Digraph
+import Util
+import Unique
+import UniqSupply (MonadUnique(..))
+import DynFlags
+import FastString
+import Outputable
+import RepType
+
+import Data.ByteString (ByteString)
+import qualified Data.ByteString.Char8 as BS8
+import qualified Data.Map as M
+import Data.Char
+import Data.List
+import Data.Ord
+
+
+-------------------------------------------------------------------------
+--
+-- Literals
+--
+-------------------------------------------------------------------------
+
+cgLit :: Literal -> FCode CmmLit
+cgLit (LitString s) = newByteStringCLit s
+ -- not unpackFS; we want the UTF-8 byte stream.
+cgLit other_lit = do dflags <- getDynFlags
+ return (mkSimpleLit dflags other_lit)
+
+mkSimpleLit :: DynFlags -> Literal -> CmmLit
+mkSimpleLit dflags (LitChar c) = CmmInt (fromIntegral (ord c))
+ (wordWidth dflags)
+mkSimpleLit dflags LitNullAddr = zeroCLit dflags
+mkSimpleLit dflags (LitNumber LitNumInt i _) = CmmInt i (wordWidth dflags)
+mkSimpleLit _ (LitNumber LitNumInt64 i _) = CmmInt i W64
+mkSimpleLit dflags (LitNumber LitNumWord i _) = CmmInt i (wordWidth dflags)
+mkSimpleLit _ (LitNumber LitNumWord64 i _) = CmmInt i W64
+mkSimpleLit _ (LitFloat r) = CmmFloat r W32
+mkSimpleLit _ (LitDouble r) = CmmFloat r W64
+mkSimpleLit _ (LitLabel fs ms fod)
+ = let -- TODO: Literal labels might not actually be in the current package...
+ labelSrc = ForeignLabelInThisPackage
+ in CmmLabel (mkForeignLabel fs ms labelSrc fod)
+-- NB: LitRubbish should have been lowered in "CoreToStg"
+mkSimpleLit _ other = pprPanic "mkSimpleLit" (ppr other)
+
+--------------------------------------------------------------------------
+--
+-- Incrementing a memory location
+--
+--------------------------------------------------------------------------
+
+addToMemLbl :: CmmType -> CLabel -> Int -> CmmAGraph
+addToMemLbl rep lbl n = addToMem rep (CmmLit (CmmLabel lbl)) n
+
+addToMemLblE :: CmmType -> CLabel -> CmmExpr -> CmmAGraph
+addToMemLblE rep lbl = addToMemE rep (CmmLit (CmmLabel lbl))
+
+addToMem :: CmmType -- rep of the counter
+ -> CmmExpr -- Address
+ -> Int -- What to add (a word)
+ -> CmmAGraph
+addToMem rep ptr n = addToMemE rep ptr (CmmLit (CmmInt (toInteger n) (typeWidth rep)))
+
+addToMemE :: CmmType -- rep of the counter
+ -> CmmExpr -- Address
+ -> CmmExpr -- What to add (a word-typed expression)
+ -> CmmAGraph
+addToMemE rep ptr n
+ = mkStore ptr (CmmMachOp (MO_Add (typeWidth rep)) [CmmLoad ptr rep, n])
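+
+-- e.g. @addToMem (bWord dflags) ptr 1@ turns into the Cmm statement
+-- W_[ptr] = W_[ptr] + 1.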
+
+
+-------------------------------------------------------------------------
+--
+-- Loading a field from an object,
+-- where the object pointer is itself tagged
+--
+-------------------------------------------------------------------------
+
+mkTaggedObjectLoad
+ :: DynFlags -> LocalReg -> LocalReg -> ByteOff -> DynTag -> CmmAGraph
+-- (loadTaggedObjectField reg base off tag) generates assignment
+-- reg = bitsK[ base + off - tag ]
+-- where K is fixed by 'reg'
+mkTaggedObjectLoad dflags reg base offset tag
+ = mkAssign (CmmLocal reg)
+ (CmmLoad (cmmOffsetB dflags
+ (CmmReg (CmmLocal base))
+ (offset - tag))
+ (localRegType reg))
+
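+-- e.g. (hypothetical values) with a pointer tag of 1 and a field at
+-- byte offset 8, this emits reg = bits64[ base + 7 ] on a 64-bit
+-- target.
+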
+-------------------------------------------------------------------------
+--
+-- Converting a closure tag to a closure for enumeration types
+-- (this is the implementation of tagToEnum#).
+--
+-------------------------------------------------------------------------
+
+tagToClosure :: DynFlags -> TyCon -> CmmExpr -> CmmExpr
+tagToClosure dflags tycon tag
+ = CmmLoad (cmmOffsetExprW dflags closure_tbl tag) (bWord dflags)
+ where closure_tbl = CmmLit (CmmLabel lbl)
+ lbl = mkClosureTableLabel (tyConName tycon) NoCafRefs
+
+-------------------------------------------------------------------------
+--
+-- Conditionals and rts calls
+--
+-------------------------------------------------------------------------
+
+emitRtsCall :: UnitId -> FastString -> [(CmmExpr,ForeignHint)] -> Bool -> FCode ()
+emitRtsCall pkg fun args safe = emitRtsCallGen [] (mkCmmCodeLabel pkg fun) args safe
+
+emitRtsCallWithResult :: LocalReg -> ForeignHint -> UnitId -> FastString
+ -> [(CmmExpr,ForeignHint)] -> Bool -> FCode ()
+emitRtsCallWithResult res hint pkg fun args safe
+ = emitRtsCallGen [(res,hint)] (mkCmmCodeLabel pkg fun) args safe
+
+-- Make a call to an RTS C procedure
+emitRtsCallGen
+ :: [(LocalReg,ForeignHint)]
+ -> CLabel
+ -> [(CmmExpr,ForeignHint)]
+ -> Bool -- True <=> CmmSafe call
+ -> FCode ()
+emitRtsCallGen res lbl args safe
+ = do { dflags <- getDynFlags
+ ; updfr_off <- getUpdFrameOff
+ ; let (caller_save, caller_load) = callerSaveVolatileRegs dflags
+ ; emit caller_save
+ ; call updfr_off
+ ; emit caller_load }
+ where
+ call updfr_off =
+ if safe then
+ emit =<< mkCmmCall fun_expr res' args' updfr_off
+ else do
+ let conv = ForeignConvention CCallConv arg_hints res_hints CmmMayReturn
+ emit $ mkUnsafeCall (ForeignTarget fun_expr conv) res' args'
+ (args', arg_hints) = unzip args
+ (res', res_hints) = unzip res
+ fun_expr = mkLblExpr lbl
+
+
+-----------------------------------------------------------------------------
+--
+-- Caller-Save Registers
+--
+-----------------------------------------------------------------------------
+
+-- Here we generate the sequence of saves/restores required around a
+-- foreign call instruction.
+
+-- TODO: reconcile with includes/Regs.h
+-- * Regs.h claims that BaseReg should be saved last and loaded first
+-- * This might not have been tickled before since BaseReg is callee save
+-- * Regs.h saves SparkHd, SparkTl, SparkBase and SparkLim
+--
+-- This code isn't actually used right now, because callerSaves
+-- only ever returns true in the current universe for registers NOT in
+-- system_regs (just do a grep for CALLER_SAVES in
+-- includes/stg/MachRegs.h). It's all one giant no-op, and for
+-- good reason: having to save system registers on every foreign call
+-- would be very expensive, so we avoid assigning them to those
+-- registers when we add support for an architecture.
+--
+-- Note that the old code generator actually does more work here: it
+-- also saves other global registers. We can't (nor do we want to) do that
+-- here, as we don't have liveness information. And really, we
+-- shouldn't be doing the workaround at this point in the pipeline, see
+-- Note [Register parameter passing] and the ToDo on CmmCall in
+-- cmm/CmmNode.hs. Right now the workaround is to avoid inlining across
+-- unsafe foreign calls in rewriteAssignments, but this is strictly
+-- temporary.
+callerSaveVolatileRegs :: DynFlags -> (CmmAGraph, CmmAGraph)
+callerSaveVolatileRegs dflags = (caller_save, caller_load)
+ where
+ platform = targetPlatform dflags
+
+ caller_save = catAGraphs (map callerSaveGlobalReg regs_to_save)
+ caller_load = catAGraphs (map callerRestoreGlobalReg regs_to_save)
+
+ system_regs = [ Sp,SpLim,Hp,HpLim,CCCS,CurrentTSO,CurrentNursery
+ {- ,SparkHd,SparkTl,SparkBase,SparkLim -}
+ , BaseReg ]
+
+ regs_to_save = filter (callerSaves platform) system_regs
+
+ callerSaveGlobalReg reg
+ = mkStore (get_GlobalReg_addr dflags reg) (CmmReg (CmmGlobal reg))
+
+ callerRestoreGlobalReg reg
+ = mkAssign (CmmGlobal reg)
+ (CmmLoad (get_GlobalReg_addr dflags reg) (globalRegType dflags reg))
+
+
+-------------------------------------------------------------------------
+--
+-- Strings generate a top-level data block
+--
+-------------------------------------------------------------------------
+
+emitDataLits :: CLabel -> [CmmLit] -> FCode ()
+-- Emit a data-segment data block
+emitDataLits lbl lits = emitDecl (mkDataLits (Section Data lbl) lbl lits)
+
+emitRODataLits :: CLabel -> [CmmLit] -> FCode ()
+-- Emit a read-only data block
+emitRODataLits lbl lits = emitDecl (mkRODataLits lbl lits)
+
+newStringCLit :: String -> FCode CmmLit
+-- Make a global definition for the string,
+-- and return its label
+newStringCLit str = newByteStringCLit (BS8.pack str)
+
+newByteStringCLit :: ByteString -> FCode CmmLit
+newByteStringCLit bytes
+ = do { uniq <- newUnique
+ ; let (lit, decl) = mkByteStringCLit (mkStringLitLabel uniq) bytes
+ ; emitDecl decl
+ ; return lit }
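+
+-- For example (illustrative), newStringCLit "deadbeef" invents a fresh
+-- string-literal label, emits a byte-array data block for the packed
+-- characters, and returns a label literal that generated code can use as
+-- the string's address.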
+
+-------------------------------------------------------------------------
+--
+-- Assigning expressions to temporaries
+--
+-------------------------------------------------------------------------
+
+assignTemp :: CmmExpr -> FCode LocalReg
+-- Make sure the argument is in a local register.
+-- We don't bother being particularly aggressive about avoiding
+-- unnecessary local registers, since we can rely on a later
+-- optimization pass to inline as necessary. (Skipping the copy for
+-- things like global registers would be a little dangerous, because
+-- they are trashed across foreign calls, though it would mean the
+-- optimization pass has less work to do.)
+assignTemp (CmmReg (CmmLocal reg)) = return reg
+assignTemp e = do { dflags <- getDynFlags
+ ; uniq <- newUnique
+ ; let reg = LocalReg uniq (cmmExprType dflags e)
+ ; emitAssign (CmmLocal reg) e
+ ; return reg }
+
+newTemp :: MonadUnique m => CmmType -> m LocalReg
+newTemp rep = do { uniq <- getUniqueM
+ ; return (LocalReg uniq rep) }
+
+newUnboxedTupleRegs :: Type -> FCode ([LocalReg], [ForeignHint])
+-- Choose suitable local regs to use for the components
+-- of an unboxed tuple that we are about to return to
+-- the Sequel. If the Sequel is a join point, using the
+-- regs it wants will save later assignments.
+newUnboxedTupleRegs res_ty
+ = ASSERT( isUnboxedTupleType res_ty )
+ do { dflags <- getDynFlags
+ ; sequel <- getSequel
+ ; regs <- choose_regs dflags sequel
+ ; ASSERT( regs `equalLength` reps )
+ return (regs, map primRepForeignHint reps) }
+ where
+ reps = typePrimRep res_ty
+ choose_regs _ (AssignTo regs _) = return regs
+ choose_regs dflags _ = mapM (newTemp . primRepCmmType dflags) reps
+
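+-- For example (illustrative), returning a value of type (# Int#, Double# #)
+-- gives reps = [IntRep, DoubleRep]. If the Sequel is AssignTo we reuse its
+-- registers; otherwise we allocate a fresh word-sized temp and a fresh F64
+-- temp, and report a foreign hint per component (roughly, a signed hint
+-- for the Int# and no particular hint for the Double#).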
+
+
+-------------------------------------------------------------------------
+-- emitMultiAssign
+-------------------------------------------------------------------------
+
+emitMultiAssign :: [LocalReg] -> [CmmExpr] -> FCode ()
+-- Emit code to perform the assignments in the
+-- input simultaneously, using temporary variables when necessary.
+
+type Key = Int
+type Vrtx = (Key, Stmt) -- Give each vertex a unique number,
+ -- for fast comparison
+type Stmt = (LocalReg, CmmExpr) -- r := e
+
+-- We use the strongly-connected component algorithm, in which
+-- * the vertices are the statements
+-- * an edge goes from s1 to s2 iff
+-- s1 assigns to something s2 uses
+-- that is, if s1 should *follow* s2 in the final order
+
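+-- For example (illustrative), the simultaneous swap { r1 := r2; r2 := r1 }
+-- is a single cyclic SCC. unscramble breaks the cycle with a fresh
+-- temporary, emitting
+--
+--   tmp := r2    -- to_tmp: the first statement, retargeted at tmp
+--   r2  := r1    -- the remaining statements, now acyclic
+--   r1  := tmp   -- from_tmp: copy the saved value into place
+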
+emitMultiAssign [] [] = return ()
+emitMultiAssign [reg] [rhs] = emitAssign (CmmLocal reg) rhs
+emitMultiAssign regs rhss = do
+ dflags <- getDynFlags
+ ASSERT2( equalLength regs rhss, ppr regs $$ ppr rhss )
+ unscramble dflags ([1..] `zip` (regs `zip` rhss))
+
+unscramble :: DynFlags -> [Vrtx] -> FCode ()
+unscramble dflags vertices = mapM_ do_component components
+ where
+ edges :: [ Node Key Vrtx ]
+ edges = [ DigraphNode vertex key1 (edges_from stmt1)
+ | vertex@(key1, stmt1) <- vertices ]
+
+ edges_from :: Stmt -> [Key]
+ edges_from stmt1 = [ key2 | (key2, stmt2) <- vertices,
+ stmt1 `mustFollow` stmt2 ]
+
+ components :: [SCC Vrtx]
+ components = stronglyConnCompFromEdgedVerticesUniq edges
+
+ -- do_component deals with one strongly-connected component
+ -- Not cyclic, or singleton? Just do it
+ do_component :: SCC Vrtx -> FCode ()
+ do_component (AcyclicSCC (_,stmt)) = mk_graph stmt
+ do_component (CyclicSCC []) = panic "do_component"
+ do_component (CyclicSCC [(_,stmt)]) = mk_graph stmt
+
+ -- Cyclic? Then go via temporaries. Pick one to
+ -- break the loop and try again with the rest.
+ do_component (CyclicSCC ((_,first_stmt) : rest)) = do
+ dflags <- getDynFlags
+ u <- newUnique
+ let (to_tmp, from_tmp) = split dflags u first_stmt
+ mk_graph to_tmp
+ unscramble dflags rest
+ mk_graph from_tmp
+
+ split :: DynFlags -> Unique -> Stmt -> (Stmt, Stmt)
+ split dflags uniq (reg, rhs)
+ = ((tmp, rhs), (reg, CmmReg (CmmLocal tmp)))
+ where
+ rep = cmmExprType dflags rhs
+ tmp = LocalReg uniq rep
+
+ mk_graph :: Stmt -> FCode ()
+ mk_graph (reg, rhs) = emitAssign (CmmLocal reg) rhs
+
+ mustFollow :: Stmt -> Stmt -> Bool
+ (reg, _) `mustFollow` (_, rhs) = regUsedIn dflags (CmmLocal reg) rhs
+
+-------------------------------------------------------------------------
+-- mkSwitch
+-------------------------------------------------------------------------
+
+
+emitSwitch :: CmmExpr -- Tag to switch on
+ -> [(ConTagZ, CmmAGraphScoped)] -- Tagged branches
+ -> Maybe CmmAGraphScoped -- Default branch (if any)
+ -> ConTagZ -> ConTagZ -- Min and Max possible values;
+ -- behaviour outside this range is
+ -- undefined
+ -> FCode ()
+
+-- First, two rather common cases in which there is no work to do
+emitSwitch _ [] (Just code) _ _ = emit (fst code)
+emitSwitch _ [(_,code)] Nothing _ _ = emit (fst code)
+
+-- Right, off we go
+emitSwitch tag_expr branches mb_deflt lo_tag hi_tag = do
+ join_lbl <- newBlockId
+ mb_deflt_lbl <- label_default join_lbl mb_deflt
+ branches_lbls <- label_branches join_lbl branches
+ tag_expr' <- assignTemp' tag_expr
+
+ -- Sort the branches before calling mk_discrete_switch
+ let branches_lbls' = [ (fromIntegral i, l) | (i,l) <- sortBy (comparing fst) branches_lbls ]
+ let range = (fromIntegral lo_tag, fromIntegral hi_tag)
+
+ emit $ mk_discrete_switch False tag_expr' branches_lbls' mb_deflt_lbl range
+
+ emitLabel join_lbl
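+
+-- For example (illustrative), a case on a data type with three
+-- constructors (zero-based tags 0..2) and no default could be compiled via
+--
+--   emitSwitch tag_expr [(0,alt0), (1,alt1), (2,alt2)] Nothing 0 2
+--
+-- leaving the choice between a jump table and a tree of comparisons to
+-- CmmImplementSwitchPlans later in the pipeline.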
+
+mk_discrete_switch :: Bool -- ^ Use signed comparisons
+ -> CmmExpr
+ -> [(Integer, BlockId)]
+ -> Maybe BlockId
+ -> (Integer, Integer)
+ -> CmmAGraph
+
+-- SINGLETON TAG RANGE: no case analysis to do
+mk_discrete_switch _ _tag_expr [(tag, lbl)] _ (lo_tag, hi_tag)
+ | lo_tag == hi_tag
+ = ASSERT( tag == lo_tag )
+ mkBranch lbl
+
+-- SINGLETON BRANCH, NO DEFAULT: no case analysis to do
+mk_discrete_switch _ _tag_expr [(_tag,lbl)] Nothing _
+ = mkBranch lbl
+ -- The simplifier might have eliminated a case
+ -- so we may have e.g. case xs of
+ -- [] -> e
+ -- In that situation we can be sure the (:) case
+ -- can't happen, so no need to test
+
+-- SOMETHING MORE COMPLICATED: defer to CmmImplementSwitchPlans
+-- See Note [Cmm Switches, the general plan] in CmmSwitch
+mk_discrete_switch signed tag_expr branches mb_deflt range
+ = mkSwitch tag_expr $ mkSwitchTargets signed range mb_deflt (M.fromList branches)
+
+divideBranches :: Ord a => [(a,b)] -> ([(a,b)], a, [(a,b)])
+divideBranches branches = (lo_branches, mid, hi_branches)
+ where
+ -- 2 branches => n_branches `div` 2 = 1
+ -- => branches !! 1 gives the *second* tag
+ -- There are always at least 2 branches here
+ (mid,_) = branches !! (length branches `div` 2)
+ (lo_branches, hi_branches) = span is_lo branches
+ is_lo (t,_) = t < mid
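+
+-- For example (illustrative), with four sorted branches:
+--
+--   divideBranches [(1,a),(3,b),(5,c),(7,d)] == ([(1,a),(3,b)], 5, [(5,c),(7,d)])
+--
+-- The middle tag starts the high half, so a "scrutinee < 5" test sends
+-- everything smaller left and everything else right.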
+
+--------------
+emitCmmLitSwitch :: CmmExpr -- Tag to switch on
+ -> [(Literal, CmmAGraphScoped)] -- Tagged branches
+ -> CmmAGraphScoped -- Default branch (always)
+ -> FCode () -- Emit the code
+emitCmmLitSwitch _scrut [] deflt = emit $ fst deflt
+emitCmmLitSwitch scrut branches deflt = do
+ scrut' <- assignTemp' scrut
+ join_lbl <- newBlockId
+ deflt_lbl <- label_code join_lbl deflt
+ branches_lbls <- label_branches join_lbl branches
+
+ dflags <- getDynFlags
+ let cmm_ty = cmmExprType dflags scrut
+ rep = typeWidth cmm_ty
+
+ -- The literals in the branches tell us whether the comparison should be signed
+ let signed = case head branches of
+ (LitNumber nt _ _, _) -> litNumIsSigned nt
+ _ -> False
+
+ let range | signed = (tARGET_MIN_INT dflags, tARGET_MAX_INT dflags)
+ | otherwise = (0, tARGET_MAX_WORD dflags)
+
+ if isFloatType cmm_ty
+ then emit =<< mk_float_switch rep scrut' deflt_lbl noBound branches_lbls
+ else emit $ mk_discrete_switch
+ signed
+ scrut'
+ [(litValue lit,l) | (lit,l) <- branches_lbls]
+ (Just deflt_lbl)
+ range
+ emitLabel join_lbl
+
+-- | lower bound (inclusive), upper bound (exclusive)
+type LitBound = (Maybe Literal, Maybe Literal)
+
+noBound :: LitBound
+noBound = (Nothing, Nothing)
+
+mk_float_switch :: Width -> CmmExpr -> BlockId
+ -> LitBound
+ -> [(Literal,BlockId)]
+ -> FCode CmmAGraph
+mk_float_switch rep scrut deflt _bounds [(lit,blk)]
+ = do dflags <- getDynFlags
+ return $ mkCbranch (cond dflags) deflt blk Nothing
+ where
+ cond dflags = CmmMachOp ne [scrut, CmmLit cmm_lit]
+ where
+ cmm_lit = mkSimpleLit dflags lit
+ ne = MO_F_Ne rep
+
+mk_float_switch rep scrut deflt_blk_id (lo_bound, hi_bound) branches
+ = do dflags <- getDynFlags
+ lo_blk <- mk_float_switch rep scrut deflt_blk_id bounds_lo lo_branches
+ hi_blk <- mk_float_switch rep scrut deflt_blk_id bounds_hi hi_branches
+ mkCmmIfThenElse (cond dflags) lo_blk hi_blk
+ where
+ (lo_branches, mid_lit, hi_branches) = divideBranches branches
+
+ bounds_lo = (lo_bound, Just mid_lit)
+ bounds_hi = (Just mid_lit, hi_bound)
+
+ cond dflags = CmmMachOp lt [scrut, CmmLit cmm_lit]
+ where
+ cmm_lit = mkSimpleLit dflags mid_lit
+ lt = MO_F_Lt rep
+
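+-- Illustrative shape of the result: for sorted branches on 1.0, 2.0, 3.0
+-- and 4.0, the recursion produces a balanced comparison tree, roughly
+--
+--   if (x < 3.0) then (if (x < 2.0) then (if (x != 1.0) deflt else b1)
+--                                   else (if (x != 2.0) deflt else b2))
+--                else (... likewise for 3.0 / 4.0 ...)
+--
+-- i.e. a binary search that bottoms out in single-literal equality tests,
+-- with the default block taken whenever a test falls through.
+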
+
+--------------
+label_default :: BlockId -> Maybe CmmAGraphScoped -> FCode (Maybe BlockId)
+label_default _ Nothing
+ = return Nothing
+label_default join_lbl (Just code)
+ = do lbl <- label_code join_lbl code
+ return (Just lbl)
+
+--------------
+label_branches :: BlockId -> [(a,CmmAGraphScoped)] -> FCode [(a,BlockId)]
+label_branches _join_lbl []
+ = return []
+label_branches join_lbl ((tag,code):branches)
+ = do lbl <- label_code join_lbl code
+ branches' <- label_branches join_lbl branches
+ return ((tag,lbl):branches')
+
+--------------
+label_code :: BlockId -> CmmAGraphScoped -> FCode BlockId
+-- label_code J code
+-- generates
+-- [L: code; goto J]
+-- and returns L
+label_code join_lbl (code,tsc) = do
+ lbl <- newBlockId
+ emitOutOfLine lbl (code MkGraph.<*> mkBranch join_lbl, tsc)
+ return lbl
+
+--------------
+assignTemp' :: CmmExpr -> FCode CmmExpr
+assignTemp' e
+ | isTrivialCmmExpr e = return e
+ | otherwise = do
+ dflags <- getDynFlags
+ lreg <- newTemp (cmmExprType dflags e)
+ let reg = CmmLocal lreg
+ emitAssign reg e
+ return (CmmReg reg)