summaryrefslogtreecommitdiff
path: root/compiler/codeGen
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/codeGen')
-rw-r--r--compiler/codeGen/CgInfoTbls.hs30
-rw-r--r--compiler/codeGen/CgMonad.lhs19
-rw-r--r--compiler/codeGen/CodeGen.lhs57
-rw-r--r--compiler/codeGen/StgCmm.hs135
-rw-r--r--compiler/codeGen/StgCmmBind.hs57
-rw-r--r--compiler/codeGen/StgCmmClosure.hs29
-rw-r--r--compiler/codeGen/StgCmmCon.hs1
-rw-r--r--compiler/codeGen/StgCmmEnv.hs3
-rw-r--r--compiler/codeGen/StgCmmExpr.hs269
-rw-r--r--compiler/codeGen/StgCmmForeign.hs134
-rw-r--r--compiler/codeGen/StgCmmHeap.hs97
-rw-r--r--compiler/codeGen/StgCmmLayout.hs238
-rw-r--r--compiler/codeGen/StgCmmMonad.hs109
-rw-r--r--compiler/codeGen/StgCmmPrim.hs59
-rw-r--r--compiler/codeGen/StgCmmProf.hs8
-rw-r--r--compiler/codeGen/StgCmmTicky.hs4
-rw-r--r--compiler/codeGen/StgCmmUtils.hs246
17 files changed, 944 insertions, 551 deletions
diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs
index 6c77255a62..7cdb1b6f7e 100644
--- a/compiler/codeGen/CgInfoTbls.hs
+++ b/compiler/codeGen/CgInfoTbls.hs
@@ -36,7 +36,7 @@ import CgBindery
import CgCallConv
import CgUtils
import CgMonad
-import CmmBuildInfoTables
+import CmmUtils
import OldCmm
import CLabel
@@ -66,10 +66,9 @@ emitClosureCodeAndInfoTable cl_info args body
-- Convert from 'ClosureInfo' to 'CmmInfo'.
-- Not used for return points. (The 'smRepClosureTypeInt' call would panic.)
-mkCmmInfo :: ClosureInfo -> FCode CmmInfo
+mkCmmInfo :: ClosureInfo -> FCode CmmInfoTable
mkCmmInfo cl_info
- = return (CmmInfo gc_target Nothing $
- CmmInfoTable { cit_lbl = infoTableLabelFromCI cl_info,
+ = return (CmmInfoTable { cit_lbl = infoTableLabelFromCI cl_info,
cit_rep = closureSMRep cl_info,
cit_prof = prof,
cit_srt = closureSRT cl_info })
@@ -79,14 +78,6 @@ mkCmmInfo cl_info
ty_descr_w8 = stringToWord8s (closureTypeDescr cl_info)
val_descr_w8 = stringToWord8s (closureValDescr cl_info)
- -- The gc_target is to inform the CPS pass when it inserts a stack check.
- -- Since that pass isn't used yet we'll punt for now.
- -- When the CPS pass is fully integrated, this should
- -- be replaced by the label that any heap check jumped to,
- -- so that branch can be shared by both the heap (from codeGen)
- -- and stack checks (from the CPS pass).
- gc_target = panic "TODO: gc_target"
-
-------------------------------------------------------------------------
--
-- Generating the info table and code for a return point
@@ -105,8 +96,7 @@ emitReturnTarget name stmts
; blks <- cgStmtsToBlocks stmts
; frame <- mkStackLayout
; let smrep = mkStackRep (mkLiveness frame)
- info = CmmInfo gc_target Nothing info_tbl
- info_tbl = CmmInfoTable { cit_lbl = info_lbl
+ info = CmmInfoTable { cit_lbl = info_lbl
, cit_prof = NoProfilingInfo
, cit_rep = smrep
, cit_srt = srt_info }
@@ -118,14 +108,6 @@ emitReturnTarget name stmts
info_lbl = mkReturnInfoLabel uniq
entry_lbl = mkReturnPtLabel uniq
- -- The gc_target is to inform the CPS pass when it inserts a stack check.
- -- Since that pass isn't used yet we'll punt for now.
- -- When the CPS pass is fully integrated, this should
- -- be replaced by the label that any heap check jumped to,
- -- so that branch can be shared by both the heap (from codeGen)
- -- and stack checks (from the CPS pass).
- gc_target = panic "TODO: gc_target"
-
-- Build stack layout information from the state of the 'FCode' monad.
-- Should go away once 'codeGen' starts using the CPS conversion
-- pass to handle the stack. Until then, this is really just
@@ -375,8 +357,8 @@ funInfoTable info_ptr
emitInfoTableAndCode
:: CLabel -- Label of entry or ret
- -> CmmInfo -- ...the info table
- -> [CmmFormal] -- ...args
+ -> CmmInfoTable -- ...the info table
+ -> [CmmFormal] -- ...args
-> [CmmBasicBlock] -- ...and body
-> Code
diff --git a/compiler/codeGen/CgMonad.lhs b/compiler/codeGen/CgMonad.lhs
index b96898f591..71da9e9ae0 100644
--- a/compiler/codeGen/CgMonad.lhs
+++ b/compiler/codeGen/CgMonad.lhs
@@ -13,8 +13,8 @@ stuff fits into the Big Picture.
module CgMonad (
Code, FCode,
- initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs,
- returnFC, fixC, fixC_, checkedAbsC,
+ initC, runC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs,
+ returnFC, fixC, fixC_, checkedAbsC,
stmtC, stmtsC, labelC, emitStmts, nopC, whenC, newLabelC,
newUnique, newUniqSupply,
@@ -386,11 +386,12 @@ instance Monad FCode where
{-# INLINE thenFC #-}
{-# INLINE returnFC #-}
-initC :: DynFlags -> Module -> FCode a -> IO a
-initC dflags mod (FCode code) = do
- uniqs <- mkSplitUniqSupply 'c'
- case code (initCgInfoDown dflags mod) (initCgState uniqs) of
- (res, _) -> return res
+initC :: IO CgState
+initC = do { uniqs <- mkSplitUniqSupply 'c'
+ ; return (initCgState uniqs) }
+
+runC :: DynFlags -> Module -> CgState -> FCode a -> (a,CgState)
+runC dflags mod st (FCode code) = code (initCgInfoDown dflags mod) st
returnFC :: a -> FCode a
returnFC val = FCode $ \_ state -> (val, state)
@@ -708,7 +709,7 @@ emitDecl decl = do
state <- getState
setState $ state { cgs_tops = cgs_tops state `snocOL` decl }
-emitProc :: CmmInfo -> CLabel -> [CmmFormal] -> [CmmBasicBlock] -> Code
+emitProc :: CmmInfoTable -> CLabel -> [CmmFormal] -> [CmmBasicBlock] -> Code
emitProc info lbl [] blocks = do
let proc_block = CmmProc info lbl (ListGraph blocks)
state <- getState
@@ -720,7 +721,7 @@ emitSimpleProc :: CLabel -> Code -> Code
emitSimpleProc lbl code = do
stmts <- getCgStmts code
blks <- cgStmtsToBlocks stmts
- emitProc (CmmInfo Nothing Nothing CmmNonInfoTable) lbl [] blks
+ emitProc CmmNonInfoTable lbl [] blks
-- Get all the CmmTops (there should be no stmts)
-- Return a single Cmm which may be split from other Cmms by
diff --git a/compiler/codeGen/CodeGen.lhs b/compiler/codeGen/CodeGen.lhs
index ce12d43bbf..9c936d3281 100644
--- a/compiler/codeGen/CodeGen.lhs
+++ b/compiler/codeGen/CodeGen.lhs
@@ -45,7 +45,13 @@ import TyCon
import Module
import ErrUtils
import Panic
-import Util
+import Outputable
+
+import OrdList
+import Stream (Stream, liftIO)
+import qualified Stream
+
+import Data.IORef
codeGen :: DynFlags
-> Module -- Module we are compiling
@@ -53,32 +59,37 @@ codeGen :: DynFlags
-> CollectedCCs -- (Local/global) cost-centres needing declaring/registering.
-> [(StgBinding,[(Id,[Id])])] -- Bindings to convert, with SRTs
-> HpcInfo -- Profiling info
- -> IO [CmmGroup]
+ -> Stream IO CmmGroup ()
-- N.B. returning '[Cmm]' and not 'Cmm' here makes it
-- possible for object splitting to split up the
-- pieces later.
-codeGen dflags this_mod data_tycons cost_centre_info stg_binds hpc_info = do
- showPass dflags "CodeGen"
- code_stuff <-
- initC dflags this_mod $ do
- cmm_binds <- mapM (getCmm . cgTopBinding dflags) stg_binds
- cmm_tycons <- mapM cgTyCon data_tycons
- cmm_init <- getCmm (mkModuleInit dflags cost_centre_info this_mod hpc_info)
- return (cmm_init : cmm_binds ++ cmm_tycons)
- -- Put datatype_stuff after code_stuff, because the
- -- datatype closure table (for enumeration types) to
- -- (say) PrelBase_True_closure, which is defined in
- -- code_stuff
-
- -- Note [codegen-split-init] the cmm_init block must
- -- come FIRST. This is because when -split-objs is on
- -- we need to combine this block with its
- -- initialisation routines; see Note
- -- [pipeline-split-init].
-
- dumpIfSet_dyn dflags Opt_D_dump_cmm "Cmm" (pprCmms code_stuff)
- return code_stuff
+codeGen dflags this_mod data_tycons cost_centre_info stg_binds hpc_info
+
+ = do { liftIO $ showPass dflags "CodeGen"
+
+ ; cgref <- liftIO $ newIORef =<< initC
+ ; let cg :: FCode CmmGroup -> Stream IO CmmGroup ()
+ cg fcode = do
+ cmm <- liftIO $ do
+ st <- readIORef cgref
+ let (a,st') = runC dflags this_mod st fcode
+
+ dumpIfSet_dyn dflags Opt_D_dump_cmm "Cmm" $ ppr a
+
+ -- NB. stub-out cgs_tops and cgs_stmts. This fixes
+ -- a big space leak. DO NOT REMOVE!
+ writeIORef cgref $! st'{ cgs_tops = nilOL,
+ cgs_stmts = nilOL }
+ return a
+ Stream.yield cmm
+
+ ; cg (getCmm $ mkModuleInit dflags cost_centre_info this_mod hpc_info)
+
+ ; mapM_ (cg . getCmm . cgTopBinding dflags) stg_binds
+
+ ; mapM_ (cg . cgTyCon) data_tycons
+ }
mkModuleInit
:: DynFlags
diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs
index 17a7062559..696af8107e 100644
--- a/compiler/codeGen/StgCmm.hs
+++ b/compiler/codeGen/StgCmm.hs
@@ -46,6 +46,13 @@ import TyCon
import Module
import ErrUtils
import Outputable
+import Stream
+
+import OrdList
+import MkGraph
+
+import Data.IORef
+import Control.Monad (when)
import Util
codeGen :: DynFlags
@@ -54,39 +61,51 @@ codeGen :: DynFlags
-> CollectedCCs -- (Local/global) cost-centres needing declaring/registering.
-> [(StgBinding,[(Id,[Id])])] -- Bindings to convert, with SRTs
-> HpcInfo
- -> IO [CmmGroup] -- Output
+ -> Stream IO CmmGroup () -- Output as a stream, so codegen can
+ -- be interleaved with output
codeGen dflags this_mod data_tycons
cost_centre_info stg_binds hpc_info
- = do { showPass dflags "New CodeGen"
-
--- Why?
--- ; mapM_ (\x -> seq x (return ())) data_tycons
-
- ; code_stuff <- initC dflags this_mod $ do
- { cmm_binds <- mapM (getCmm . cgTopBinding dflags) stg_binds
- ; cmm_tycons <- mapM cgTyCon data_tycons
- ; cmm_init <- getCmm (mkModuleInit cost_centre_info
- this_mod hpc_info)
- ; return (cmm_init : cmm_binds ++ cmm_tycons)
- }
+ = do { liftIO $ showPass dflags "New CodeGen"
+
+ -- cg: run the code generator, and yield the resulting CmmGroup
+ -- Using an IORef to store the state is a bit crude, but otherwise
+ -- we would need to add a state monad layer.
+ ; cgref <- liftIO $ newIORef =<< initC
+ ; let cg :: FCode () -> Stream IO CmmGroup ()
+ cg fcode = do
+ cmm <- liftIO $ do
+ st <- readIORef cgref
+ let (a,st') = runC dflags this_mod st (getCmm fcode)
+
+ -- NB. stub-out cgs_tops and cgs_stmts. This fixes
+ -- a big space leak. DO NOT REMOVE!
+ writeIORef cgref $! st'{ cgs_tops = nilOL,
+ cgs_stmts = mkNop }
+ return a
+ yield cmm
+
+ -- Note [codegen-split-init] the cmm_init block must come
+ -- FIRST. This is because when -split-objs is on we need to
+ -- combine this block with its initialisation routines; see
+ -- Note [pipeline-split-init].
+ ; cg (mkModuleInit cost_centre_info this_mod hpc_info)
+
+ ; mapM_ (cg . cgTopBinding dflags) stg_binds
+
-- Put datatype_stuff after code_stuff, because the
-- datatype closure table (for enumeration types) to
-- (say) PrelBase_True_closure, which is defined in
-- code_stuff
-
- -- N.B. returning '[Cmm]' and not 'Cmm' here makes it
- -- possible for object splitting to split up the
- -- pieces later.
-
- -- Note [codegen-split-init] the cmm_init block must
- -- come FIRST. This is because when -split-objs is on
- -- we need to combine this block with its
- -- initialisation routines; see Note
- -- [pipeline-split-init].
-
- ; return code_stuff }
-
+ ; let do_tycon tycon = do
+ -- Generate a table of static closures for an
+ -- enumeration type Note that the closure pointers are
+ -- tagged.
+ when (isEnumerationTyCon tycon) $ cg (cgEnumerationTyCon tycon)
+ mapM_ (cg . cgDataCon) (tyConDataCons tycon)
+
+ ; mapM_ do_tycon data_tycons
+ }
---------------------------------------------------------------
-- Top-level bindings
@@ -108,7 +127,7 @@ cgTopBinding dflags (StgNonRec id rhs, _srts)
; info <- cgTopRhs id' rhs
; addBindC (cg_id info) info -- Add the *un-externalised* Id to the envt,
-- so we find it when we look up occurrences
- }
+ }
cgTopBinding dflags (StgRec pairs, _srts)
= do { let (bndrs, rhss) = unzip pairs
@@ -117,7 +136,7 @@ cgTopBinding dflags (StgRec pairs, _srts)
; fixC_(\ new_binds -> do
{ addBindsC new_binds
; mapFCs ( \ (b,e) -> cgTopRhs b e ) pairs' })
- ; return () }
+ ; return () }
-- Urgh! I tried moving the forkStatics call from the rhss of cgTopRhs
-- to enclose the listFCs in cgTopBinding, but that tickled the
@@ -187,65 +206,19 @@ mkModuleInit cost_centre_info this_mod hpc_info
; emitDecl (CmmData Data (Statics (mkPlainModuleInitLabel this_mod) []))
}
+
---------------------------------------------------------------
-- Generating static stuff for algebraic data types
---------------------------------------------------------------
-{- [These comments are rather out of date]
-
-Macro Kind of constructor
-CONST_INFO_TABLE@ Zero arity (no info -- compiler uses static closure)
-CHARLIKE_INFO_TABLE Charlike (no info -- compiler indexes fixed array)
-INTLIKE_INFO_TABLE Intlike; the one macro generates both info tbls
-SPEC_INFO_TABLE SPECish, and bigger than or equal to MIN_UPD_SIZE
-GEN_INFO_TABLE GENish (hence bigger than or equal to MIN_UPD_SIZE@)
-Possible info tables for constructor con:
-
-* _con_info:
- Used for dynamically let(rec)-bound occurrences of
- the constructor, and for updates. For constructors
- which are int-like, char-like or nullary, when GC occurs,
- the closure tries to get rid of itself.
-
-* _static_info:
- Static occurrences of the constructor macro: STATIC_INFO_TABLE.
-
-For zero-arity constructors, \tr{con}, we NO LONGER generate a static closure;
-it's place is taken by the top level defn of the constructor.
-
-For charlike and intlike closures there is a fixed array of static
-closures predeclared.
--}
-
-cgTyCon :: TyCon -> FCode CmmGroup -- All constructors merged together
-cgTyCon tycon
- = do { constrs <- mapM (getCmm . cgDataCon) (tyConDataCons tycon)
-
- -- Generate a table of static closures for an enumeration type
- -- Put the table after the data constructor decls, because the
- -- datatype closure table (for enumeration types)
- -- to (say) PrelBase_$wTrue_closure, which is defined in code_stuff
- -- Note that the closure pointers are tagged.
-
- -- N.B. comment says to put table after constructor decls, but
- -- code puts it before --- NR 16 Aug 2007
- ; extra <- cgEnumerationTyCon tycon
-
- ; return (concat (extra ++ constrs))
- }
-
-cgEnumerationTyCon :: TyCon -> FCode [CmmGroup]
+cgEnumerationTyCon :: TyCon -> FCode ()
cgEnumerationTyCon tycon
- | isEnumerationTyCon tycon
- = do { tbl <- getCmm $
- emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs)
- [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs)
- (tagForCon con)
- | con <- tyConDataCons tycon]
- ; return [tbl] }
- | otherwise
- = return []
+ = emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs)
+ [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs)
+ (tagForCon con)
+ | con <- tyConDataCons tycon]
+
cgDataCon :: DataCon -> FCode ()
-- Generate the entry code, info tables, and (for niladic constructor)
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
index 3b166e3b6a..f98283f737 100644
--- a/compiler/codeGen/StgCmmBind.hs
+++ b/compiler/codeGen/StgCmmBind.hs
@@ -76,17 +76,17 @@ cgTopRhsClosure :: Id
cgTopRhsClosure id ccs _ upd_flag srt args body = do
{ -- LAY OUT THE OBJECT
let name = idName id
- ; lf_info <- mkClosureLFInfo id TopLevel [] upd_flag args
- ; srt_info <- getSRTInfo srt
+ ; lf_info <- mkClosureLFInfo id TopLevel [] upd_flag args
+ ; has_srt <- getSRTInfo srt
; mod_name <- getModuleName
; dflags <- getDynFlags
; let descr = closureDescription dflags mod_name name
- closure_info = mkClosureInfo True id lf_info 0 0 srt_info descr
- closure_label = mkLocalClosureLabel name (idCafInfo id)
+ closure_info = mkClosureInfo True id lf_info 0 0 descr
+ closure_label = mkLocalClosureLabel name (idCafInfo id)
cg_id_info = litIdInfo id lf_info (CmmLabel closure_label)
caffy = idCafInfo id
info_tbl = mkCmmInfo closure_info -- XXX short-cut
- closure_rep = mkStaticClosureFields info_tbl ccs caffy []
+ closure_rep = mkStaticClosureFields info_tbl ccs caffy has_srt []
-- BUILD THE OBJECT, AND GENERATE INFO TABLE (IF NECESSARY)
; emitDataLits closure_label closure_rep
@@ -110,7 +110,7 @@ cgBind (StgNonRec name rhs)
; emit (init <*> body) }
cgBind (StgRec pairs)
- = do { ((new_binds, inits), body) <- getCodeR $ fixC (\ new_binds_inits ->
+ = do { ((new_binds, inits), body) <- getCodeR $ fixC (\ new_binds_inits ->
do { addBindsC $ fst new_binds_inits -- avoid premature deconstruction
; liftM unzip $ listFCs [ cgRhs b e | (b,e) <- pairs ] })
; addBindsC new_binds
@@ -162,8 +162,8 @@ cgRhs :: Id -> StgRhs -> FCode (CgIdInfo, CmmAGraph)
cgRhs name (StgRhsCon cc con args)
= buildDynCon name cc con args
-cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body)
- = mkRhsClosure name cc bi (nonVoidIds fvs) upd_flag srt args body
+cgRhs name (StgRhsClosure cc bi fvs upd_flag _srt args body)
+ = mkRhsClosure name cc bi (nonVoidIds fvs) upd_flag args body
------------------------------------------------------------------------
-- Non-constructor right hand sides
@@ -171,7 +171,7 @@ cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body)
mkRhsClosure :: Id -> CostCentreStack -> StgBinderInfo
-> [NonVoid Id] -- Free vars
- -> UpdateFlag -> SRT
+ -> UpdateFlag
-> [Id] -- Args
-> StgExpr
-> FCode (CgIdInfo, CmmAGraph)
@@ -215,8 +215,7 @@ for semi-obvious reasons.
mkRhsClosure bndr cc bi
[NonVoid the_fv] -- Just one free var
upd_flag -- Updatable thunk
- _srt
- [] -- A thunk
+ [] -- A thunk
body@(StgCase (StgApp scrutinee [{-no args-}])
_ _ _ _ -- ignore uniq, etc.
(AlgAlt _)
@@ -247,8 +246,7 @@ mkRhsClosure bndr cc bi
mkRhsClosure bndr cc bi
fvs
upd_flag
- _srt
- [] -- No args; a thunk
+ [] -- No args; a thunk
body@(StgApp fun_id args)
| args `lengthIs` (arity-1)
@@ -269,7 +267,7 @@ mkRhsClosure bndr cc bi
arity = length fvs
---------- Default case ------------------
-mkRhsClosure bndr cc _ fvs upd_flag srt args body
+mkRhsClosure bndr cc _ fvs upd_flag args body
= do { -- LAY OUT THE OBJECT
-- If the binder is itself a free variable, then don't store
-- it in the closure. Instead, just bind it to Node on entry.
@@ -288,17 +286,16 @@ mkRhsClosure bndr cc _ fvs upd_flag srt args body
-- MAKE CLOSURE INFO FOR THIS CLOSURE
; lf_info <- mkClosureLFInfo bndr NotTopLevel fvs upd_flag args
; mod_name <- getModuleName
- ; c_srt <- getSRTInfo srt
- ; dflags <- getDynFlags
- ; let name = idName bndr
- descr = closureDescription dflags mod_name name
- fv_details :: [(NonVoid Id, VirtualHpOffset)]
+ ; dflags <- getDynFlags
+ ; let name = idName bndr
+ descr = closureDescription dflags mod_name name
+ fv_details :: [(NonVoid Id, VirtualHpOffset)]
(tot_wds, ptr_wds, fv_details)
= mkVirtHeapOffsets (isLFThunk lf_info)
(addIdReps (map stripNV reduced_fvs))
closure_info = mkClosureInfo False -- Not static
bndr lf_info tot_wds ptr_wds
- c_srt descr
+ descr
-- BUILD ITS INFO TABLE AND CODE
; forkClosureBody $
@@ -345,8 +342,7 @@ cgStdThunk bndr _cc _bndr_info _body lf_info payload
descr = closureDescription dflags mod_name (idName bndr)
closure_info = mkClosureInfo False -- Not static
bndr lf_info tot_wds ptr_wds
- NoC_SRT -- No SRT for a std-form closure
- descr
+ descr
-- ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body
; let use_cc = curCCS; blame_cc = curCCS
@@ -546,10 +542,10 @@ emitBlackHoleCode is_single_entry = do
whenC eager_blackholing $ do
tickyBlackHole (not is_single_entry)
- emit (mkStore (cmmOffsetW (CmmReg nodeReg) fixedHdrSize)
- (CmmReg (CmmGlobal CurrentTSO)))
+ emitStore (cmmOffsetW (CmmReg nodeReg) fixedHdrSize)
+ (CmmReg (CmmGlobal CurrentTSO))
emitPrimCall [] MO_WriteBarrier []
- emit (mkStore (CmmReg nodeReg) (CmmReg (CmmGlobal EagerBlackholeInfo)))
+ emitStore (CmmReg nodeReg) (CmmReg (CmmGlobal EagerBlackholeInfo))
setupUpdate :: ClosureInfo -> LocalReg -> FCode () -> FCode ()
-- Nota Bene: this function does not change Node (even if it's a CAF),
@@ -598,7 +594,7 @@ pushUpdateFrame es body
offset <- foldM push updfr es
withUpdFrameOff offset body
where push off e =
- do emit (mkStore (CmmStackSlot (CallArea Old) base) e)
+ do emitStore (CmmStackSlot Old base) e
return base
where base = off + widthInBytes (cmmExprWidth e)
@@ -666,13 +662,14 @@ link_caf _is_upd = do
-- node is live, so save it.
-- see Note [atomic CAF entry] in rts/sm/Storage.c
- ; emit $ mkCmmIfThen
- (CmmMachOp mo_wordEq [ CmmReg (CmmLocal ret), CmmLit zeroCLit]) $
+ ; updfr <- getUpdFrameOff
+ ; emit =<< mkCmmIfThen
+ (CmmMachOp mo_wordEq [ CmmReg (CmmLocal ret), CmmLit zeroCLit])
-- re-enter R1. Doing this directly is slightly dodgy; we're
-- assuming lots of things, like the stack pointer hasn't
-- moved since we entered the CAF.
- let target = entryCode (closureInfoPtr (CmmReg nodeReg)) in
- mkJump target [] 0
+ (let target = entryCode (closureInfoPtr (CmmReg nodeReg)) in
+ mkJump target [] updfr)
; return hp_rel }
diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs
index 483a67c1fa..8023abddec 100644
--- a/compiler/codeGen/StgCmmClosure.hs
+++ b/compiler/codeGen/StgCmmClosure.hs
@@ -657,7 +657,6 @@ data ClosureInfo
-- the rest is just an unpacked CmmInfoTable.
closureInfoLabel :: !CLabel,
closureSMRep :: !SMRep, -- representation used by storage mgr
- closureSRT :: !C_SRT, -- What SRT applies to this closure
closureProf :: !ProfilingInfo
}
@@ -667,7 +666,7 @@ mkCmmInfo ClosureInfo {..}
= CmmInfoTable { cit_lbl = closureInfoLabel
, cit_rep = closureSMRep
, cit_prof = closureProf
- , cit_srt = closureSRT }
+ , cit_srt = NoC_SRT }
--------------------------------------
@@ -678,16 +677,14 @@ mkClosureInfo :: Bool -- Is static
-> Id
-> LambdaFormInfo
-> Int -> Int -- Total and pointer words
- -> C_SRT
- -> String -- String descriptor
+ -> String -- String descriptor
-> ClosureInfo
-mkClosureInfo is_static id lf_info tot_wds ptr_wds srt_info val_descr
+mkClosureInfo is_static id lf_info tot_wds ptr_wds val_descr
= ClosureInfo { closureName = name,
closureLFInfo = lf_info,
- closureInfoLabel = info_lbl,
- closureSMRep = sm_rep, -- These four fields are a
- closureSRT = srt_info, -- CmmInfoTable
- closureProf = prof } -- ---
+ closureInfoLabel = info_lbl, -- These three fields are
+ closureSMRep = sm_rep, -- (almost) an info table
+ closureProf = prof } -- (we don't have an SRT yet)
where
name = idName id
sm_rep = mkHeapRep is_static ptr_wds nonptr_wds (lfClosureType lf_info)
@@ -920,15 +917,21 @@ cafBlackHoleInfoTable
, cit_prof = NoProfilingInfo
, cit_srt = NoC_SRT }
-staticClosureNeedsLink :: CmmInfoTable -> Bool
+staticClosureNeedsLink :: Bool -> CmmInfoTable -> Bool
-- A static closure needs a link field to aid the GC when traversing
-- the static closure graph. But it only needs such a field if either
-- a) it has an SRT
-- b) it's a constructor with one or more pointer fields
-- In case (b), the constructor's fields themselves play the role
-- of the SRT.
-staticClosureNeedsLink info_tbl@CmmInfoTable{ cit_rep = smrep }
+--
+-- At this point, the cit_srt field has not been calculated (that
+-- happens right at the end of the Cmm pipeline), but we do have the
+-- VarSet of CAFs that CoreToStg attached, and if that is empty there
+-- will definitely not be an SRT.
+--
+staticClosureNeedsLink has_srt CmmInfoTable{ cit_rep = smrep }
| isConRep smrep = not (isStaticNoCafCon smrep)
- | otherwise = needsSRT (cit_srt info_tbl)
-staticClosureNeedsLink _ = False
+ | otherwise = has_srt -- needsSRT (cit_srt info_tbl)
+staticClosureNeedsLink _ _ = False
diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs
index a7af5662e9..c348570a54 100644
--- a/compiler/codeGen/StgCmmCon.hs
+++ b/compiler/codeGen/StgCmmCon.hs
@@ -92,6 +92,7 @@ cgTopRhsCon id con args
info_tbl
dontCareCCS -- Because it's static data
caffy -- Has CAF refs
+ False -- no SRT
payload
-- BUILD THE OBJECT
diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs
index f128e3ad60..2edd09da12 100644
--- a/compiler/codeGen/StgCmmEnv.hs
+++ b/compiler/codeGen/StgCmmEnv.hs
@@ -27,7 +27,7 @@ module StgCmmEnv (
bindArgsToRegs, bindToReg, rebindToReg,
bindArgToReg, idToReg,
- getArgAmode, getNonVoidArgAmodes,
+ getArgAmode, getNonVoidArgAmodes,
getCgIdInfo,
maybeLetNoEscape,
) where
@@ -212,7 +212,6 @@ getNonVoidArgAmodes (arg:args)
; amodes <- getNonVoidArgAmodes args
; return ( amode : amodes ) }
-
------------------------------------------------------------------------
-- Interface functions for binding and re-binding names
------------------------------------------------------------------------
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
index 9faad02f46..4db1dffdfc 100644
--- a/compiler/codeGen/StgCmmExpr.hs
+++ b/compiler/codeGen/StgCmmExpr.hs
@@ -77,7 +77,7 @@ cgExpr (StgLetNoEscape _ _ binds expr) =
; let join_id = mkBlockId (uniqFromSupply us)
; cgLneBinds join_id binds
; cgExpr expr
- ; emit $ mkLabel join_id}
+ ; emitLabel join_id}
cgExpr (StgCase expr _live_vars _save_vars bndr srt alt_type alts) =
cgCase expr bndr srt alt_type alts
@@ -130,7 +130,7 @@ cgLetNoEscapeRhs
cgLetNoEscapeRhs join_id local_cc bndr rhs =
do { (info, rhs_body) <- getCodeR $ cgLetNoEscapeRhsBody local_cc bndr rhs
; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
- ; emit (outOfLine $ mkLabel bid <*> rhs_body <*> mkBranch join_id)
+ ; emitOutOfLine bid $ rhs_body <*> mkBranch join_id
; return info
}
@@ -278,21 +278,69 @@ Hence: two basic plans for
data GcPlan
= GcInAlts -- Put a GC check at the start the case alternatives,
[LocalReg] -- which binds these registers
- SRT -- using this SRT
- | NoGcInAlts -- The scrutinee is a primitive value, or a call to a
+ | NoGcInAlts -- The scrutinee is a primitive value, or a call to a
-- primitive op which does no GC. Absorb the allocation
-- of the case alternative(s) into the upstream check
-------------------------------------
--- See Note [case on Bool]
cgCase :: StgExpr -> Id -> SRT -> AltType -> [StgAlt] -> FCode ()
+
+cgCase (StgOpApp (StgPrimOp op) args _) bndr _srt (AlgAlt tycon) alts
+ | isEnumerationTyCon tycon -- Note [case on bool]
+ = do { tag_expr <- do_enum_primop op args
+
+ -- If the binder is not dead, convert the tag to a constructor
+ -- and assign it.
+ ; when (not (isDeadBinder bndr)) $ do
+ { tmp_reg <- bindArgToReg (NonVoid bndr)
+ ; emitAssign (CmmLocal tmp_reg)
+ (tagToClosure tycon tag_expr) }
+
+ ; (mb_deflt, branches) <- cgAlgAltRhss NoGcInAlts Nothing
+ (NonVoid bndr) alts
+ ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
+ }
+ where
+ do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
+ do_enum_primop TagToEnumOp [arg] -- No code!
+ = getArgAmode (NonVoid arg)
+ do_enum_primop primop args
+ = do tmp <- newTemp bWord
+ cgPrimOp [tmp] primop args
+ return (CmmReg (CmmLocal tmp))
+
{-
-cgCase (OpApp ) bndr srt AlgAlt [(DataAlt flase, a2]
- | isBoolTy (idType bndr)
- , isDeadBndr bndr
- =
+Note [case on bool]
+
+This special case handles code like
+
+ case a <# b of
+ True ->
+ False ->
+
+If we let the ordinary case code handle it, we'll get something like
+
+ tmp1 = a < b
+ tmp2 = Bool_closure_tbl[tmp1]
+ if (tmp2 & 7 != 0) then ... // normal tagged case
+
+but this junk won't optimise away. What we really want is just an
+inline comparison:
+
+ if (a < b) then ...
+
+So we add a special case to generate
+
+ tmp1 = a < b
+ if (tmp1 == 0) then ...
+
+and later optimisations will further improve this.
+
+We should really change all these primops to return Int# instead, that
+would make this special case go away.
-}
+
-- Note [ticket #3132]: we might be looking at a case of a lifted Id
-- that was cast to an unlifted type. The Id will always be bottom,
-- but we don't want the code generator to fall over here. If we
@@ -319,7 +367,7 @@ cgCase (StgApp v []) bndr _ alt_type@(PrimAlt _) alts
do { when (not reps_compatible) $
panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
; v_info <- getCgIdInfo v
- ; emit (mkAssign (CmmLocal (idToReg (NonVoid bndr))) (idInfoToAmode v_info))
+ ; emitAssign (CmmLocal (idToReg (NonVoid bndr))) (idInfoToAmode v_info)
; _ <- bindArgsToRegs [NonVoid bndr]
; cgAlts NoGcInAlts (NonVoid bndr) alt_type alts }
where
@@ -330,8 +378,11 @@ cgCase scrut@(StgApp v []) _ _ (PrimAlt _) _
do { mb_cc <- maybeSaveCostCentre True
; withSequel (AssignTo [idToReg (NonVoid v)] False) (cgExpr scrut)
; restoreCurrentCostCentre mb_cc
- ; emit $ mkComment $ mkFastString "should be unreachable code"
- ; emit $ withFreshLabel "l" (\l -> mkLabel l <*> mkBranch l)}
+ ; emitComment $ mkFastString "should be unreachable code"
+ ; l <- newLabelC
+ ; emitLabel l
+ ; emit (mkBranch l)
+ }
{-
case seq# a s of v
@@ -349,7 +400,7 @@ cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr srt alt_type alts
= -- handle seq#, same return convention as vanilla 'a'.
cgCase (StgApp a []) bndr srt alt_type alts
-cgCase scrut bndr srt alt_type alts
+cgCase scrut bndr _srt alt_type alts
= -- the general case
do { up_hp_usg <- getVirtHp -- Upstream heap usage
; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
@@ -359,7 +410,7 @@ cgCase scrut bndr srt alt_type alts
| isSingleton alts = False
| up_hp_usg > 0 = False
| otherwise = True
- gc_plan = if gcInAlts then GcInAlts alt_regs srt else NoGcInAlts
+ gc_plan = if gcInAlts then GcInAlts alt_regs else NoGcInAlts
; mb_cc <- maybeSaveCostCentre simple_scrut
; withSequel (AssignTo alt_regs gcInAlts) (cgExpr scrut)
@@ -417,14 +468,14 @@ chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
cgAlts :: GcPlan -> NonVoid Id -> AltType -> [StgAlt] -> FCode ()
-- At this point the result of the case are in the binders
cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)]
- = maybeAltHeapCheck gc_plan (cgExpr rhs)
+ = maybeAltHeapCheck gc_plan Nothing (cgExpr rhs)
cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)]
- = maybeAltHeapCheck gc_plan (cgExpr rhs)
+ = maybeAltHeapCheck gc_plan Nothing (cgExpr rhs)
-- Here bndrs are *already* in scope, so don't rebind them
cgAlts gc_plan bndr (PrimAlt _) alts
- = do { tagged_cmms <- cgAltRhss gc_plan bndr alts
+ = do { tagged_cmms <- cgAltRhss gc_plan Nothing bndr alts
; let bndr_reg = CmmLocal (idToReg bndr)
(DEFAULT,deflt) = head tagged_cmms
@@ -433,20 +484,17 @@ cgAlts gc_plan bndr (PrimAlt _) alts
tagged_cmms' = [(lit,code)
| (LitAlt lit, code) <- tagged_cmms]
- ; emit (mkCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt) }
+ ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt }
cgAlts gc_plan bndr (AlgAlt tycon) alts
- = do { tagged_cmms <- cgAltRhss gc_plan bndr alts
-
+ = do { retry_lbl <- newLabelC
+ ; emitLabel retry_lbl -- Note [alg-alt heap checks]
+
+ ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan (Just retry_lbl)
+ bndr alts
+
; let fam_sz = tyConFamilySize tycon
bndr_reg = CmmLocal (idToReg bndr)
- mb_deflt = case tagged_cmms of
- ((DEFAULT,rhs) : _) -> Just rhs
- _other -> Nothing
- -- DEFAULT is always first, if present
-
- branches = [ (dataConTagZ con, cmm)
- | (DataAlt con, cmm) <- tagged_cmms ]
-- Is the constructor tag in the node reg?
; if isSmallFamily fam_sz
@@ -467,23 +515,68 @@ cgAlts gc_plan bndr (AlgAlt tycon) alts
cgAlts _ _ _ _ = panic "cgAlts"
-- UbxTupAlt and PolyAlt have only one alternative
+
+-- Note [alg-alt heap check]
+--
+-- In an algebraic case with more than one alternative, we will have
+-- code like
+--
+-- L0:
+-- x = R1
+-- goto L1
+-- L1:
+-- if (x & 7 >= 2) then goto L2 else goto L3
+-- L2:
+-- Hp = Hp + 16
+-- if (Hp > HpLim) then goto L4
+-- ...
+-- L4:
+-- call gc() returns to L5
+-- L5:
+-- x = R1
+-- goto L1
+
-------------------
-cgAltRhss :: GcPlan -> NonVoid Id -> [StgAlt] -> FCode [(AltCon, CmmAGraph)]
-cgAltRhss gc_plan bndr alts
+cgAlgAltRhss :: GcPlan -> Maybe BlockId -> NonVoid Id -> [StgAlt]
+ -> FCode ( Maybe CmmAGraph
+ , [(ConTagZ, CmmAGraph)] )
+cgAlgAltRhss gc_plan retry_lbl bndr alts
+ = do { tagged_cmms <- cgAltRhss gc_plan retry_lbl bndr alts
+
+ ; let { mb_deflt = case tagged_cmms of
+ ((DEFAULT,rhs) : _) -> Just rhs
+ _other -> Nothing
+ -- DEFAULT is always first, if present
+
+ ; branches = [ (dataConTagZ con, cmm)
+ | (DataAlt con, cmm) <- tagged_cmms ]
+ }
+
+ ; return (mb_deflt, branches)
+ }
+
+
+-------------------
+cgAltRhss :: GcPlan -> Maybe BlockId -> NonVoid Id -> [StgAlt]
+ -> FCode [(AltCon, CmmAGraph)]
+cgAltRhss gc_plan retry_lbl bndr alts
= forkAlts (map cg_alt alts)
where
base_reg = idToReg bndr
cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph)
cg_alt (con, bndrs, _uses, rhs)
= getCodeR $
- maybeAltHeapCheck gc_plan $
+ maybeAltHeapCheck gc_plan retry_lbl $
do { _ <- bindConArgs con base_reg bndrs
; cgExpr rhs
; return con }
-maybeAltHeapCheck :: GcPlan -> FCode a -> FCode a
-maybeAltHeapCheck NoGcInAlts code = code
-maybeAltHeapCheck (GcInAlts regs _) code = altHeapCheck regs code
+maybeAltHeapCheck :: GcPlan -> Maybe BlockId -> FCode a -> FCode a
+maybeAltHeapCheck NoGcInAlts mlbl code = code
+maybeAltHeapCheck (GcInAlts regs) mlbl code =
+ case mlbl of
+ Nothing -> altHeapCheck regs code
+ Just retry_lbl -> altHeapCheckReturnsTo regs retry_lbl code
-----------------------------------------------------------------------------
-- Tail calls
@@ -517,8 +610,8 @@ cgIdApp fun_id args
cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode ()
cgLneJump blk_id lne_regs args -- Join point; discard sequel
= do { cmm_args <- getNonVoidArgAmodes args
- ; emit (mkMultiAssign lne_regs cmm_args
- <*> mkBranch blk_id) }
+ ; emitMultiAssign lne_regs cmm_args
+ ; emit (mkBranch blk_id) }
cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode ()
cgTailCall fun_id fun_info args = do
@@ -529,27 +622,21 @@ cgTailCall fun_id fun_info args = do
ReturnIt -> emitReturn [fun] -- ToDo: does ReturnIt guarantee tagged?
EnterIt -> ASSERT( null args ) -- Discarding arguments
- do { let fun' = CmmLoad fun (cmmExprType fun)
- ; [ret,call] <- forkAlts [
- getCode $ emitReturn [fun], -- Is tagged; no need to untag
- getCode $ do -- emit (mkAssign nodeReg fun)
- emitCall (NativeNodeCall, NativeReturn)
- (entryCode fun') [fun]] -- Not tagged
- ; emit (mkCmmIfThenElse (cmmIsTagged fun) ret call) }
-
- SlowCall -> do -- A slow function call via the RTS apply routines
+ emitEnter fun
+
+ SlowCall -> do -- A slow function call via the RTS apply routines
{ tickySlowCall lf_info args
- ; emit $ mkComment $ mkFastString "slowCall"
+ ; emitComment $ mkFastString "slowCall"
; slowCall fun args }
-- A direct function call (possibly with some left-over arguments)
DirectEntry lbl arity -> do
{ tickyDirectCall arity args
; if node_points then
- do emit $ mkComment $ mkFastString "directEntry"
- emit (mkAssign nodeReg fun)
+ do emitComment $ mkFastString "directEntry"
+ emitAssign nodeReg fun
directCall lbl arity args
- else do emit $ mkComment $ mkFastString "directEntry else"
+ else do emitComment $ mkFastString "directEntry else"
directCall lbl arity args }
JumpToIt {} -> panic "cgTailCall" -- ???
@@ -561,33 +648,67 @@ cgTailCall fun_id fun_info args = do
node_points = nodeMustPointToIt lf_info
-{- Note [case on Bool]
- ~~~~~~~~~~~~~~~~~~~
-A case on a Boolean value does two things:
- 1. It looks up the Boolean in a closure table and assigns the
- result to the binder.
- 2. It branches to the True or False case through analysis
- of the closure assigned to the binder.
-But the indirection through the closure table is unnecessary
-if the assignment to the binder will be dead code (use isDeadBndr).
+emitEnter :: CmmExpr -> FCode ()
+emitEnter fun = do
+ { adjustHpBackwards
+ ; sequel <- getSequel
+ ; updfr_off <- getUpdFrameOff
+ ; case sequel of
+ -- For a return, we have the option of generating a tag-test or
+ -- not. If the value is tagged, we can return directly, which
+ -- is quicker than entering the value. This is a code
+ -- size/speed trade-off: when optimising for speed rather than
+ -- size we could generate the tag test.
+ --
+ -- Right now, we do what the old codegen did, and omit the tag
+ -- test, just generating an enter.
+ Return _ -> do
+ { let entry = entryCode $ closureInfoPtr $ CmmReg nodeReg
+ ; emit $ mkForeignJump NativeNodeCall entry
+ [cmmUntag fun] updfr_off
+ }
+
+ -- The result will be scrutinised in the sequel. This is where
+ -- we generate a tag-test to avoid entering the closure if
+ -- possible.
+ --
+ -- The generated code will be something like this:
+ --
+ -- R1 = fun -- copyout
+ -- if (fun & 7 != 0) goto Lcall else goto Lret
+ -- Lcall:
+ -- call [fun] returns to Lret
+ -- Lret:
+ -- fun' = R1 -- copyin
+ -- ...
+ --
+ -- Note in particular that the label Lret is used as a
+ -- destination by both the tag-test and the call. This is
+ -- becase Lret will necessarily be a proc-point, and we want to
+ -- ensure that we generate only one proc-point for this
+ -- sequence.
+ --
+ AssignTo res_regs _ -> do
+ { lret <- newLabelC
+ ; lcall <- newLabelC
+ ; let area = Young lret
+ ; let (off, copyin) = copyInOflow NativeReturn area res_regs
+ (outArgs, copyout) = copyOutOflow NativeNodeCall Call area
+ [fun] updfr_off (0,[])
+ -- refer to fun via nodeReg after the copyout, to avoid having
+ -- both live simultaneously; this sometimes enables fun to be
+ -- inlined in the RHS of the R1 assignment.
+ ; let entry = entryCode (closureInfoPtr (CmmReg nodeReg))
+ the_call = toCall entry (Just lret) updfr_off off outArgs
+ ; emit $
+ copyout <*>
+ mkCbranch (cmmIsTagged (CmmReg nodeReg)) lret lcall <*>
+ outOfLine lcall the_call <*>
+ mkLabel lret <*>
+ copyin
+ }
+ }
-The following example illustrates how badly the code turns out:
- STG:
- case <=## [ww_s7Hx y_s7HD] of wild2_sbH8 {
- GHC.Types.False -> <true code> // sbH8 dead
- GHC.Types.True -> <false code> // sbH8 dead
- };
- Cmm:
- _s7HD::F64 = F64[_sbH7::I64 + 7]; // MidAssign
- _ccsW::I64 = %MO_F_Le_W64(_s7Hx::F64, _s7HD::F64); // MidAssign
- // emitReturn // MidComment
- _sbH8::I64 = I64[ghczmprim_GHCziBool_Bool_closure_tbl + (_ccsW::I64 << 3)]; // MidAssign
- _ccsX::I64 = _sbH8::I64 & 7; // MidAssign
- if (_ccsX::I64 >= 2) goto ccsH; else goto ccsI; // LastCondBranch
-
-The assignments to _sbH8 and _ccsX are completely unnecessary.
-Instead, we should branch based on the value of _ccsW.
--}
{- Note [Better Alt Heap Checks]
If two function calls can share a return point, then they will also
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index 5bc0f7af4e..c67e0e0c95 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -22,6 +22,7 @@ import StgCmmEnv
import StgCmmMonad
import StgCmmUtils
import StgCmmClosure
+import StgCmmLayout
import BlockId
import Cmm
@@ -45,15 +46,16 @@ import Control.Monad
-- Code generation for Foreign Calls
-----------------------------------------------------------------------------
-cgForeignCall :: [LocalReg] -- r1,r2 where to put the results
- -> [ForeignHint]
- -> ForeignCall -- the op
+-- | emit code for a foreign call, and return the results to the sequel.
+--
+cgForeignCall :: ForeignCall -- the op
-> [StgArg] -- x,y arguments
+ -> Type -- result type
-> FCode ()
--- Emits code for an unsafe foreign call: r1, r2 = foo( x, y, z )
-cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_args
+cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
= do { cmm_args <- getFCallArgs stg_args
+ ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
; let ((call_args, arg_hints), cmm_target)
= case target of
StaticTarget _ _ False ->
@@ -63,7 +65,7 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a
= case mPkgId of
Nothing -> ForeignLabelInThisPackage
Just pkgId -> ForeignLabelInPackage pkgId
- size = call_size cmm_args
+ size = call_size cmm_args
in ( unzip cmm_args
, CmmLit (CmmLabel
(mkForeignLabel lbl size labelSource IsFunction)))
@@ -71,13 +73,31 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a
DynamicTarget -> case cmm_args of
(fn,_):rest -> (unzip rest, fn)
[] -> panic "cgForeignCall []"
- fc = ForeignConvention cconv arg_hints result_hints
+ fc = ForeignConvention cconv arg_hints res_hints
call_target = ForeignTarget cmm_target fc
- ; srt <- getSRTInfo NoSRT -- SLPJ: Not sure what SRT
- -- is right here
- -- JD: Does it matter in the new codegen?
- ; emitForeignCall safety results call_target call_args srt CmmMayReturn }
+ -- we want to emit code for the call, and then emitReturn.
+ -- However, if the sequel is AssignTo, we shortcut a little
+ -- and generate a foreign call that assigns the results
+ -- directly. Otherwise we end up generating a bunch of
+ -- useless "r = r" assignments, which are not merely annoying:
+ -- they prevent the common block elimination from working correctly
+ -- in the case of a safe foreign call.
+ -- See Note [safe foreign call convention]
+ --
+ ; sequel <- getSequel
+ ; case sequel of
+ AssignTo assign_to_these _ ->
+ do { emitForeignCall safety assign_to_these call_target
+ call_args CmmMayReturn
+ }
+
+ _something_else ->
+ do { emitForeignCall safety res_regs call_target
+ call_args CmmMayReturn
+ ; emitReturn (map (CmmReg . CmmLocal) res_regs)
+ }
+ }
where
-- in the stdcall calling convention, the symbol needs @size appended
-- to it, where size is the total number of bytes of arguments. We
@@ -88,16 +108,83 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a
| otherwise = Nothing
-- ToDo: this might not be correct for 64-bit API
- arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg) wORD_SIZE
+ arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg)
+ wORD_SIZE
+
+{- Note [safe foreign call convention]
+
+The simple thing to do for a safe foreign call would be the same as an
+unsafe one: just
+
+ emitForeignCall ...
+ emitReturn ...
+
+but consider what happens in this case
+
+ case foo x y z of
+ (# s, r #) -> ...
+
+The sequel is AssignTo [r]. The call to newUnboxedTupleRegs picks [r]
+as the result reg, and we generate
+
+ r = foo(x,y,z) returns to L1 -- emitForeignCall
+ L1:
+ r = r -- emitReturn
+ goto L2
+L2:
+ ...
+
+Now L1 is a proc point (by definition, it is the continuation of the
+safe foreign call). If L2 does a heap check, then L2 will also be a
+proc point.
+
+Furthermore, the stack layout algorithm has to arrange to save r
+somewhere between the call and the jump to L1, which is annoying: we
+would have to treat r differently from the other live variables, which
+have to be saved *before* the call.
+
+So we adopt a special convention for safe foreign calls: the results
+are copied out according to the NativeReturn convention by the call,
+and the continuation of the call should copyIn the results. (The
+copyOut code is actually inserted when the safe foreign call is
+lowered later). The result regs attached to the safe foreign call are
+only used temporarily to hold the results before they are copied out.
+
+We will now generate this:
+
+ r = foo(x,y,z) returns to L1
+ L1:
+ r = R1 -- copyIn, inserted by mkSafeCall
+ goto L2
+ L2:
+ ... r ...
+
+And when the safe foreign call is lowered later (see Note [lower safe
+foreign calls]) we get this:
+
+ suspendThread()
+ r = foo(x,y,z)
+ resumeThread()
+ R1 = r -- copyOut, inserted by lowerSafeForeignCall
+ jump L1
+ L1:
+ r = R1 -- copyIn, inserted by mkSafeCall
+ goto L2
+ L2:
+ ... r ...
+
+Now consider what happens if L2 does a heap check: the Adams
+optimisation kicks in and commons up L1 with the heap-check
+continuation, resulting in just one proc point instead of two. Yay!
+-}
+
emitCCall :: [(CmmFormal,ForeignHint)]
-> CmmExpr
-> [(CmmActual,ForeignHint)]
-> FCode ()
emitCCall hinted_results fn hinted_args
- = emitForeignCall PlayRisky results target args
- NoC_SRT -- No SRT b/c we PlayRisky
- CmmMayReturn
+ = emitForeignCall PlayRisky results target args CmmMayReturn
where
(args, arg_hints) = unzip hinted_args
(results, result_hints) = unzip hinted_results
@@ -107,7 +194,7 @@ emitCCall hinted_results fn hinted_args
emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode ()
emitPrimCall res op args
- = emitForeignCall PlayRisky res (PrimTarget op) args NoC_SRT CmmMayReturn
+ = emitForeignCall PlayRisky res (PrimTarget op) args CmmMayReturn
-- alternative entry point, used by CmmParse
emitForeignCall
@@ -115,11 +202,10 @@ emitForeignCall
-> [CmmFormal] -- where to put the results
-> ForeignTarget -- the op
-> [CmmActual] -- arguments
- -> C_SRT -- the SRT of the calls continuation
-> CmmReturnInfo -- This can say "never returns"
-- only RTS procedures do this
-> FCode ()
-emitForeignCall safety results target args _srt _ret
+emitForeignCall safety results target args _ret
| not (playSafe safety) = do
let (caller_save, caller_load) = callerSaveVolatileRegs
emit caller_save
@@ -129,7 +215,9 @@ emitForeignCall safety results target args _srt _ret
| otherwise = do
updfr_off <- getUpdFrameOff
temp_target <- load_target_into_temp target
- emit $ mkSafeCall temp_target results args updfr_off (playInterruptible safety)
+ emit =<< mkSafeCall temp_target results args updfr_off
+ (playInterruptible safety)
+
{-
@@ -162,7 +250,7 @@ maybe_assign_temp e
-- expressions, which are wrong here.
-- this is a NonPtr because it only duplicates an existing
reg <- newTemp (cmmExprType e) --TODO FIXME NOW
- emit (mkAssign (CmmLocal reg) e)
+ emitAssign (CmmLocal reg) e
return (CmmReg (CmmLocal reg))
-- -----------------------------------------------------------------------------
@@ -184,12 +272,12 @@ saveThreadState =
emitSaveThreadState :: BlockId -> FCode ()
emitSaveThreadState bid = do
-- CurrentTSO->stackobj->sp = Sp;
- emit $ mkStore (cmmOffset (CmmLoad (cmmOffset stgCurrentTSO tso_stackobj) bWord) stack_SP)
- (CmmStackSlot (CallArea (Young bid)) (widthInBytes (typeWidth gcWord)))
+ emitStore (cmmOffset (CmmLoad (cmmOffset stgCurrentTSO tso_stackobj) bWord) stack_SP)
+ (CmmStackSlot (Young bid) (widthInBytes (typeWidth gcWord)))
emit closeNursery
-- and save the current cost centre stack in the TSO when profiling:
when opt_SccProfilingOn $
- emit (mkStore (cmmOffset stgCurrentTSO tso_CCCS) curCCS)
+ emitStore (cmmOffset stgCurrentTSO tso_CCCS) curCCS
-- CurrentNursery->free = Hp+1;
closeNursery :: CmmAGraph
diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
index 25161722f7..856b04367d 100644
--- a/compiler/codeGen/StgCmmHeap.hs
+++ b/compiler/codeGen/StgCmmHeap.hs
@@ -10,7 +10,7 @@ module StgCmmHeap (
getVirtHp, setVirtHp, setRealHp,
getHpRelOffset, hpRel,
- entryHeapCheck, altHeapCheck,
+ entryHeapCheck, altHeapCheck, altHeapCheckReturnsTo,
mkVirtHeapOffsets, mkVirtConstrOffsets,
mkStaticClosureFields, mkStaticClosure,
@@ -20,7 +20,6 @@ module StgCmmHeap (
#include "HsVersions.h"
-import CmmType
import StgSyn
import CLabel
import StgCmmLayout
@@ -34,6 +33,7 @@ import StgCmmEnv
import MkGraph
+import Hoopl hiding ((<*>), mkBranch)
import SMRep
import Cmm
import CmmUtils
@@ -109,7 +109,7 @@ allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets
-- ALLOCATE THE OBJECT
; base <- getHpRelOffset info_offset
- ; emit (mkComment $ mkFastString "allocDynClosure")
+ ; emitComment $ mkFastString "allocDynClosure"
; emitSetDynHdr base info_ptr use_cc
; let (cmm_args, offsets) = unzip amodes_w_offsets
; hpStore base cmm_args offsets
@@ -151,9 +151,10 @@ mkStaticClosureFields
:: CmmInfoTable
-> CostCentreStack
-> CafInfo
+ -> Bool -- SRT is non-empty?
-> [CmmLit] -- Payload
-> [CmmLit] -- The full closure
-mkStaticClosureFields info_tbl ccs caf_refs payload
+mkStaticClosureFields info_tbl ccs caf_refs has_srt payload
= mkStaticClosure info_lbl ccs payload padding
static_link_field saved_info_field
where
@@ -178,8 +179,10 @@ mkStaticClosureFields info_tbl ccs caf_refs payload
| otherwise = ASSERT(null payload) [mkIntCLit 0]
static_link_field
- | is_caf || staticClosureNeedsLink info_tbl = [static_link_value]
- | otherwise = []
+ | is_caf || staticClosureNeedsLink has_srt info_tbl
+ = [static_link_value]
+ | otherwise
+ = []
saved_info_field
| is_caf = [mkIntCLit 0]
@@ -335,11 +338,12 @@ entryHeapCheck cl_info offset nodeSet arity args code
args' = map (CmmReg . CmmLocal) args
setN = case nodeSet of
- Just n -> mkAssign nodeReg (CmmReg $ CmmLocal n)
+ Just n -> mkNop -- No need to assign R1, it already
+ -- points to the closure
Nothing -> mkAssign nodeReg $
CmmLit (CmmLabel $ staticClosureLabel cl_info)
- {- Thunks: Set R1 = node, jump GCEnter1
+ {- Thunks: jump GCEnter1
Function (fast): Set R1 = node, jump GCFun
Function (slow): Set R1 = node, call generic_gc -}
gc_call upd = setN <*> gc_lbl upd
@@ -354,7 +358,10 @@ entryHeapCheck cl_info offset nodeSet arity args code
- GC calls, but until then this fishy code works -}
updfr_sz <- getUpdFrameOff
- heapCheck True (gc_call updfr_sz) code
+
+ loop_id <- newLabelC
+ emitLabel loop_id
+ heapCheck True (gc_call updfr_sz <*> mkBranch loop_id) code
{-
-- This code is slightly outdated now and we could easily keep the above
@@ -400,21 +407,29 @@ entryHeapCheck cl_info offset nodeSet arity args code
-}
---------------------------------------------------------------
--- A heap/stack check at in a case alternative
+-- ------------------------------------------------------------
+-- A heap/stack check in a case alternative
altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck regs code
+ = do loop_id <- newLabelC
+ emitLabel loop_id
+ altHeapCheckReturnsTo regs loop_id code
+
+altHeapCheckReturnsTo :: [LocalReg] -> Label -> FCode a -> FCode a
+altHeapCheckReturnsTo regs retry_lbl code
= do updfr_sz <- getUpdFrameOff
- heapCheck False (gc_call updfr_sz) code
+ gc_call_code <- gc_call updfr_sz
+ heapCheck False (gc_call_code <*> mkBranch retry_lbl) code
where
reg_exprs = map (CmmReg . CmmLocal) regs
+ -- Note [stg_gc arguments]
gc_call sp =
case rts_label regs of
- Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp
- Nothing -> mkCall generic_gc (GC, GC) [] [] sp
+ Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp (0,[])
+ Nothing -> mkCall generic_gc (GC, GC) [] [] sp (0,[])
rts_label [reg]
| isGcPtrType ty = Just (mkGcLabel "stg_gc_unpt_r1")
@@ -432,6 +447,23 @@ altHeapCheck regs code
rts_label _ = Nothing
+-- Note [stg_gc arguments]
+-- It might seem that we could avoid passing the arguments to the
+-- stg_gc function, because they are already in the right registers.
+-- While this is usually the case, it isn't always. Sometimes the
+-- code generator has cleverly avoided the eval in a case, e.g. in
+-- ffi/should_run/4221.hs we found
+--
+-- case a_r1mb of z
+-- FunPtr x y -> ...
+--
+-- where a_r1mb is bound a top-level constructor, and is known to be
+-- evaluated. The codegen just assigns x, y and z, and continues;
+-- R1 is never assigned.
+--
+-- So we'll have to rely on optimisations to eliminatethese
+-- assignments where possible.
+
-- | The generic GC procedure; no params, no results
generic_gc :: CmmExpr
@@ -447,7 +479,7 @@ heapCheck checkStack do_gc code
= getHeapUsage $ \ hpHw ->
-- Emit heap checks, but be sure to do it lazily so
-- that the conditionals on hpHw don't cause a black hole
- do { emit $ do_checks checkStack hpHw do_gc
+ do { codeOnly $ do_checks checkStack hpHw do_gc
; tickyAllocHeap hpHw
; doGranAllocate hpHw
; setRealHp hpHw
@@ -456,22 +488,25 @@ heapCheck checkStack do_gc code
do_checks :: Bool -- Should we check the stack?
-> WordOff -- Heap headroom
-> CmmAGraph -- What to do on failure
- -> CmmAGraph
-do_checks checkStack alloc do_gc
- = withFreshLabel "gc" $ \ loop_id ->
- withFreshLabel "gc" $ \ gc_id ->
- mkLabel loop_id
- <*> (let hpCheck = if alloc == 0 then mkNop
- else mkAssign hpReg bump_hp <*>
- mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)
- in if checkStack
- then mkCmmIfThenElse sp_oflo (mkBranch gc_id) hpCheck
- else hpCheck)
- <*> mkComment (mkFastString "outOfLine should follow:")
- <*> outOfLine (mkLabel gc_id
- <*> mkComment (mkFastString "outOfLine here")
- <*> do_gc
- <*> mkBranch loop_id)
+ -> FCode ()
+do_checks checkStack alloc do_gc = do
+ gc_id <- newLabelC
+ hp_check <- if alloc == 0
+ then return mkNop
+ else do
+ ifthen <- mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)
+ return (mkAssign hpReg bump_hp <*> ifthen)
+
+ if checkStack
+ then emit =<< mkCmmIfThenElse sp_oflo (mkBranch gc_id) hp_check
+ else emit hp_check
+
+ emit $ mkComment (mkFastString "outOfLine should follow:")
+
+ emitOutOfLine gc_id $
+ mkComment (mkFastString "outOfLine here") <*>
+ do_gc -- this is expected to jump back somewhere
+
-- Test for stack pointer exhaustion, then
-- bump heap pointer, and test for heap exhaustion
-- Note that we don't move the heap pointer unless the
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 86986efdfa..9593af1f50 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -15,7 +15,7 @@
module StgCmmLayout (
mkArgDescr,
- emitCall, emitReturn,
+ emitCall, emitReturn, adjustHpBackwards,
emitClosureProcAndInfoTable,
emitClosureAndInfoTable,
@@ -41,10 +41,12 @@ import StgCmmEnv
import StgCmmTicky
import StgCmmMonad
import StgCmmUtils
+import StgCmmProf
import MkGraph
import SMRep
import Cmm
+import CmmUtils
import CLabel
import StgSyn
import Id
@@ -52,6 +54,7 @@ import Name
import TyCon ( PrimRep(..) )
import BasicTypes ( RepArity )
import StaticFlags
+import Module
import Constants
import Util
@@ -63,38 +66,60 @@ import FastString
-- Call and return sequences
------------------------------------------------------------------------
-emitReturn :: [CmmExpr] -> FCode ()
--- Return multiple values to the sequel
+-- | Return multiple values to the sequel
+--
+-- If the sequel is @Return@
+--
+-- > return (x,y)
+--
+-- If the sequel is @AssignTo [p,q]@
+--
+-- > p=x; q=y;
--
--- If the sequel is Return
--- return (x,y)
--- If the sequel is AssignTo [p,q]
--- p=x; q=y;
+emitReturn :: [CmmExpr] -> FCode ()
emitReturn results
= do { sequel <- getSequel;
; updfr_off <- getUpdFrameOff
- ; emit $ mkComment $ mkFastString ("emitReturn: " ++ show sequel)
+ ; emitComment $ mkFastString ("emitReturn: " ++ show sequel)
; case sequel of
Return _ ->
do { adjustHpBackwards
; emit (mkReturnSimple results updfr_off) }
AssignTo regs adjust ->
do { if adjust then adjustHpBackwards else return ()
- ; emit (mkMultiAssign regs results) }
+ ; emitMultiAssign regs results }
}
+
+-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@,
+-- using the call/return convention @conv@, passing @args@, and
+-- returning the results to the current sequel.
+--
emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ()
--- (cgCall fun args) makes a call to the entry-code of 'fun',
--- passing 'args', and returning the results to the current sequel
-emitCall convs@(callConv, _) fun args
+emitCall convs fun args
+ = emitCallWithExtraStack convs fun args noExtraStack
+
+
+-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the
+-- entry-code of @fun@, using the call/return convention @conv@,
+-- passing @args@, pushing some extra stack frames described by
+-- @stack@, and returning the results to the current sequel.
+--
+emitCallWithExtraStack
+ :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
+ -> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode ()
+emitCallWithExtraStack convs@(callConv, _) fun args extra_stack
= do { adjustHpBackwards
; sequel <- getSequel
; updfr_off <- getUpdFrameOff
- ; emit $ mkComment $ mkFastString ("emitCall: " ++ show sequel)
- ; case sequel of
- Return _ -> emit (mkForeignJump callConv fun args updfr_off)
- AssignTo res_regs _ -> emit (mkCall fun convs res_regs args updfr_off)
- }
+ ; emitComment $ mkFastString ("emitCallWithExtraStack: " ++ show sequel)
+ ; case sequel of
+ Return _ ->
+ emit $ mkForeignJumpExtra callConv fun args updfr_off extra_stack
+ AssignTo res_regs _ -> do
+ emit =<< mkCall fun convs res_regs args updfr_off extra_stack
+ }
+
adjustHpBackwards :: FCode ()
-- This function adjusts and heap pointers just before a tail call or
@@ -127,59 +152,137 @@ adjustHpBackwards
-- Making calls: directCall and slowCall
-------------------------------------------------------------------------
+-- General plan is:
+-- - we'll make *one* fast call, either to the function itself
+-- (directCall) or to stg_ap_<pat>_fast (slowCall)
+-- Any left-over arguments will be pushed on the stack,
+--
+-- e.g. Sp[old+8] = arg1
+-- Sp[old+16] = arg2
+-- Sp[old+32] = stg_ap_pp_info
+-- R2 = arg3
+-- R3 = arg4
+-- call f() return to Nothing updfr_off: 32
+
+
directCall :: CLabel -> RepArity -> [StgArg] -> FCode ()
-- (directCall f n args)
-- calls f(arg1, ..., argn), and applies the result to the remaining args
-- The function f has arity n, and there are guaranteed at least n args
-- Both arity and args include void args
directCall lbl arity stg_args
- = do { cmm_args <- getNonVoidArgAmodes stg_args
- ; direct_call "directCall" lbl arity cmm_args (argsReps stg_args) }
+ = do { argreps <- getArgRepsAmodes stg_args
+ ; direct_call "directCall" lbl arity argreps }
+
slowCall :: CmmExpr -> [StgArg] -> FCode ()
-- (slowCall fun args) applies fun to args, returning the results to Sequel
slowCall fun stg_args
- = do { cmm_args <- getNonVoidArgAmodes stg_args
- ; slow_call fun cmm_args (argsReps stg_args) }
+ = do { dflags <- getDynFlags
+ ; argsreps <- getArgRepsAmodes stg_args
+ ; let (rts_fun, arity) = slowCallPattern (map fst argsreps)
+ ; call <- getCode $ direct_call "slow_call"
+ (mkRtsApFastLabel rts_fun) arity argsreps
+ ; emitComment $ mkFastString ("slow_call for " ++
+ showSDoc dflags (ppr fun) ++
+ " with pat " ++ unpackFS rts_fun)
+ ; emit (mkAssign nodeReg fun <*> call)
+ }
+
--------------
-direct_call :: String -> CLabel -> RepArity -> [CmmExpr] -> [ArgRep] -> FCode ()
--- NB1: (length args) may be less than (length reps), because
--- the args exclude the void ones
--- NB2: 'arity' refers to the *reps*
-direct_call caller lbl arity args reps
- | debugIsOn && arity > length reps -- Too few args
+direct_call :: String -> CLabel -> RepArity -> [(ArgRep,Maybe CmmExpr)] -> FCode ()
+direct_call caller lbl arity args
+ | debugIsOn && arity > length args -- Too few args
= do -- Caller should ensure that there enough args!
- pprPanic "direct_call" (text caller <+> ppr arity
- <+> ppr lbl <+> ppr (length reps)
- <+> ppr args <+> ppr reps )
-
- | null rest_reps -- Precisely the right number of arguments
- = emitCall (NativeDirectCall, NativeReturn) target args
-
- | otherwise -- Over-saturated call
- = ASSERT( arity == length initial_reps )
- do { pap_id <- newTemp gcWord
- ; withSequel (AssignTo [pap_id] True)
- (emitCall (NativeDirectCall, NativeReturn) target fast_args)
- ; slow_call (CmmReg (CmmLocal pap_id))
- rest_args rest_reps }
+ pprPanic "direct_call" $
+ text caller <+> ppr arity <+>
+ ppr lbl <+> ppr (length args) <+>
+ ppr (map snd args) <+> ppr (map fst args)
+
+ | null rest_args -- Precisely the right number of arguments
+ = emitCall (NativeDirectCall, NativeReturn) target (nonVArgs args)
+
+ | otherwise -- Note [over-saturated calls]
+ = emitCallWithExtraStack (NativeDirectCall, NativeReturn)
+ target (nonVArgs fast_args) (mkStkOffsets stack_args)
where
target = CmmLit (CmmLabel lbl)
- (initial_reps, rest_reps) = splitAt arity reps
- arg_arity = count isNonV initial_reps
- (fast_args, rest_args) = splitAt arg_arity args
+ (fast_args, rest_args) = splitAt arity args
+ stack_args = slowArgs rest_args
---------------
-slow_call :: CmmExpr -> [CmmExpr] -> [ArgRep] -> FCode ()
-slow_call fun args reps
- = do dflags <- getDynFlags
- call <- getCode $ direct_call "slow_call" (mkRtsApFastLabel rts_fun) arity args reps
- emit $ mkComment $ mkFastString ("slow_call for " ++ showSDoc dflags (ppr fun) ++
- " with pat " ++ unpackFS rts_fun)
- emit (mkAssign nodeReg fun <*> call)
+
+-- When constructing calls, it is easier to keep the ArgReps and the
+-- CmmExprs zipped together. However, a void argument has no
+-- representation, so we need to use Maybe CmmExpr (the alternative of
+-- using zeroCLit or even undefined would work, but would be ugly).
+--
+getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)]
+getArgRepsAmodes = mapM getArgRepAmode
+ where getArgRepAmode arg
+ | V <- rep = return (V, Nothing)
+ | otherwise = do expr <- getArgAmode (NonVoid arg)
+ return (rep, Just expr)
+ where rep = toArgRep (argPrimRep arg)
+
+nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr]
+nonVArgs [] = []
+nonVArgs ((_,Nothing) : args) = nonVArgs args
+nonVArgs ((_,Just arg) : args) = arg : nonVArgs args
+
+{-
+Note [over-saturated calls]
+
+The natural thing to do for an over-saturated call would be to call
+the function with the correct number of arguments, and then apply the
+remaining arguments to the value returned, e.g.
+
+ f a b c d (where f has arity 2)
+ -->
+ r = call f(a,b)
+ call r(c,d)
+
+but this entails
+ - saving c and d on the stack
+ - making a continuation info table
+ - at the continuation, loading c and d off the stack into regs
+ - finally, call r
+
+Note that since there are a fixed number of different r's
+(e.g. stg_ap_pp_fast), we can also pre-compile continuations
+that correspond to each of them, rather than generating a fresh
+one for each over-saturated call.
+
+Not only does this generate much less code, it is faster too. We will
+generate something like:
+
+Sp[old+16] = c
+Sp[old+24] = d
+Sp[old+32] = stg_ap_pp_info
+call f(a,b) -- usual calling convention
+
+For the purposes of the CmmCall node, we count this extra stack as
+just more arguments that we are passing on the stack (cml_args).
+-}
+
+-- | 'slowArgs' takes a list of function arguments and prepares them for
+-- pushing on the stack for "extra" arguments to a function which requires
+-- fewer arguments than we currently have.
+slowArgs :: [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)]
+slowArgs [] = []
+slowArgs args -- careful: reps contains voids (V), but args does not
+ | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest_args
+ | otherwise = this_pat ++ slowArgs rest_args
where
- (rts_fun, arity) = slowCallPattern reps
+ (arg_pat, n) = slowCallPattern (map fst args)
+ (call_args, rest_args) = splitAt n args
+
+ stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat
+ this_pat = (N, Just (mkLblExpr stg_ap_pat)) : call_args
+ save_cccs = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)]
+ save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs")
+
+
-- These cases were found to cover about 99% of all slow calls:
slowCallPattern :: [ArgRep] -> (FastString, RepArity)
@@ -202,6 +305,30 @@ slowCallPattern [] = (fsLit "stg_ap_0", 0)
-------------------------------------------------------------------------
+-- Fix the byte-offsets of a bunch of things to push on the stack
+
+-- This is used for pushing slow-call continuations.
+-- See Note [over-saturated calls].
+
+mkStkOffsets
+ :: [(ArgRep, Maybe CmmExpr)] -- things to make offsets for
+ -> ( ByteOff -- OUTPUTS: Topmost allocated word
+ , [(CmmExpr, ByteOff)] ) -- things with offsets (voids filtered out)
+mkStkOffsets things
+ = loop 0 [] (reverse things)
+ where
+ loop offset offs [] = (offset,offs)
+ loop offset offs ((_,Nothing):things) = loop offset offs things
+ -- ignore Void arguments
+ loop offset offs ((rep,Just thing):things)
+ = loop thing_off ((thing, thing_off):offs) things
+ where
+ thing_off = offset + argRepSizeW rep * wORD_SIZE
+ -- offset of thing is offset+size, because we're
+ -- growing the stack *downwards* as the offsets increase.
+
+
+-------------------------------------------------------------------------
-- Classifying arguments: ArgRep
-------------------------------------------------------------------------
@@ -237,10 +364,7 @@ isNonV :: ArgRep -> Bool
isNonV V = False
isNonV _ = True
-argsReps :: [StgArg] -> [ArgRep]
-argsReps = map (toArgRep . argPrimRep)
-
-argRepSizeW :: ArgRep -> WordOff -- Size in words
+argRepSizeW :: ArgRep -> WordOff -- Size in words
argRepSizeW N = 1
argRepSizeW P = 1
argRepSizeW F = 1
diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs
index 4eea38e22c..cc9919a4a0 100644
--- a/compiler/codeGen/StgCmmMonad.hs
+++ b/compiler/codeGen/StgCmmMonad.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE GADTs #-}
-----------------------------------------------------------------------------
--
-- Monad for Stg to C-- code generation
@@ -16,16 +17,21 @@
module StgCmmMonad (
FCode, -- type
- initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs,
+ initC, runC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs,
returnFC, fixC, fixC_, nopC, whenC,
newUnique, newUniqSupply,
+ newLabelC, emitLabel,
+
emit, emitDecl, emitProc, emitProcWithConvention, emitSimpleProc,
+ emitOutOfLine, emitAssign, emitStore, emitComment,
getCmm, cgStmtsToBlocks,
getCodeR, getCode, getHeapUsage,
- forkClosureBody, forkStatics, forkAlts, forkProc, codeOnly,
+ mkCmmIfThenElse, mkCmmIfThen, mkCall, mkCmmCall, mkSafeCall,
+
+ forkClosureBody, forkStatics, forkAlts, forkProc, codeOnly,
ConTagZ,
@@ -69,12 +75,12 @@ import VarEnv
import OrdList
import Unique
import UniqSupply
-import FastString(sLit)
+import FastString
import Outputable
import Control.Monad
import Data.List
-import Prelude hiding( sequence )
+import Prelude hiding( sequence, succ )
import qualified Prelude( sequence )
infixr 9 `thenC` -- Right-associative!
@@ -95,12 +101,12 @@ instance Monad FCode where
{-# INLINE thenFC #-}
{-# INLINE returnFC #-}
-initC :: DynFlags -> Module -> FCode a -> IO a
-initC dflags mod (FCode code)
- = do { uniqs <- mkSplitUniqSupply 'c'
- ; case code (initCgInfoDown dflags mod) (initCgState uniqs) of
- (res, _) -> return res
- }
+initC :: IO CgState
+initC = do { uniqs <- mkSplitUniqSupply 'c'
+ ; return (initCgState uniqs) }
+
+runC :: DynFlags -> Module -> CgState -> FCode a -> (a,CgState)
+runC dflags mod st (FCode code) = code (initCgInfoDown dflags mod) st
returnFC :: a -> FCode a
returnFC val = FCode (\_info_down state -> (val, state))
@@ -270,6 +276,8 @@ data HeapUsage =
type VirtualHpOffset = WordOff
+
+
initCgState :: UniqSupply -> CgState
initCgState uniqs
= MkCgState { cgs_stmts = mkNop, cgs_tops = nilOL,
@@ -308,7 +316,6 @@ initHpUsage = HeapUsage { virtHp = 0, realHp = 0 }
maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage
hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw }
-
--------------------------------------------------------
-- Operators for getting and setting the state and "info_down".
--------------------------------------------------------
@@ -591,6 +598,33 @@ getHeapUsage fcode
-- ----------------------------------------------------------------------------
-- Combinators for emitting code
+emitCgStmt :: CgStmt -> FCode ()
+emitCgStmt stmt
+ = do { state <- getState
+ ; setState $ state { cgs_stmts = cgs_stmts state `snocOL` stmt }
+ }
+
+emitLabel :: BlockId -> FCode ()
+emitLabel id = emitCgStmt (CgLabel id)
+
+emitComment :: FastString -> FCode ()
+#if 0 /* def DEBUG */
+emitComment s = emitCgStmt (CgStmt (CmmComment s))
+#else
+emitComment _ = return ()
+#endif
+
+emitAssign :: CmmReg -> CmmExpr -> FCode ()
+emitAssign l r = emitCgStmt (CgStmt (CmmAssign l r))
+
+emitStore :: CmmExpr -> CmmExpr -> FCode ()
+emitStore l r = emitCgStmt (CgStmt (CmmStore l r))
+
+
+newLabelC :: FCode BlockId
+newLabelC = do { u <- newUnique
+ ; return $ mkBlockId u }
+
emit :: CmmAGraph -> FCode ()
emit ag
= do { state <- getState
@@ -601,6 +635,9 @@ emitDecl decl
= do { state <- getState
; setState $ state { cgs_tops = cgs_tops state `snocOL` decl } }
+emitOutOfLine :: BlockId -> CmmAGraph -> FCode ()
+emitOutOfLine l stmts = emitCgStmt (CgFork l stmts)
+
emitProcWithConvention :: Convention -> CmmInfoTable -> CLabel -> [CmmFormal] ->
CmmAGraph -> FCode ()
emitProcWithConvention conv info lbl args blocks
@@ -629,6 +666,55 @@ getCmm code
; setState $ state2 { cgs_tops = cgs_tops state1 }
; return (fromOL (cgs_tops state2)) }
+
+mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> FCode CmmAGraph
+mkCmmIfThenElse e tbranch fbranch = do
+ endif <- newLabelC
+ tid <- newLabelC
+ fid <- newLabelC
+ return $ mkCbranch e tid fid <*>
+ mkLabel tid <*> tbranch <*> mkBranch endif <*>
+ mkLabel fid <*> fbranch <*> mkLabel endif
+
+mkCmmIfThen :: CmmExpr -> CmmAGraph -> FCode CmmAGraph
+mkCmmIfThen e tbranch = do
+ endif <- newLabelC
+ tid <- newLabelC
+ return $ mkCbranch e tid endif <*>
+ mkLabel tid <*> tbranch <*> mkLabel endif
+
+
+mkCall :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmActual]
+ -> UpdFrameOffset -> (ByteOff,[(CmmExpr,ByteOff)]) -> FCode CmmAGraph
+mkCall f (callConv, retConv) results actuals updfr_off extra_stack = do
+ k <- newLabelC
+ let area = Young k
+ (off, copyin) = copyInOflow retConv area results
+ copyout = mkCallReturnsTo f callConv actuals k off updfr_off extra_stack
+ return (copyout <*> mkLabel k <*> copyin)
+
+mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmActual] -> UpdFrameOffset
+ -> FCode CmmAGraph
+mkCmmCall f results actuals updfr_off
+ = mkCall f (NativeDirectCall, NativeReturn) results actuals updfr_off (0,[])
+
+
+mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual]
+ -> UpdFrameOffset -> Bool
+ -> FCode CmmAGraph
+mkSafeCall t fs as upd i = do
+ k <- newLabelC
+ let (_off, copyout) = copyInOflow NativeReturn (Young k) fs
+ -- see Note [safe foreign call convention]
+ return
+ ( mkStore (CmmStackSlot (Young k) (widthInBytes wordWidth))
+ (CmmLit (CmmBlock k))
+ <*> mkLast (CmmForeignCall { tgt=t, res=fs, args=as, succ=k
+ , updfr=upd, intrbl=i })
+ <*> mkLabel k
+ <*> copyout
+ )
+
-- ----------------------------------------------------------------------------
-- CgStmts
@@ -640,4 +726,3 @@ cgStmtsToBlocks :: CmmAGraph -> FCode CmmGraph
cgStmtsToBlocks stmts
= do { us <- newUniqSupply
; return (initUs_ us (lgraphOfAGraph stmts)) }
-
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index efa234b5a6..bd783a3b30 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -14,7 +14,9 @@
-- for details
module StgCmmPrim (
- cgOpApp
+ cgOpApp,
+ cgPrimOp -- internal(ish), used by cgCase to get code for a
+ -- comparison without also turning it into a Bool.
) where
#include "HsVersions.h"
@@ -67,14 +69,9 @@ cgOpApp :: StgOp -- The op
-- Foreign calls
cgOpApp (StgFCallOp fcall _) stg_args res_ty
- = do { (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
- -- Choose result regs r1, r2
- -- Note [Foreign call results]
- ; cgForeignCall res_regs res_hints fcall stg_args
- -- r1, r2 = foo( x, y )
- ; emitReturn (map (CmmReg . CmmLocal) res_regs) }
- -- return (r1, r2)
-
+ = cgForeignCall fcall stg_args res_ty
+ -- Note [Foreign call results]
+
-- tagToEnum# is special: we need to pull the constructor
-- out of the table, and perform an appropriate return.
@@ -229,23 +226,23 @@ emitPrimOp [res] SparkOp [arg]
[(tmp2,NoHint)]
(CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "newSpark"))))
[(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)]
- emit (mkAssign (CmmLocal res) (CmmReg (CmmLocal tmp)))
+ emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp))
emitPrimOp [res] GetCCSOfOp [arg]
- = emit (mkAssign (CmmLocal res) val)
+ = emitAssign (CmmLocal res) val
where
val | opt_SccProfilingOn = costCentreFrom (cmmUntag arg)
| otherwise = CmmLit zeroCLit
emitPrimOp [res] GetCurrentCCSOp [_dummy_arg]
- = emit (mkAssign (CmmLocal res) curCCS)
+ = emitAssign (CmmLocal res) curCCS
emitPrimOp [res] ReadMutVarOp [mutv]
- = emit (mkAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord))
+ = emitAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord)
emitPrimOp [] WriteMutVarOp [mutv,var]
= do
- emit (mkStore (cmmOffsetW mutv fixedHdrSize) var)
+ emitStore (cmmOffsetW mutv fixedHdrSize) var
emitCCall
[{-no results-}]
(CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
@@ -269,32 +266,32 @@ emitPrimOp res@[] TouchOp args@[_arg]
-- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
emitPrimOp [res] ByteArrayContents_Char [arg]
- = emit (mkAssign (CmmLocal res) (cmmOffsetB arg arrWordsHdrSize))
+ = emitAssign (CmmLocal res) (cmmOffsetB arg arrWordsHdrSize)
-- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
emitPrimOp [res] StableNameToIntOp [arg]
- = emit (mkAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize bWord))
+ = emitAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize bWord)
-- #define eqStableNamezh(r,sn1,sn2) \
-- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
emitPrimOp [res] EqStableNameOp [arg1,arg2]
- = emit (mkAssign (CmmLocal res) (CmmMachOp mo_wordEq [
+ = emitAssign (CmmLocal res) (CmmMachOp mo_wordEq [
cmmLoadIndexW arg1 fixedHdrSize bWord,
cmmLoadIndexW arg2 fixedHdrSize bWord
- ]))
+ ])
emitPrimOp [res] ReallyUnsafePtrEqualityOp [arg1,arg2]
- = emit (mkAssign (CmmLocal res) (CmmMachOp mo_wordEq [arg1,arg2]))
+ = emitAssign (CmmLocal res) (CmmMachOp mo_wordEq [arg1,arg2])
-- #define addrToHValuezh(r,a) r=(P_)a
emitPrimOp [res] AddrToAnyOp [arg]
- = emit (mkAssign (CmmLocal res) arg)
+ = emitAssign (CmmLocal res) arg
-- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
-- Note: argument may be tagged!
emitPrimOp [res] DataToTagOp [arg]
- = emit (mkAssign (CmmLocal res) (getConstrTag (cmmUntag arg)))
+ = emitAssign (CmmLocal res) (getConstrTag (cmmUntag arg))
{- Freezing arrays-of-ptrs requires changing an info table, for the
benefit of the generational collector. It needs to scavenge mutable
@@ -317,7 +314,7 @@ emitPrimOp [res] UnsafeFreezeArrayArrayOp [arg]
-- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
emitPrimOp [res] UnsafeFreezeByteArrayOp [arg]
- = emit (mkAssign (CmmLocal res) arg)
+ = emitAssign (CmmLocal res) arg
-- Copying pointer arrays
@@ -497,11 +494,11 @@ emitPrimOp [res] PopCntOp [w] = emitPopCntCall res w wordWidth
-- The rest just translate straightforwardly
emitPrimOp [res] op [arg]
| nopOp op
- = emit (mkAssign (CmmLocal res) arg)
+ = emitAssign (CmmLocal res) arg
| Just (mop,rep) <- narrowOp op
- = emit (mkAssign (CmmLocal res) $
- CmmMachOp (mop rep wordWidth) [CmmMachOp (mop wordWidth rep) [arg]])
+ = emitAssign (CmmLocal res) $
+ CmmMachOp (mop rep wordWidth) [CmmMachOp (mop wordWidth rep) [arg]]
emitPrimOp r@[res] op args
| Just prim <- callishOp op
@@ -746,15 +743,15 @@ loadArrPtrsSize addr = CmmLoad (cmmOffsetB addr off) bWord
mkBasicIndexedRead :: ByteOff -> Maybe MachOp -> CmmType
-> LocalReg -> CmmExpr -> CmmExpr -> FCode ()
mkBasicIndexedRead off Nothing read_rep res base idx
- = emit (mkAssign (CmmLocal res) (cmmLoadIndexOffExpr off read_rep base idx))
+ = emitAssign (CmmLocal res) (cmmLoadIndexOffExpr off read_rep base idx)
mkBasicIndexedRead off (Just cast) read_rep res base idx
- = emit (mkAssign (CmmLocal res) (CmmMachOp cast [
- cmmLoadIndexOffExpr off read_rep base idx]))
+ = emitAssign (CmmLocal res) (CmmMachOp cast [
+ cmmLoadIndexOffExpr off read_rep base idx])
mkBasicIndexedWrite :: ByteOff -> Maybe MachOp
-> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
mkBasicIndexedWrite off Nothing base idx val
- = emit (mkStore (cmmIndexOffExpr off (typeWidth (cmmExprType val)) base idx) val)
+ = emitStore (cmmIndexOffExpr off (typeWidth (cmmExprType val)) base idx) val
mkBasicIndexedWrite off (Just cast) base idx val
= mkBasicIndexedWrite off Nothing base idx (CmmMachOp cast [val])
@@ -805,7 +802,7 @@ doCopyMutableByteArrayOp = emitCopyByteArray copy
getCode $ emitMemmoveCall dst_p src_p bytes (CmmLit (mkIntCLit 1)),
getCode $ emitMemcpyCall dst_p src_p bytes (CmmLit (mkIntCLit 1))
]
- emit $ mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall
+ emit =<< mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall
emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-> FCode ())
@@ -875,7 +872,7 @@ doCopyMutableArrayOp = emitCopyArray copy
getCode $ emitMemmoveCall dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE)),
getCode $ emitMemcpyCall dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE))
]
- emit $ mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall
+ emit =<< mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall
emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr
-> FCode ())
diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs
index 6a53317385..9ff4d0be07 100644
--- a/compiler/codeGen/StgCmmProf.hs
+++ b/compiler/codeGen/StgCmmProf.hs
@@ -103,7 +103,7 @@ initUpdFrameProf :: CmmExpr -> FCode ()
-- Initialise the profiling field of an update frame
initUpdFrameProf frame_amode
= ifProfiling $ -- frame->header.prof.ccs = CCCS
- emit (mkStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS)
+ emitStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS
-- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0)
-- is unnecessary because it is not used anyhow.
@@ -143,7 +143,7 @@ saveCurrentCostCentre
= return Nothing
| otherwise
= do { local_cc <- newTemp ccType
- ; emit (mkAssign (CmmLocal local_cc) curCCS)
+ ; emitAssign (CmmLocal local_cc) curCCS
; return (Just local_cc) }
restoreCurrentCostCentre :: Maybe LocalReg -> FCode ()
@@ -338,9 +338,9 @@ ldvEnter cl_ptr
-- if (era > 0) {
-- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) |
-- era | LDV_STATE_USE }
- emit (mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit])
+ emit =<< mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit])
(mkStore ldv_wd new_ldv_wd)
- mkNop)
+ mkNop
where
-- don't forget to substract node's tag
ldv_wd = ldvWord cl_ptr
diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs
index d0432315ab..698bf32709 100644
--- a/compiler/codeGen/StgCmmTicky.hs
+++ b/compiler/codeGen/StgCmmTicky.hs
@@ -180,7 +180,7 @@ registerTickyCtr :: CLabel -> FCode ()
-- ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */
-- f_ct.registeredp = 1 }
registerTickyCtr ctr_lbl
- = emit (mkCmmIfThen test (catAGraphs register_stmts))
+ = emit =<< mkCmmIfThen test (catAGraphs register_stmts)
where
-- krc: code generator doesn't handle Not, so we test for Eq 0 instead
test = CmmMachOp (MO_Eq wordWidth)
@@ -352,7 +352,7 @@ bumpHistogram _lbl _n
bumpHistogramE :: LitString -> CmmExpr -> FCode ()
bumpHistogramE lbl n
= do t <- newTemp cLong
- emit (mkAssign (CmmLocal t) n)
+ emitAssign (CmmLocal t) n
emit (mkCmmIfThen (CmmMachOp (MO_U_Le cLongWidth) [CmmReg (CmmLocal t), eight])
(mkAssign (CmmLocal t) eight))
emit (addToMem cLong
diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs
index bb4a653c05..7609cfe38d 100644
--- a/compiler/codeGen/StgCmmUtils.hs
+++ b/compiler/codeGen/StgCmmUtils.hs
@@ -18,12 +18,11 @@ module StgCmmUtils (
emitDataLits, mkDataLits,
emitRODataLits, mkRODataLits,
emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult, emitRtsCallGen,
- assignTemp, newTemp, withTemp,
+ assignTemp, newTemp,
newUnboxedTupleRegs,
- mkMultiAssign, mkCmmSwitch, mkCmmLitSwitch,
- emitSwitch,
+ emitMultiAssign, emitCmmLitSwitch, emitSwitch,
tagToClosure, mkTaggedObjectLoad,
@@ -72,6 +71,7 @@ import Module
import Literal
import Digraph
import ListSetOps
+import VarSet
import Util
import Unique
import DynFlags
@@ -204,14 +204,14 @@ emitRtsCallGen
emitRtsCallGen res pkg fun args _vols safe
= do { updfr_off <- getUpdFrameOff
; emit caller_save
- ; emit $ call updfr_off
+ ; call updfr_off
; emit caller_load }
where
call updfr_off =
if safe then
- mkCmmCall fun_expr res' args' updfr_off
+ emit =<< mkCmmCall fun_expr res' args' updfr_off
else
- mkUnsafeCall (ForeignTarget fun_expr
+ emit $ mkUnsafeCall (ForeignTarget fun_expr
(ForeignConvention CCallConv arg_hints res_hints)) res' args'
(args', arg_hints) = unzip args
(res', res_hints) = unzip res
@@ -441,7 +441,7 @@ assignTemp :: CmmExpr -> FCode LocalReg
assignTemp (CmmReg (CmmLocal reg)) = return reg
assignTemp e = do { uniq <- newUnique
; let reg = LocalReg uniq (cmmExprType e)
- ; emit (mkAssign (CmmLocal reg) e)
+ ; emitAssign (CmmLocal reg) e
; return reg }
newTemp :: CmmType -> FCode LocalReg
@@ -471,10 +471,10 @@ newUnboxedTupleRegs res_ty
-------------------------------------------------------------------------
--- mkMultiAssign
+-- emitMultiAssign
-------------------------------------------------------------------------
-mkMultiAssign :: [LocalReg] -> [CmmExpr] -> CmmAGraph
+emitMultiAssign :: [LocalReg] -> [CmmExpr] -> FCode ()
-- Emit code to perform the assignments in the
-- input simultaneously, using temporary variables when necessary.
@@ -489,14 +489,13 @@ type Stmt = (LocalReg, CmmExpr) -- r := e
-- s1 assigns to something s2 uses
-- that is, if s1 should *follow* s2 in the final order
-mkMultiAssign [] [] = mkNop
-mkMultiAssign [reg] [rhs] = mkAssign (CmmLocal reg) rhs
-mkMultiAssign regs rhss = ASSERT( equalLength regs rhss )
- unscramble ([1..] `zip` (regs `zip` rhss))
+emitMultiAssign [] [] = return ()
+emitMultiAssign [reg] [rhs] = emitAssign (CmmLocal reg) rhs
+emitMultiAssign regs rhss = ASSERT( equalLength regs rhss )
+ unscramble ([1..] `zip` (regs `zip` rhss))
-unscramble :: [Vrtx] -> CmmAGraph
-unscramble vertices
- = catAGraphs (map do_component components)
+unscramble :: [Vrtx] -> FCode ()
+unscramble vertices = mapM_ do_component components
where
edges :: [ (Vrtx, Key, [Key]) ]
edges = [ (vertex, key1, edges_from stmt1)
@@ -511,19 +510,19 @@ unscramble vertices
-- do_components deal with one strongly-connected component
-- Not cyclic, or singleton? Just do it
- do_component :: SCC Vrtx -> CmmAGraph
- do_component (AcyclicSCC (_,stmt)) = mk_graph stmt
+ do_component :: SCC Vrtx -> FCode ()
+ do_component (AcyclicSCC (_,stmt)) = mk_graph stmt
do_component (CyclicSCC []) = panic "do_component"
do_component (CyclicSCC [(_,stmt)]) = mk_graph stmt
-- Cyclic? Then go via temporaries. Pick one to
-- break the loop and try again with the rest.
- do_component (CyclicSCC ((_,first_stmt) : rest))
- = withUnique $ \u ->
+ do_component (CyclicSCC ((_,first_stmt) : rest)) = do
+ u <- newUnique
let (to_tmp, from_tmp) = split u first_stmt
- in mk_graph to_tmp
- <*> unscramble rest
- <*> mk_graph from_tmp
+ mk_graph to_tmp
+ unscramble rest
+ mk_graph from_tmp
split :: Unique -> Stmt -> (Stmt, Stmt)
split uniq (reg, rhs)
@@ -532,8 +531,8 @@ unscramble vertices
rep = cmmExprType rhs
tmp = LocalReg uniq rep
- mk_graph :: Stmt -> CmmAGraph
- mk_graph (reg, rhs) = mkAssign (CmmLocal reg) rhs
+ mk_graph :: Stmt -> FCode ()
+ mk_graph (reg, rhs) = emitAssign (CmmLocal reg) rhs
mustFollow :: Stmt -> Stmt -> Bool
(reg, _) `mustFollow` (_, rhs) = CmmLocal reg `regUsedIn` rhs
@@ -551,7 +550,7 @@ emitSwitch :: CmmExpr -- Tag to switch on
-> FCode ()
emitSwitch tag_expr branches mb_deflt lo_tag hi_tag
= do { dflags <- getDynFlags
- ; emit (mkCmmSwitch (via_C dflags) tag_expr branches mb_deflt lo_tag hi_tag) }
+ ; mkCmmSwitch (via_C dflags) tag_expr branches mb_deflt lo_tag hi_tag }
where
via_C dflags | HscC <- hscTarget dflags = True
| otherwise = False
@@ -563,38 +562,40 @@ mkCmmSwitch :: Bool -- True <=> never generate a conditional tree
-> Maybe CmmAGraph -- Default branch (if any)
-> ConTagZ -> ConTagZ -- Min and Max possible values; behaviour
-- outside this range is undefined
- -> CmmAGraph
+ -> FCode ()
-- First, two rather common cases in which there is no work to do
-mkCmmSwitch _ _ [] (Just code) _ _ = code
-mkCmmSwitch _ _ [(_,code)] Nothing _ _ = code
+mkCmmSwitch _ _ [] (Just code) _ _ = emit code
+mkCmmSwitch _ _ [(_,code)] Nothing _ _ = emit code
-- Right, off we go
-mkCmmSwitch via_C tag_expr branches mb_deflt lo_tag hi_tag
- = withFreshLabel "switch join" $ \ join_lbl ->
- label_default join_lbl mb_deflt $ \ mb_deflt ->
- label_branches join_lbl branches $ \ branches ->
- assignTemp' tag_expr $ \tag_expr' ->
+mkCmmSwitch via_C tag_expr branches mb_deflt lo_tag hi_tag = do
+ join_lbl <- newLabelC
+ mb_deflt_lbl <- label_default join_lbl mb_deflt
+ branches_lbls <- label_branches join_lbl branches
+ tag_expr' <- assignTemp' tag_expr
- mk_switch tag_expr' (sortBy (comparing fst) branches) mb_deflt
- lo_tag hi_tag via_C
- -- Sort the branches before calling mk_switch
- <*> mkLabel join_lbl
+ emit =<< mk_switch tag_expr' (sortBy (comparing fst) branches) mb_deflt_lbl
+ lo_tag hi_tag via_C
+
+ -- Sort the branches before calling mk_switch
+
+ emitLabel join_lbl
mk_switch :: CmmExpr -> [(ConTagZ, BlockId)]
-> Maybe BlockId
-> ConTagZ -> ConTagZ -> Bool
- -> CmmAGraph
+ -> FCode CmmAGraph
-- SINGLETON TAG RANGE: no case analysis to do
mk_switch _tag_expr [(tag, lbl)] _ lo_tag hi_tag _via_C
| lo_tag == hi_tag
= ASSERT( tag == lo_tag )
- mkBranch lbl
+ return (mkBranch lbl)
-- SINGLETON BRANCH, NO DEFAULT: no case analysis to do
mk_switch _tag_expr [(_tag,lbl)] Nothing _ _ _
- = mkBranch lbl
+ = return (mkBranch lbl)
-- The simplifier might have eliminated a case
-- so we may have e.g. case xs of
-- [] -> e
@@ -603,7 +604,7 @@ mk_switch _tag_expr [(_tag,lbl)] Nothing _ _ _
-- SINGLETON BRANCH: one equality check to do
mk_switch tag_expr [(tag,lbl)] (Just deflt) _ _ _
- = mkCbranch cond deflt lbl
+ = return (mkCbranch cond deflt lbl)
where
cond = cmmNeWord tag_expr (CmmLit (mkIntCLit tag))
-- We have lo_tag < hi_tag, but there's only one branch,
@@ -636,30 +637,34 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C
arms :: [Maybe BlockId]
arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]]
in
- mkSwitch (cmmOffset tag_expr (- real_lo_tag)) arms
+ return (mkSwitch (cmmOffset tag_expr (- real_lo_tag)) arms)
-- if we can knock off a bunch of default cases with one if, then do so
| Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches
- = mkCmmIfThenElse
+ = do stmts <- mk_switch tag_expr branches mb_deflt
+ lowest_branch hi_tag via_C
+ mkCmmIfThenElse
(cmmULtWord tag_expr (CmmLit (mkIntCLit lowest_branch)))
(mkBranch deflt)
- (mk_switch tag_expr branches mb_deflt
- lowest_branch hi_tag via_C)
+ stmts
| Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches
- = mkCmmIfThenElse
+ = do stmts <- mk_switch tag_expr branches mb_deflt
+ lo_tag highest_branch via_C
+ mkCmmIfThenElse
(cmmUGtWord tag_expr (CmmLit (mkIntCLit highest_branch)))
(mkBranch deflt)
- (mk_switch tag_expr branches mb_deflt
- lo_tag highest_branch via_C)
+ stmts
| otherwise -- Use an if-tree
- = mkCmmIfThenElse
+ = do lo_stmts <- mk_switch tag_expr lo_branches mb_deflt
+ lo_tag (mid_tag-1) via_C
+ hi_stmts <- mk_switch tag_expr hi_branches mb_deflt
+ mid_tag hi_tag via_C
+ mkCmmIfThenElse
(cmmUGeWord tag_expr (CmmLit (mkIntCLit mid_tag)))
- (mk_switch tag_expr hi_branches mb_deflt
- mid_tag hi_tag via_C)
- (mk_switch tag_expr lo_branches mb_deflt
- lo_tag (mid_tag-1) via_C)
+ hi_stmts
+ lo_stmts
-- we test (e >= mid_tag) rather than (e < mid_tag), because
-- the former works better when e is a comparison, and there
-- are two tags 0 & 1 (mid_tag == 1). In this case, the code
@@ -714,30 +719,31 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C
is_lo (t,_) = t < mid_tag
--------------
-mkCmmLitSwitch :: CmmExpr -- Tag to switch on
+emitCmmLitSwitch :: CmmExpr -- Tag to switch on
-> [(Literal, CmmAGraph)] -- Tagged branches
-> CmmAGraph -- Default branch (always)
- -> CmmAGraph -- Emit the code
+ -> FCode () -- Emit the code
-- Used for general literals, whose size might not be a word,
-- where there is always a default case, and where we don't know
-- the range of values for certain. For simplicity we always generate a tree.
--
-- ToDo: for integers we could do better here, perhaps by generalising
-- mk_switch and using that. --SDM 15/09/2004
-mkCmmLitSwitch _scrut [] deflt = deflt
-mkCmmLitSwitch scrut branches deflt
- = assignTemp' scrut $ \ scrut' ->
- withFreshLabel "switch join" $ \ join_lbl ->
- label_code join_lbl deflt $ \ deflt ->
- label_branches join_lbl branches $ \ branches ->
- mk_lit_switch scrut' deflt (sortBy (comparing fst) branches)
- <*> mkLabel join_lbl
+emitCmmLitSwitch _scrut [] deflt = emit deflt
+emitCmmLitSwitch scrut branches deflt = do
+ scrut' <- assignTemp' scrut
+ join_lbl <- newLabelC
+ deflt_lbl <- label_code join_lbl deflt
+ branches_lbls <- label_branches join_lbl branches
+ emit =<< mk_lit_switch scrut' deflt_lbl
+ (sortBy (comparing fst) branches_lbls)
+ emitLabel join_lbl
mk_lit_switch :: CmmExpr -> BlockId
-> [(Literal,BlockId)]
- -> CmmAGraph
+ -> FCode CmmAGraph
mk_lit_switch scrut deflt [(lit,blk)]
- = mkCbranch (CmmMachOp ne [scrut, CmmLit cmm_lit]) deflt blk
+ = return (mkCbranch (CmmMachOp ne [scrut, CmmLit cmm_lit]) deflt blk)
where
cmm_lit = mkSimpleLit lit
cmm_ty = cmmLitType cmm_lit
@@ -745,9 +751,9 @@ mk_lit_switch scrut deflt [(lit,blk)]
ne = if isFloatType cmm_ty then MO_F_Ne rep else MO_Ne rep
mk_lit_switch scrut deflt_blk_id branches
- = mkCmmIfThenElse cond
- (mk_lit_switch scrut deflt_blk_id lo_branches)
- (mk_lit_switch scrut deflt_blk_id hi_branches)
+ = do lo_blk <- mk_lit_switch scrut deflt_blk_id lo_branches
+ hi_blk <- mk_lit_switch scrut deflt_blk_id hi_branches
+ mkCmmIfThenElse cond lo_blk hi_blk
where
n_branches = length branches
(mid_lit,_) = branches !! (n_branches `div` 2)
@@ -761,49 +767,42 @@ mk_lit_switch scrut deflt_blk_id branches
--------------
-label_default :: BlockId -> Maybe CmmAGraph
- -> (Maybe BlockId -> CmmAGraph)
- -> CmmAGraph
-label_default _ Nothing thing_inside
- = thing_inside Nothing
-label_default join_lbl (Just code) thing_inside
- = label_code join_lbl code $ \ lbl ->
- thing_inside (Just lbl)
+label_default :: BlockId -> Maybe CmmAGraph -> FCode (Maybe BlockId)
+label_default _ Nothing
+ = return Nothing
+label_default join_lbl (Just code)
+ = do lbl <- label_code join_lbl code
+ return (Just lbl)
--------------
-label_branches :: BlockId -> [(a,CmmAGraph)]
- -> ([(a,BlockId)] -> CmmAGraph)
- -> CmmAGraph
-label_branches _join_lbl [] thing_inside
- = thing_inside []
-label_branches join_lbl ((tag,code):branches) thing_inside
- = label_code join_lbl code $ \ lbl ->
- label_branches join_lbl branches $ \ branches' ->
- thing_inside ((tag,lbl):branches')
+label_branches :: BlockId -> [(a,CmmAGraph)] -> FCode [(a,BlockId)]
+label_branches _join_lbl []
+ = return []
+label_branches join_lbl ((tag,code):branches)
+ = do lbl <- label_code join_lbl code
+ branches' <- label_branches join_lbl branches
+ return ((tag,lbl):branches')
--------------
-label_code :: BlockId -> CmmAGraph -> (BlockId -> CmmAGraph) -> CmmAGraph
--- (label_code J code fun)
+label_code :: BlockId -> CmmAGraph -> FCode BlockId
+-- label_code J code
-- generates
--- [L: code; goto J] fun L
-label_code join_lbl code thing_inside
- = withFreshLabel "switch" $ \lbl ->
- outOfLine (mkLabel lbl <*> code <*> mkBranch join_lbl)
- <*> thing_inside lbl
-
+-- [L: code; goto J]
+-- and returns L
+label_code join_lbl code = do
+ lbl <- newLabelC
+ emitOutOfLine lbl (code <*> mkBranch join_lbl)
+ return lbl
--------------
-assignTemp' :: CmmExpr -> (CmmExpr -> CmmAGraph) -> CmmAGraph
-assignTemp' e thing_inside
- | isTrivialCmmExpr e = thing_inside e
- | otherwise = withTemp (cmmExprType e) $ \ lreg ->
- let reg = CmmLocal lreg in
- mkAssign reg e <*> thing_inside (CmmReg reg)
-
-withTemp :: CmmType -> (LocalReg -> CmmAGraph) -> CmmAGraph
-withTemp rep thing_inside
- = withUnique $ \uniq -> thing_inside (LocalReg uniq rep)
-
+assignTemp' :: CmmExpr -> FCode CmmExpr
+assignTemp' e
+ | isTrivialCmmExpr e = return e
+ | otherwise = do
+ lreg <- newTemp (cmmExprType e)
+ let reg = CmmLocal lreg
+ emitAssign reg e
+ return (CmmReg reg)
-------------------------------------------------------------------------
--
@@ -811,36 +810,13 @@ withTemp rep thing_inside
--
-------------------------------------------------------------------------
--- There is just one SRT for each top level binding; all the nested
--- bindings use sub-sections of this SRT. The label is passed down to
--- the nested bindings via the monad.
-
-getSRTInfo :: SRT -> FCode C_SRT
-getSRTInfo (SRTEntries {}) = panic "getSRTInfo"
-
-getSRTInfo (SRT off len bmp)
- | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape]
- = do { id <- newUnique
- -- ; top_srt <- getSRTLabel
- ; let srt_desc_lbl = mkLargeSRTLabel id
- -- JD: We're not constructing and emitting SRTs in the back end,
- -- which renders this code wrong (it now names a now-non-existent label).
- -- ; emitRODataLits srt_desc_lbl
- -- ( cmmLabelOffW top_srt off
- -- : mkWordCLit (fromIntegral len)
- -- : map mkWordCLit bmp)
- ; return (C_SRT srt_desc_lbl 0 srt_escape) }
-
- | otherwise
- = do { top_srt <- getSRTLabel
- ; return (C_SRT top_srt off (fromIntegral (head bmp))) }
- -- The fromIntegral converts to StgHalfWord
-
-getSRTInfo NoSRT
- = -- TODO: Should we panic in this case?
- -- Someone obviously thinks there should be an SRT
- return NoC_SRT
-
+-- | Returns 'True' if there is a non-empty SRT, or 'False' otherwise
+-- NB. the SRT attached to an StgBind is still used in the new codegen
+-- to decide whether we need a static link field on a static closure
+-- or not.
+getSRTInfo :: SRT -> FCode Bool
+getSRTInfo (SRTEntries vs) = return (not (isEmptyVarSet vs))
+getSRTInfo _ = return False
srt_escape :: StgHalfWord
srt_escape = -1