diff options
Diffstat (limited to 'compiler/codeGen')
-rw-r--r-- | compiler/codeGen/CgInfoTbls.hs | 30 | ||||
-rw-r--r-- | compiler/codeGen/CgMonad.lhs | 19 | ||||
-rw-r--r-- | compiler/codeGen/CodeGen.lhs | 57 | ||||
-rw-r--r-- | compiler/codeGen/StgCmm.hs | 135 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmBind.hs | 57 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmClosure.hs | 29 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmCon.hs | 1 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmEnv.hs | 3 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmExpr.hs | 269 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmForeign.hs | 134 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmHeap.hs | 97 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmLayout.hs | 238 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmMonad.hs | 109 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmPrim.hs | 59 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmProf.hs | 8 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmTicky.hs | 4 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmUtils.hs | 246 |
17 files changed, 944 insertions, 551 deletions
diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs index 6c77255a62..7cdb1b6f7e 100644 --- a/compiler/codeGen/CgInfoTbls.hs +++ b/compiler/codeGen/CgInfoTbls.hs @@ -36,7 +36,7 @@ import CgBindery import CgCallConv import CgUtils import CgMonad -import CmmBuildInfoTables +import CmmUtils import OldCmm import CLabel @@ -66,10 +66,9 @@ emitClosureCodeAndInfoTable cl_info args body -- Convert from 'ClosureInfo' to 'CmmInfo'. -- Not used for return points. (The 'smRepClosureTypeInt' call would panic.) -mkCmmInfo :: ClosureInfo -> FCode CmmInfo +mkCmmInfo :: ClosureInfo -> FCode CmmInfoTable mkCmmInfo cl_info - = return (CmmInfo gc_target Nothing $ - CmmInfoTable { cit_lbl = infoTableLabelFromCI cl_info, + = return (CmmInfoTable { cit_lbl = infoTableLabelFromCI cl_info, cit_rep = closureSMRep cl_info, cit_prof = prof, cit_srt = closureSRT cl_info }) @@ -79,14 +78,6 @@ mkCmmInfo cl_info ty_descr_w8 = stringToWord8s (closureTypeDescr cl_info) val_descr_w8 = stringToWord8s (closureValDescr cl_info) - -- The gc_target is to inform the CPS pass when it inserts a stack check. - -- Since that pass isn't used yet we'll punt for now. - -- When the CPS pass is fully integrated, this should - -- be replaced by the label that any heap check jumped to, - -- so that branch can be shared by both the heap (from codeGen) - -- and stack checks (from the CPS pass). - gc_target = panic "TODO: gc_target" - ------------------------------------------------------------------------- -- -- Generating the info table and code for a return point @@ -105,8 +96,7 @@ emitReturnTarget name stmts ; blks <- cgStmtsToBlocks stmts ; frame <- mkStackLayout ; let smrep = mkStackRep (mkLiveness frame) - info = CmmInfo gc_target Nothing info_tbl - info_tbl = CmmInfoTable { cit_lbl = info_lbl + info = CmmInfoTable { cit_lbl = info_lbl , cit_prof = NoProfilingInfo , cit_rep = smrep , cit_srt = srt_info } @@ -118,14 +108,6 @@ emitReturnTarget name stmts info_lbl = mkReturnInfoLabel uniq entry_lbl = mkReturnPtLabel uniq - -- The gc_target is to inform the CPS pass when it inserts a stack check. - -- Since that pass isn't used yet we'll punt for now. - -- When the CPS pass is fully integrated, this should - -- be replaced by the label that any heap check jumped to, - -- so that branch can be shared by both the heap (from codeGen) - -- and stack checks (from the CPS pass). - gc_target = panic "TODO: gc_target" - -- Build stack layout information from the state of the 'FCode' monad. -- Should go away once 'codeGen' starts using the CPS conversion -- pass to handle the stack. Until then, this is really just @@ -375,8 +357,8 @@ funInfoTable info_ptr emitInfoTableAndCode :: CLabel -- Label of entry or ret - -> CmmInfo -- ...the info table - -> [CmmFormal] -- ...args + -> CmmInfoTable -- ...the info table + -> [CmmFormal] -- ...args -> [CmmBasicBlock] -- ...and body -> Code diff --git a/compiler/codeGen/CgMonad.lhs b/compiler/codeGen/CgMonad.lhs index b96898f591..71da9e9ae0 100644 --- a/compiler/codeGen/CgMonad.lhs +++ b/compiler/codeGen/CgMonad.lhs @@ -13,8 +13,8 @@ stuff fits into the Big Picture. module CgMonad ( Code, FCode, - initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs, - returnFC, fixC, fixC_, checkedAbsC, + initC, runC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs, + returnFC, fixC, fixC_, checkedAbsC, stmtC, stmtsC, labelC, emitStmts, nopC, whenC, newLabelC, newUnique, newUniqSupply, @@ -386,11 +386,12 @@ instance Monad FCode where {-# INLINE thenFC #-} {-# INLINE returnFC #-} -initC :: DynFlags -> Module -> FCode a -> IO a -initC dflags mod (FCode code) = do - uniqs <- mkSplitUniqSupply 'c' - case code (initCgInfoDown dflags mod) (initCgState uniqs) of - (res, _) -> return res +initC :: IO CgState +initC = do { uniqs <- mkSplitUniqSupply 'c' + ; return (initCgState uniqs) } + +runC :: DynFlags -> Module -> CgState -> FCode a -> (a,CgState) +runC dflags mod st (FCode code) = code (initCgInfoDown dflags mod) st returnFC :: a -> FCode a returnFC val = FCode $ \_ state -> (val, state) @@ -708,7 +709,7 @@ emitDecl decl = do state <- getState setState $ state { cgs_tops = cgs_tops state `snocOL` decl } -emitProc :: CmmInfo -> CLabel -> [CmmFormal] -> [CmmBasicBlock] -> Code +emitProc :: CmmInfoTable -> CLabel -> [CmmFormal] -> [CmmBasicBlock] -> Code emitProc info lbl [] blocks = do let proc_block = CmmProc info lbl (ListGraph blocks) state <- getState @@ -720,7 +721,7 @@ emitSimpleProc :: CLabel -> Code -> Code emitSimpleProc lbl code = do stmts <- getCgStmts code blks <- cgStmtsToBlocks stmts - emitProc (CmmInfo Nothing Nothing CmmNonInfoTable) lbl [] blks + emitProc CmmNonInfoTable lbl [] blks -- Get all the CmmTops (there should be no stmts) -- Return a single Cmm which may be split from other Cmms by diff --git a/compiler/codeGen/CodeGen.lhs b/compiler/codeGen/CodeGen.lhs index ce12d43bbf..9c936d3281 100644 --- a/compiler/codeGen/CodeGen.lhs +++ b/compiler/codeGen/CodeGen.lhs @@ -45,7 +45,13 @@ import TyCon import Module import ErrUtils import Panic -import Util +import Outputable + +import OrdList +import Stream (Stream, liftIO) +import qualified Stream + +import Data.IORef codeGen :: DynFlags -> Module -- Module we are compiling @@ -53,32 +59,37 @@ codeGen :: DynFlags -> CollectedCCs -- (Local/global) cost-centres needing declaring/registering. -> [(StgBinding,[(Id,[Id])])] -- Bindings to convert, with SRTs -> HpcInfo -- Profiling info - -> IO [CmmGroup] + -> Stream IO CmmGroup () -- N.B. returning '[Cmm]' and not 'Cmm' here makes it -- possible for object splitting to split up the -- pieces later. -codeGen dflags this_mod data_tycons cost_centre_info stg_binds hpc_info = do - showPass dflags "CodeGen" - code_stuff <- - initC dflags this_mod $ do - cmm_binds <- mapM (getCmm . cgTopBinding dflags) stg_binds - cmm_tycons <- mapM cgTyCon data_tycons - cmm_init <- getCmm (mkModuleInit dflags cost_centre_info this_mod hpc_info) - return (cmm_init : cmm_binds ++ cmm_tycons) - -- Put datatype_stuff after code_stuff, because the - -- datatype closure table (for enumeration types) to - -- (say) PrelBase_True_closure, which is defined in - -- code_stuff - - -- Note [codegen-split-init] the cmm_init block must - -- come FIRST. This is because when -split-objs is on - -- we need to combine this block with its - -- initialisation routines; see Note - -- [pipeline-split-init]. - - dumpIfSet_dyn dflags Opt_D_dump_cmm "Cmm" (pprCmms code_stuff) - return code_stuff +codeGen dflags this_mod data_tycons cost_centre_info stg_binds hpc_info + + = do { liftIO $ showPass dflags "CodeGen" + + ; cgref <- liftIO $ newIORef =<< initC + ; let cg :: FCode CmmGroup -> Stream IO CmmGroup () + cg fcode = do + cmm <- liftIO $ do + st <- readIORef cgref + let (a,st') = runC dflags this_mod st fcode + + dumpIfSet_dyn dflags Opt_D_dump_cmm "Cmm" $ ppr a + + -- NB. stub-out cgs_tops and cgs_stmts. This fixes + -- a big space leak. DO NOT REMOVE! + writeIORef cgref $! st'{ cgs_tops = nilOL, + cgs_stmts = nilOL } + return a + Stream.yield cmm + + ; cg (getCmm $ mkModuleInit dflags cost_centre_info this_mod hpc_info) + + ; mapM_ (cg . getCmm . cgTopBinding dflags) stg_binds + + ; mapM_ (cg . cgTyCon) data_tycons + } mkModuleInit :: DynFlags diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs index 17a7062559..696af8107e 100644 --- a/compiler/codeGen/StgCmm.hs +++ b/compiler/codeGen/StgCmm.hs @@ -46,6 +46,13 @@ import TyCon import Module import ErrUtils import Outputable +import Stream + +import OrdList +import MkGraph + +import Data.IORef +import Control.Monad (when) import Util codeGen :: DynFlags @@ -54,39 +61,51 @@ codeGen :: DynFlags -> CollectedCCs -- (Local/global) cost-centres needing declaring/registering. -> [(StgBinding,[(Id,[Id])])] -- Bindings to convert, with SRTs -> HpcInfo - -> IO [CmmGroup] -- Output + -> Stream IO CmmGroup () -- Output as a stream, so codegen can + -- be interleaved with output codeGen dflags this_mod data_tycons cost_centre_info stg_binds hpc_info - = do { showPass dflags "New CodeGen" - --- Why? --- ; mapM_ (\x -> seq x (return ())) data_tycons - - ; code_stuff <- initC dflags this_mod $ do - { cmm_binds <- mapM (getCmm . cgTopBinding dflags) stg_binds - ; cmm_tycons <- mapM cgTyCon data_tycons - ; cmm_init <- getCmm (mkModuleInit cost_centre_info - this_mod hpc_info) - ; return (cmm_init : cmm_binds ++ cmm_tycons) - } + = do { liftIO $ showPass dflags "New CodeGen" + + -- cg: run the code generator, and yield the resulting CmmGroup + -- Using an IORef to store the state is a bit crude, but otherwise + -- we would need to add a state monad layer. + ; cgref <- liftIO $ newIORef =<< initC + ; let cg :: FCode () -> Stream IO CmmGroup () + cg fcode = do + cmm <- liftIO $ do + st <- readIORef cgref + let (a,st') = runC dflags this_mod st (getCmm fcode) + + -- NB. stub-out cgs_tops and cgs_stmts. This fixes + -- a big space leak. DO NOT REMOVE! + writeIORef cgref $! st'{ cgs_tops = nilOL, + cgs_stmts = mkNop } + return a + yield cmm + + -- Note [codegen-split-init] the cmm_init block must come + -- FIRST. This is because when -split-objs is on we need to + -- combine this block with its initialisation routines; see + -- Note [pipeline-split-init]. + ; cg (mkModuleInit cost_centre_info this_mod hpc_info) + + ; mapM_ (cg . cgTopBinding dflags) stg_binds + -- Put datatype_stuff after code_stuff, because the -- datatype closure table (for enumeration types) to -- (say) PrelBase_True_closure, which is defined in -- code_stuff - - -- N.B. returning '[Cmm]' and not 'Cmm' here makes it - -- possible for object splitting to split up the - -- pieces later. - - -- Note [codegen-split-init] the cmm_init block must - -- come FIRST. This is because when -split-objs is on - -- we need to combine this block with its - -- initialisation routines; see Note - -- [pipeline-split-init]. - - ; return code_stuff } - + ; let do_tycon tycon = do + -- Generate a table of static closures for an + -- enumeration type Note that the closure pointers are + -- tagged. + when (isEnumerationTyCon tycon) $ cg (cgEnumerationTyCon tycon) + mapM_ (cg . cgDataCon) (tyConDataCons tycon) + + ; mapM_ do_tycon data_tycons + } --------------------------------------------------------------- -- Top-level bindings @@ -108,7 +127,7 @@ cgTopBinding dflags (StgNonRec id rhs, _srts) ; info <- cgTopRhs id' rhs ; addBindC (cg_id info) info -- Add the *un-externalised* Id to the envt, -- so we find it when we look up occurrences - } + } cgTopBinding dflags (StgRec pairs, _srts) = do { let (bndrs, rhss) = unzip pairs @@ -117,7 +136,7 @@ cgTopBinding dflags (StgRec pairs, _srts) ; fixC_(\ new_binds -> do { addBindsC new_binds ; mapFCs ( \ (b,e) -> cgTopRhs b e ) pairs' }) - ; return () } + ; return () } -- Urgh! I tried moving the forkStatics call from the rhss of cgTopRhs -- to enclose the listFCs in cgTopBinding, but that tickled the @@ -187,65 +206,19 @@ mkModuleInit cost_centre_info this_mod hpc_info ; emitDecl (CmmData Data (Statics (mkPlainModuleInitLabel this_mod) [])) } + --------------------------------------------------------------- -- Generating static stuff for algebraic data types --------------------------------------------------------------- -{- [These comments are rather out of date] - -Macro Kind of constructor -CONST_INFO_TABLE@ Zero arity (no info -- compiler uses static closure) -CHARLIKE_INFO_TABLE Charlike (no info -- compiler indexes fixed array) -INTLIKE_INFO_TABLE Intlike; the one macro generates both info tbls -SPEC_INFO_TABLE SPECish, and bigger than or equal to MIN_UPD_SIZE -GEN_INFO_TABLE GENish (hence bigger than or equal to MIN_UPD_SIZE@) -Possible info tables for constructor con: - -* _con_info: - Used for dynamically let(rec)-bound occurrences of - the constructor, and for updates. For constructors - which are int-like, char-like or nullary, when GC occurs, - the closure tries to get rid of itself. - -* _static_info: - Static occurrences of the constructor macro: STATIC_INFO_TABLE. - -For zero-arity constructors, \tr{con}, we NO LONGER generate a static closure; -it's place is taken by the top level defn of the constructor. - -For charlike and intlike closures there is a fixed array of static -closures predeclared. --} - -cgTyCon :: TyCon -> FCode CmmGroup -- All constructors merged together -cgTyCon tycon - = do { constrs <- mapM (getCmm . cgDataCon) (tyConDataCons tycon) - - -- Generate a table of static closures for an enumeration type - -- Put the table after the data constructor decls, because the - -- datatype closure table (for enumeration types) - -- to (say) PrelBase_$wTrue_closure, which is defined in code_stuff - -- Note that the closure pointers are tagged. - - -- N.B. comment says to put table after constructor decls, but - -- code puts it before --- NR 16 Aug 2007 - ; extra <- cgEnumerationTyCon tycon - - ; return (concat (extra ++ constrs)) - } - -cgEnumerationTyCon :: TyCon -> FCode [CmmGroup] +cgEnumerationTyCon :: TyCon -> FCode () cgEnumerationTyCon tycon - | isEnumerationTyCon tycon - = do { tbl <- getCmm $ - emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs) - [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs) - (tagForCon con) - | con <- tyConDataCons tycon] - ; return [tbl] } - | otherwise - = return [] + = emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs) + [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs) + (tagForCon con) + | con <- tyConDataCons tycon] + cgDataCon :: DataCon -> FCode () -- Generate the entry code, info tables, and (for niladic constructor) diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index 3b166e3b6a..f98283f737 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -76,17 +76,17 @@ cgTopRhsClosure :: Id cgTopRhsClosure id ccs _ upd_flag srt args body = do { -- LAY OUT THE OBJECT let name = idName id - ; lf_info <- mkClosureLFInfo id TopLevel [] upd_flag args - ; srt_info <- getSRTInfo srt + ; lf_info <- mkClosureLFInfo id TopLevel [] upd_flag args + ; has_srt <- getSRTInfo srt ; mod_name <- getModuleName ; dflags <- getDynFlags ; let descr = closureDescription dflags mod_name name - closure_info = mkClosureInfo True id lf_info 0 0 srt_info descr - closure_label = mkLocalClosureLabel name (idCafInfo id) + closure_info = mkClosureInfo True id lf_info 0 0 descr + closure_label = mkLocalClosureLabel name (idCafInfo id) cg_id_info = litIdInfo id lf_info (CmmLabel closure_label) caffy = idCafInfo id info_tbl = mkCmmInfo closure_info -- XXX short-cut - closure_rep = mkStaticClosureFields info_tbl ccs caffy [] + closure_rep = mkStaticClosureFields info_tbl ccs caffy has_srt [] -- BUILD THE OBJECT, AND GENERATE INFO TABLE (IF NECESSARY) ; emitDataLits closure_label closure_rep @@ -110,7 +110,7 @@ cgBind (StgNonRec name rhs) ; emit (init <*> body) } cgBind (StgRec pairs) - = do { ((new_binds, inits), body) <- getCodeR $ fixC (\ new_binds_inits -> + = do { ((new_binds, inits), body) <- getCodeR $ fixC (\ new_binds_inits -> do { addBindsC $ fst new_binds_inits -- avoid premature deconstruction ; liftM unzip $ listFCs [ cgRhs b e | (b,e) <- pairs ] }) ; addBindsC new_binds @@ -162,8 +162,8 @@ cgRhs :: Id -> StgRhs -> FCode (CgIdInfo, CmmAGraph) cgRhs name (StgRhsCon cc con args) = buildDynCon name cc con args -cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body) - = mkRhsClosure name cc bi (nonVoidIds fvs) upd_flag srt args body +cgRhs name (StgRhsClosure cc bi fvs upd_flag _srt args body) + = mkRhsClosure name cc bi (nonVoidIds fvs) upd_flag args body ------------------------------------------------------------------------ -- Non-constructor right hand sides @@ -171,7 +171,7 @@ cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body) mkRhsClosure :: Id -> CostCentreStack -> StgBinderInfo -> [NonVoid Id] -- Free vars - -> UpdateFlag -> SRT + -> UpdateFlag -> [Id] -- Args -> StgExpr -> FCode (CgIdInfo, CmmAGraph) @@ -215,8 +215,7 @@ for semi-obvious reasons. mkRhsClosure bndr cc bi [NonVoid the_fv] -- Just one free var upd_flag -- Updatable thunk - _srt - [] -- A thunk + [] -- A thunk body@(StgCase (StgApp scrutinee [{-no args-}]) _ _ _ _ -- ignore uniq, etc. (AlgAlt _) @@ -247,8 +246,7 @@ mkRhsClosure bndr cc bi mkRhsClosure bndr cc bi fvs upd_flag - _srt - [] -- No args; a thunk + [] -- No args; a thunk body@(StgApp fun_id args) | args `lengthIs` (arity-1) @@ -269,7 +267,7 @@ mkRhsClosure bndr cc bi arity = length fvs ---------- Default case ------------------ -mkRhsClosure bndr cc _ fvs upd_flag srt args body +mkRhsClosure bndr cc _ fvs upd_flag args body = do { -- LAY OUT THE OBJECT -- If the binder is itself a free variable, then don't store -- it in the closure. Instead, just bind it to Node on entry. @@ -288,17 +286,16 @@ mkRhsClosure bndr cc _ fvs upd_flag srt args body -- MAKE CLOSURE INFO FOR THIS CLOSURE ; lf_info <- mkClosureLFInfo bndr NotTopLevel fvs upd_flag args ; mod_name <- getModuleName - ; c_srt <- getSRTInfo srt - ; dflags <- getDynFlags - ; let name = idName bndr - descr = closureDescription dflags mod_name name - fv_details :: [(NonVoid Id, VirtualHpOffset)] + ; dflags <- getDynFlags + ; let name = idName bndr + descr = closureDescription dflags mod_name name + fv_details :: [(NonVoid Id, VirtualHpOffset)] (tot_wds, ptr_wds, fv_details) = mkVirtHeapOffsets (isLFThunk lf_info) (addIdReps (map stripNV reduced_fvs)) closure_info = mkClosureInfo False -- Not static bndr lf_info tot_wds ptr_wds - c_srt descr + descr -- BUILD ITS INFO TABLE AND CODE ; forkClosureBody $ @@ -345,8 +342,7 @@ cgStdThunk bndr _cc _bndr_info _body lf_info payload descr = closureDescription dflags mod_name (idName bndr) closure_info = mkClosureInfo False -- Not static bndr lf_info tot_wds ptr_wds - NoC_SRT -- No SRT for a std-form closure - descr + descr -- ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body ; let use_cc = curCCS; blame_cc = curCCS @@ -546,10 +542,10 @@ emitBlackHoleCode is_single_entry = do whenC eager_blackholing $ do tickyBlackHole (not is_single_entry) - emit (mkStore (cmmOffsetW (CmmReg nodeReg) fixedHdrSize) - (CmmReg (CmmGlobal CurrentTSO))) + emitStore (cmmOffsetW (CmmReg nodeReg) fixedHdrSize) + (CmmReg (CmmGlobal CurrentTSO)) emitPrimCall [] MO_WriteBarrier [] - emit (mkStore (CmmReg nodeReg) (CmmReg (CmmGlobal EagerBlackholeInfo))) + emitStore (CmmReg nodeReg) (CmmReg (CmmGlobal EagerBlackholeInfo)) setupUpdate :: ClosureInfo -> LocalReg -> FCode () -> FCode () -- Nota Bene: this function does not change Node (even if it's a CAF), @@ -598,7 +594,7 @@ pushUpdateFrame es body offset <- foldM push updfr es withUpdFrameOff offset body where push off e = - do emit (mkStore (CmmStackSlot (CallArea Old) base) e) + do emitStore (CmmStackSlot Old base) e return base where base = off + widthInBytes (cmmExprWidth e) @@ -666,13 +662,14 @@ link_caf _is_upd = do -- node is live, so save it. -- see Note [atomic CAF entry] in rts/sm/Storage.c - ; emit $ mkCmmIfThen - (CmmMachOp mo_wordEq [ CmmReg (CmmLocal ret), CmmLit zeroCLit]) $ + ; updfr <- getUpdFrameOff + ; emit =<< mkCmmIfThen + (CmmMachOp mo_wordEq [ CmmReg (CmmLocal ret), CmmLit zeroCLit]) -- re-enter R1. Doing this directly is slightly dodgy; we're -- assuming lots of things, like the stack pointer hasn't -- moved since we entered the CAF. - let target = entryCode (closureInfoPtr (CmmReg nodeReg)) in - mkJump target [] 0 + (let target = entryCode (closureInfoPtr (CmmReg nodeReg)) in + mkJump target [] updfr) ; return hp_rel } diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs index 483a67c1fa..8023abddec 100644 --- a/compiler/codeGen/StgCmmClosure.hs +++ b/compiler/codeGen/StgCmmClosure.hs @@ -657,7 +657,6 @@ data ClosureInfo -- the rest is just an unpacked CmmInfoTable. closureInfoLabel :: !CLabel, closureSMRep :: !SMRep, -- representation used by storage mgr - closureSRT :: !C_SRT, -- What SRT applies to this closure closureProf :: !ProfilingInfo } @@ -667,7 +666,7 @@ mkCmmInfo ClosureInfo {..} = CmmInfoTable { cit_lbl = closureInfoLabel , cit_rep = closureSMRep , cit_prof = closureProf - , cit_srt = closureSRT } + , cit_srt = NoC_SRT } -------------------------------------- @@ -678,16 +677,14 @@ mkClosureInfo :: Bool -- Is static -> Id -> LambdaFormInfo -> Int -> Int -- Total and pointer words - -> C_SRT - -> String -- String descriptor + -> String -- String descriptor -> ClosureInfo -mkClosureInfo is_static id lf_info tot_wds ptr_wds srt_info val_descr +mkClosureInfo is_static id lf_info tot_wds ptr_wds val_descr = ClosureInfo { closureName = name, closureLFInfo = lf_info, - closureInfoLabel = info_lbl, - closureSMRep = sm_rep, -- These four fields are a - closureSRT = srt_info, -- CmmInfoTable - closureProf = prof } -- --- + closureInfoLabel = info_lbl, -- These three fields are + closureSMRep = sm_rep, -- (almost) an info table + closureProf = prof } -- (we don't have an SRT yet) where name = idName id sm_rep = mkHeapRep is_static ptr_wds nonptr_wds (lfClosureType lf_info) @@ -920,15 +917,21 @@ cafBlackHoleInfoTable , cit_prof = NoProfilingInfo , cit_srt = NoC_SRT } -staticClosureNeedsLink :: CmmInfoTable -> Bool +staticClosureNeedsLink :: Bool -> CmmInfoTable -> Bool -- A static closure needs a link field to aid the GC when traversing -- the static closure graph. But it only needs such a field if either -- a) it has an SRT -- b) it's a constructor with one or more pointer fields -- In case (b), the constructor's fields themselves play the role -- of the SRT. -staticClosureNeedsLink info_tbl@CmmInfoTable{ cit_rep = smrep } +-- +-- At this point, the cit_srt field has not been calculated (that +-- happens right at the end of the Cmm pipeline), but we do have the +-- VarSet of CAFs that CoreToStg attached, and if that is empty there +-- will definitely not be an SRT. +-- +staticClosureNeedsLink has_srt CmmInfoTable{ cit_rep = smrep } | isConRep smrep = not (isStaticNoCafCon smrep) - | otherwise = needsSRT (cit_srt info_tbl) -staticClosureNeedsLink _ = False + | otherwise = has_srt -- needsSRT (cit_srt info_tbl) +staticClosureNeedsLink _ _ = False diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs index a7af5662e9..c348570a54 100644 --- a/compiler/codeGen/StgCmmCon.hs +++ b/compiler/codeGen/StgCmmCon.hs @@ -92,6 +92,7 @@ cgTopRhsCon id con args info_tbl dontCareCCS -- Because it's static data caffy -- Has CAF refs + False -- no SRT payload -- BUILD THE OBJECT diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs index f128e3ad60..2edd09da12 100644 --- a/compiler/codeGen/StgCmmEnv.hs +++ b/compiler/codeGen/StgCmmEnv.hs @@ -27,7 +27,7 @@ module StgCmmEnv ( bindArgsToRegs, bindToReg, rebindToReg, bindArgToReg, idToReg, - getArgAmode, getNonVoidArgAmodes, + getArgAmode, getNonVoidArgAmodes, getCgIdInfo, maybeLetNoEscape, ) where @@ -212,7 +212,6 @@ getNonVoidArgAmodes (arg:args) ; amodes <- getNonVoidArgAmodes args ; return ( amode : amodes ) } - ------------------------------------------------------------------------ -- Interface functions for binding and re-binding names ------------------------------------------------------------------------ diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index 9faad02f46..4db1dffdfc 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -77,7 +77,7 @@ cgExpr (StgLetNoEscape _ _ binds expr) = ; let join_id = mkBlockId (uniqFromSupply us) ; cgLneBinds join_id binds ; cgExpr expr - ; emit $ mkLabel join_id} + ; emitLabel join_id} cgExpr (StgCase expr _live_vars _save_vars bndr srt alt_type alts) = cgCase expr bndr srt alt_type alts @@ -130,7 +130,7 @@ cgLetNoEscapeRhs cgLetNoEscapeRhs join_id local_cc bndr rhs = do { (info, rhs_body) <- getCodeR $ cgLetNoEscapeRhsBody local_cc bndr rhs ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info - ; emit (outOfLine $ mkLabel bid <*> rhs_body <*> mkBranch join_id) + ; emitOutOfLine bid $ rhs_body <*> mkBranch join_id ; return info } @@ -278,21 +278,69 @@ Hence: two basic plans for data GcPlan = GcInAlts -- Put a GC check at the start the case alternatives, [LocalReg] -- which binds these registers - SRT -- using this SRT - | NoGcInAlts -- The scrutinee is a primitive value, or a call to a + | NoGcInAlts -- The scrutinee is a primitive value, or a call to a -- primitive op which does no GC. Absorb the allocation -- of the case alternative(s) into the upstream check ------------------------------------- --- See Note [case on Bool] cgCase :: StgExpr -> Id -> SRT -> AltType -> [StgAlt] -> FCode () + +cgCase (StgOpApp (StgPrimOp op) args _) bndr _srt (AlgAlt tycon) alts + | isEnumerationTyCon tycon -- Note [case on bool] + = do { tag_expr <- do_enum_primop op args + + -- If the binder is not dead, convert the tag to a constructor + -- and assign it. + ; when (not (isDeadBinder bndr)) $ do + { tmp_reg <- bindArgToReg (NonVoid bndr) + ; emitAssign (CmmLocal tmp_reg) + (tagToClosure tycon tag_expr) } + + ; (mb_deflt, branches) <- cgAlgAltRhss NoGcInAlts Nothing + (NonVoid bndr) alts + ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1) + } + where + do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr + do_enum_primop TagToEnumOp [arg] -- No code! + = getArgAmode (NonVoid arg) + do_enum_primop primop args + = do tmp <- newTemp bWord + cgPrimOp [tmp] primop args + return (CmmReg (CmmLocal tmp)) + {- -cgCase (OpApp ) bndr srt AlgAlt [(DataAlt flase, a2] - | isBoolTy (idType bndr) - , isDeadBndr bndr - = +Note [case on bool] + +This special case handles code like + + case a <# b of + True -> + False -> + +If we let the ordinary case code handle it, we'll get something like + + tmp1 = a < b + tmp2 = Bool_closure_tbl[tmp1] + if (tmp2 & 7 != 0) then ... // normal tagged case + +but this junk won't optimise away. What we really want is just an +inline comparison: + + if (a < b) then ... + +So we add a special case to generate + + tmp1 = a < b + if (tmp1 == 0) then ... + +and later optimisations will further improve this. + +We should really change all these primops to return Int# instead, that +would make this special case go away. -} + -- Note [ticket #3132]: we might be looking at a case of a lifted Id -- that was cast to an unlifted type. The Id will always be bottom, -- but we don't want the code generator to fall over here. If we @@ -319,7 +367,7 @@ cgCase (StgApp v []) bndr _ alt_type@(PrimAlt _) alts do { when (not reps_compatible) $ panic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?" ; v_info <- getCgIdInfo v - ; emit (mkAssign (CmmLocal (idToReg (NonVoid bndr))) (idInfoToAmode v_info)) + ; emitAssign (CmmLocal (idToReg (NonVoid bndr))) (idInfoToAmode v_info) ; _ <- bindArgsToRegs [NonVoid bndr] ; cgAlts NoGcInAlts (NonVoid bndr) alt_type alts } where @@ -330,8 +378,11 @@ cgCase scrut@(StgApp v []) _ _ (PrimAlt _) _ do { mb_cc <- maybeSaveCostCentre True ; withSequel (AssignTo [idToReg (NonVoid v)] False) (cgExpr scrut) ; restoreCurrentCostCentre mb_cc - ; emit $ mkComment $ mkFastString "should be unreachable code" - ; emit $ withFreshLabel "l" (\l -> mkLabel l <*> mkBranch l)} + ; emitComment $ mkFastString "should be unreachable code" + ; l <- newLabelC + ; emitLabel l + ; emit (mkBranch l) + } {- case seq# a s of v @@ -349,7 +400,7 @@ cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr srt alt_type alts = -- handle seq#, same return convention as vanilla 'a'. cgCase (StgApp a []) bndr srt alt_type alts -cgCase scrut bndr srt alt_type alts +cgCase scrut bndr _srt alt_type alts = -- the general case do { up_hp_usg <- getVirtHp -- Upstream heap usage ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts @@ -359,7 +410,7 @@ cgCase scrut bndr srt alt_type alts | isSingleton alts = False | up_hp_usg > 0 = False | otherwise = True - gc_plan = if gcInAlts then GcInAlts alt_regs srt else NoGcInAlts + gc_plan = if gcInAlts then GcInAlts alt_regs else NoGcInAlts ; mb_cc <- maybeSaveCostCentre simple_scrut ; withSequel (AssignTo alt_regs gcInAlts) (cgExpr scrut) @@ -417,14 +468,14 @@ chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs" cgAlts :: GcPlan -> NonVoid Id -> AltType -> [StgAlt] -> FCode () -- At this point the result of the case are in the binders cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)] - = maybeAltHeapCheck gc_plan (cgExpr rhs) + = maybeAltHeapCheck gc_plan Nothing (cgExpr rhs) cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)] - = maybeAltHeapCheck gc_plan (cgExpr rhs) + = maybeAltHeapCheck gc_plan Nothing (cgExpr rhs) -- Here bndrs are *already* in scope, so don't rebind them cgAlts gc_plan bndr (PrimAlt _) alts - = do { tagged_cmms <- cgAltRhss gc_plan bndr alts + = do { tagged_cmms <- cgAltRhss gc_plan Nothing bndr alts ; let bndr_reg = CmmLocal (idToReg bndr) (DEFAULT,deflt) = head tagged_cmms @@ -433,20 +484,17 @@ cgAlts gc_plan bndr (PrimAlt _) alts tagged_cmms' = [(lit,code) | (LitAlt lit, code) <- tagged_cmms] - ; emit (mkCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt) } + ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt } cgAlts gc_plan bndr (AlgAlt tycon) alts - = do { tagged_cmms <- cgAltRhss gc_plan bndr alts - + = do { retry_lbl <- newLabelC + ; emitLabel retry_lbl -- Note [alg-alt heap checks] + + ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan (Just retry_lbl) + bndr alts + ; let fam_sz = tyConFamilySize tycon bndr_reg = CmmLocal (idToReg bndr) - mb_deflt = case tagged_cmms of - ((DEFAULT,rhs) : _) -> Just rhs - _other -> Nothing - -- DEFAULT is always first, if present - - branches = [ (dataConTagZ con, cmm) - | (DataAlt con, cmm) <- tagged_cmms ] -- Is the constructor tag in the node reg? ; if isSmallFamily fam_sz @@ -467,23 +515,68 @@ cgAlts gc_plan bndr (AlgAlt tycon) alts cgAlts _ _ _ _ = panic "cgAlts" -- UbxTupAlt and PolyAlt have only one alternative + +-- Note [alg-alt heap check] +-- +-- In an algebraic case with more than one alternative, we will have +-- code like +-- +-- L0: +-- x = R1 +-- goto L1 +-- L1: +-- if (x & 7 >= 2) then goto L2 else goto L3 +-- L2: +-- Hp = Hp + 16 +-- if (Hp > HpLim) then goto L4 +-- ... +-- L4: +-- call gc() returns to L5 +-- L5: +-- x = R1 +-- goto L1 + ------------------- -cgAltRhss :: GcPlan -> NonVoid Id -> [StgAlt] -> FCode [(AltCon, CmmAGraph)] -cgAltRhss gc_plan bndr alts +cgAlgAltRhss :: GcPlan -> Maybe BlockId -> NonVoid Id -> [StgAlt] + -> FCode ( Maybe CmmAGraph + , [(ConTagZ, CmmAGraph)] ) +cgAlgAltRhss gc_plan retry_lbl bndr alts + = do { tagged_cmms <- cgAltRhss gc_plan retry_lbl bndr alts + + ; let { mb_deflt = case tagged_cmms of + ((DEFAULT,rhs) : _) -> Just rhs + _other -> Nothing + -- DEFAULT is always first, if present + + ; branches = [ (dataConTagZ con, cmm) + | (DataAlt con, cmm) <- tagged_cmms ] + } + + ; return (mb_deflt, branches) + } + + +------------------- +cgAltRhss :: GcPlan -> Maybe BlockId -> NonVoid Id -> [StgAlt] + -> FCode [(AltCon, CmmAGraph)] +cgAltRhss gc_plan retry_lbl bndr alts = forkAlts (map cg_alt alts) where base_reg = idToReg bndr cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph) cg_alt (con, bndrs, _uses, rhs) = getCodeR $ - maybeAltHeapCheck gc_plan $ + maybeAltHeapCheck gc_plan retry_lbl $ do { _ <- bindConArgs con base_reg bndrs ; cgExpr rhs ; return con } -maybeAltHeapCheck :: GcPlan -> FCode a -> FCode a -maybeAltHeapCheck NoGcInAlts code = code -maybeAltHeapCheck (GcInAlts regs _) code = altHeapCheck regs code +maybeAltHeapCheck :: GcPlan -> Maybe BlockId -> FCode a -> FCode a +maybeAltHeapCheck NoGcInAlts mlbl code = code +maybeAltHeapCheck (GcInAlts regs) mlbl code = + case mlbl of + Nothing -> altHeapCheck regs code + Just retry_lbl -> altHeapCheckReturnsTo regs retry_lbl code ----------------------------------------------------------------------------- -- Tail calls @@ -517,8 +610,8 @@ cgIdApp fun_id args cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode () cgLneJump blk_id lne_regs args -- Join point; discard sequel = do { cmm_args <- getNonVoidArgAmodes args - ; emit (mkMultiAssign lne_regs cmm_args - <*> mkBranch blk_id) } + ; emitMultiAssign lne_regs cmm_args + ; emit (mkBranch blk_id) } cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode () cgTailCall fun_id fun_info args = do @@ -529,27 +622,21 @@ cgTailCall fun_id fun_info args = do ReturnIt -> emitReturn [fun] -- ToDo: does ReturnIt guarantee tagged? EnterIt -> ASSERT( null args ) -- Discarding arguments - do { let fun' = CmmLoad fun (cmmExprType fun) - ; [ret,call] <- forkAlts [ - getCode $ emitReturn [fun], -- Is tagged; no need to untag - getCode $ do -- emit (mkAssign nodeReg fun) - emitCall (NativeNodeCall, NativeReturn) - (entryCode fun') [fun]] -- Not tagged - ; emit (mkCmmIfThenElse (cmmIsTagged fun) ret call) } - - SlowCall -> do -- A slow function call via the RTS apply routines + emitEnter fun + + SlowCall -> do -- A slow function call via the RTS apply routines { tickySlowCall lf_info args - ; emit $ mkComment $ mkFastString "slowCall" + ; emitComment $ mkFastString "slowCall" ; slowCall fun args } -- A direct function call (possibly with some left-over arguments) DirectEntry lbl arity -> do { tickyDirectCall arity args ; if node_points then - do emit $ mkComment $ mkFastString "directEntry" - emit (mkAssign nodeReg fun) + do emitComment $ mkFastString "directEntry" + emitAssign nodeReg fun directCall lbl arity args - else do emit $ mkComment $ mkFastString "directEntry else" + else do emitComment $ mkFastString "directEntry else" directCall lbl arity args } JumpToIt {} -> panic "cgTailCall" -- ??? @@ -561,33 +648,67 @@ cgTailCall fun_id fun_info args = do node_points = nodeMustPointToIt lf_info -{- Note [case on Bool] - ~~~~~~~~~~~~~~~~~~~ -A case on a Boolean value does two things: - 1. It looks up the Boolean in a closure table and assigns the - result to the binder. - 2. It branches to the True or False case through analysis - of the closure assigned to the binder. -But the indirection through the closure table is unnecessary -if the assignment to the binder will be dead code (use isDeadBndr). +emitEnter :: CmmExpr -> FCode () +emitEnter fun = do + { adjustHpBackwards + ; sequel <- getSequel + ; updfr_off <- getUpdFrameOff + ; case sequel of + -- For a return, we have the option of generating a tag-test or + -- not. If the value is tagged, we can return directly, which + -- is quicker than entering the value. This is a code + -- size/speed trade-off: when optimising for speed rather than + -- size we could generate the tag test. + -- + -- Right now, we do what the old codegen did, and omit the tag + -- test, just generating an enter. + Return _ -> do + { let entry = entryCode $ closureInfoPtr $ CmmReg nodeReg + ; emit $ mkForeignJump NativeNodeCall entry + [cmmUntag fun] updfr_off + } + + -- The result will be scrutinised in the sequel. This is where + -- we generate a tag-test to avoid entering the closure if + -- possible. + -- + -- The generated code will be something like this: + -- + -- R1 = fun -- copyout + -- if (fun & 7 != 0) goto Lcall else goto Lret + -- Lcall: + -- call [fun] returns to Lret + -- Lret: + -- fun' = R1 -- copyin + -- ... + -- + -- Note in particular that the label Lret is used as a + -- destination by both the tag-test and the call. This is + -- becase Lret will necessarily be a proc-point, and we want to + -- ensure that we generate only one proc-point for this + -- sequence. + -- + AssignTo res_regs _ -> do + { lret <- newLabelC + ; lcall <- newLabelC + ; let area = Young lret + ; let (off, copyin) = copyInOflow NativeReturn area res_regs + (outArgs, copyout) = copyOutOflow NativeNodeCall Call area + [fun] updfr_off (0,[]) + -- refer to fun via nodeReg after the copyout, to avoid having + -- both live simultaneously; this sometimes enables fun to be + -- inlined in the RHS of the R1 assignment. + ; let entry = entryCode (closureInfoPtr (CmmReg nodeReg)) + the_call = toCall entry (Just lret) updfr_off off outArgs + ; emit $ + copyout <*> + mkCbranch (cmmIsTagged (CmmReg nodeReg)) lret lcall <*> + outOfLine lcall the_call <*> + mkLabel lret <*> + copyin + } + } -The following example illustrates how badly the code turns out: - STG: - case <=## [ww_s7Hx y_s7HD] of wild2_sbH8 { - GHC.Types.False -> <true code> // sbH8 dead - GHC.Types.True -> <false code> // sbH8 dead - }; - Cmm: - _s7HD::F64 = F64[_sbH7::I64 + 7]; // MidAssign - _ccsW::I64 = %MO_F_Le_W64(_s7Hx::F64, _s7HD::F64); // MidAssign - // emitReturn // MidComment - _sbH8::I64 = I64[ghczmprim_GHCziBool_Bool_closure_tbl + (_ccsW::I64 << 3)]; // MidAssign - _ccsX::I64 = _sbH8::I64 & 7; // MidAssign - if (_ccsX::I64 >= 2) goto ccsH; else goto ccsI; // LastCondBranch - -The assignments to _sbH8 and _ccsX are completely unnecessary. -Instead, we should branch based on the value of _ccsW. --} {- Note [Better Alt Heap Checks] If two function calls can share a return point, then they will also diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs index 5bc0f7af4e..c67e0e0c95 100644 --- a/compiler/codeGen/StgCmmForeign.hs +++ b/compiler/codeGen/StgCmmForeign.hs @@ -22,6 +22,7 @@ import StgCmmEnv import StgCmmMonad import StgCmmUtils import StgCmmClosure +import StgCmmLayout import BlockId import Cmm @@ -45,15 +46,16 @@ import Control.Monad -- Code generation for Foreign Calls ----------------------------------------------------------------------------- -cgForeignCall :: [LocalReg] -- r1,r2 where to put the results - -> [ForeignHint] - -> ForeignCall -- the op +-- | emit code for a foreign call, and return the results to the sequel. +-- +cgForeignCall :: ForeignCall -- the op -> [StgArg] -- x,y arguments + -> Type -- result type -> FCode () --- Emits code for an unsafe foreign call: r1, r2 = foo( x, y, z ) -cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_args +cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty = do { cmm_args <- getFCallArgs stg_args + ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty ; let ((call_args, arg_hints), cmm_target) = case target of StaticTarget _ _ False -> @@ -63,7 +65,7 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a = case mPkgId of Nothing -> ForeignLabelInThisPackage Just pkgId -> ForeignLabelInPackage pkgId - size = call_size cmm_args + size = call_size cmm_args in ( unzip cmm_args , CmmLit (CmmLabel (mkForeignLabel lbl size labelSource IsFunction))) @@ -71,13 +73,31 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a DynamicTarget -> case cmm_args of (fn,_):rest -> (unzip rest, fn) [] -> panic "cgForeignCall []" - fc = ForeignConvention cconv arg_hints result_hints + fc = ForeignConvention cconv arg_hints res_hints call_target = ForeignTarget cmm_target fc - ; srt <- getSRTInfo NoSRT -- SLPJ: Not sure what SRT - -- is right here - -- JD: Does it matter in the new codegen? - ; emitForeignCall safety results call_target call_args srt CmmMayReturn } + -- we want to emit code for the call, and then emitReturn. + -- However, if the sequel is AssignTo, we shortcut a little + -- and generate a foreign call that assigns the results + -- directly. Otherwise we end up generating a bunch of + -- useless "r = r" assignments, which are not merely annoying: + -- they prevent the common block elimination from working correctly + -- in the case of a safe foreign call. + -- See Note [safe foreign call convention] + -- + ; sequel <- getSequel + ; case sequel of + AssignTo assign_to_these _ -> + do { emitForeignCall safety assign_to_these call_target + call_args CmmMayReturn + } + + _something_else -> + do { emitForeignCall safety res_regs call_target + call_args CmmMayReturn + ; emitReturn (map (CmmReg . CmmLocal) res_regs) + } + } where -- in the stdcall calling convention, the symbol needs @size appended -- to it, where size is the total number of bytes of arguments. We @@ -88,16 +108,83 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a | otherwise = Nothing -- ToDo: this might not be correct for 64-bit API - arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg) wORD_SIZE + arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg) + wORD_SIZE + +{- Note [safe foreign call convention] + +The simple thing to do for a safe foreign call would be the same as an +unsafe one: just + + emitForeignCall ... + emitReturn ... + +but consider what happens in this case + + case foo x y z of + (# s, r #) -> ... + +The sequel is AssignTo [r]. The call to newUnboxedTupleRegs picks [r] +as the result reg, and we generate + + r = foo(x,y,z) returns to L1 -- emitForeignCall + L1: + r = r -- emitReturn + goto L2 +L2: + ... + +Now L1 is a proc point (by definition, it is the continuation of the +safe foreign call). If L2 does a heap check, then L2 will also be a +proc point. + +Furthermore, the stack layout algorithm has to arrange to save r +somewhere between the call and the jump to L1, which is annoying: we +would have to treat r differently from the other live variables, which +have to be saved *before* the call. + +So we adopt a special convention for safe foreign calls: the results +are copied out according to the NativeReturn convention by the call, +and the continuation of the call should copyIn the results. (The +copyOut code is actually inserted when the safe foreign call is +lowered later). The result regs attached to the safe foreign call are +only used temporarily to hold the results before they are copied out. + +We will now generate this: + + r = foo(x,y,z) returns to L1 + L1: + r = R1 -- copyIn, inserted by mkSafeCall + goto L2 + L2: + ... r ... + +And when the safe foreign call is lowered later (see Note [lower safe +foreign calls]) we get this: + + suspendThread() + r = foo(x,y,z) + resumeThread() + R1 = r -- copyOut, inserted by lowerSafeForeignCall + jump L1 + L1: + r = R1 -- copyIn, inserted by mkSafeCall + goto L2 + L2: + ... r ... + +Now consider what happens if L2 does a heap check: the Adams +optimisation kicks in and commons up L1 with the heap-check +continuation, resulting in just one proc point instead of two. Yay! +-} + emitCCall :: [(CmmFormal,ForeignHint)] -> CmmExpr -> [(CmmActual,ForeignHint)] -> FCode () emitCCall hinted_results fn hinted_args - = emitForeignCall PlayRisky results target args - NoC_SRT -- No SRT b/c we PlayRisky - CmmMayReturn + = emitForeignCall PlayRisky results target args CmmMayReturn where (args, arg_hints) = unzip hinted_args (results, result_hints) = unzip hinted_results @@ -107,7 +194,7 @@ emitCCall hinted_results fn hinted_args emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode () emitPrimCall res op args - = emitForeignCall PlayRisky res (PrimTarget op) args NoC_SRT CmmMayReturn + = emitForeignCall PlayRisky res (PrimTarget op) args CmmMayReturn -- alternative entry point, used by CmmParse emitForeignCall @@ -115,11 +202,10 @@ emitForeignCall -> [CmmFormal] -- where to put the results -> ForeignTarget -- the op -> [CmmActual] -- arguments - -> C_SRT -- the SRT of the calls continuation -> CmmReturnInfo -- This can say "never returns" -- only RTS procedures do this -> FCode () -emitForeignCall safety results target args _srt _ret +emitForeignCall safety results target args _ret | not (playSafe safety) = do let (caller_save, caller_load) = callerSaveVolatileRegs emit caller_save @@ -129,7 +215,9 @@ emitForeignCall safety results target args _srt _ret | otherwise = do updfr_off <- getUpdFrameOff temp_target <- load_target_into_temp target - emit $ mkSafeCall temp_target results args updfr_off (playInterruptible safety) + emit =<< mkSafeCall temp_target results args updfr_off + (playInterruptible safety) + {- @@ -162,7 +250,7 @@ maybe_assign_temp e -- expressions, which are wrong here. -- this is a NonPtr because it only duplicates an existing reg <- newTemp (cmmExprType e) --TODO FIXME NOW - emit (mkAssign (CmmLocal reg) e) + emitAssign (CmmLocal reg) e return (CmmReg (CmmLocal reg)) -- ----------------------------------------------------------------------------- @@ -184,12 +272,12 @@ saveThreadState = emitSaveThreadState :: BlockId -> FCode () emitSaveThreadState bid = do -- CurrentTSO->stackobj->sp = Sp; - emit $ mkStore (cmmOffset (CmmLoad (cmmOffset stgCurrentTSO tso_stackobj) bWord) stack_SP) - (CmmStackSlot (CallArea (Young bid)) (widthInBytes (typeWidth gcWord))) + emitStore (cmmOffset (CmmLoad (cmmOffset stgCurrentTSO tso_stackobj) bWord) stack_SP) + (CmmStackSlot (Young bid) (widthInBytes (typeWidth gcWord))) emit closeNursery -- and save the current cost centre stack in the TSO when profiling: when opt_SccProfilingOn $ - emit (mkStore (cmmOffset stgCurrentTSO tso_CCCS) curCCS) + emitStore (cmmOffset stgCurrentTSO tso_CCCS) curCCS -- CurrentNursery->free = Hp+1; closeNursery :: CmmAGraph diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs index 25161722f7..856b04367d 100644 --- a/compiler/codeGen/StgCmmHeap.hs +++ b/compiler/codeGen/StgCmmHeap.hs @@ -10,7 +10,7 @@ module StgCmmHeap ( getVirtHp, setVirtHp, setRealHp, getHpRelOffset, hpRel, - entryHeapCheck, altHeapCheck, + entryHeapCheck, altHeapCheck, altHeapCheckReturnsTo, mkVirtHeapOffsets, mkVirtConstrOffsets, mkStaticClosureFields, mkStaticClosure, @@ -20,7 +20,6 @@ module StgCmmHeap ( #include "HsVersions.h" -import CmmType import StgSyn import CLabel import StgCmmLayout @@ -34,6 +33,7 @@ import StgCmmEnv import MkGraph +import Hoopl hiding ((<*>), mkBranch) import SMRep import Cmm import CmmUtils @@ -109,7 +109,7 @@ allocDynClosureCmm info_tbl lf_info use_cc _blame_cc amodes_w_offsets -- ALLOCATE THE OBJECT ; base <- getHpRelOffset info_offset - ; emit (mkComment $ mkFastString "allocDynClosure") + ; emitComment $ mkFastString "allocDynClosure" ; emitSetDynHdr base info_ptr use_cc ; let (cmm_args, offsets) = unzip amodes_w_offsets ; hpStore base cmm_args offsets @@ -151,9 +151,10 @@ mkStaticClosureFields :: CmmInfoTable -> CostCentreStack -> CafInfo + -> Bool -- SRT is non-empty? -> [CmmLit] -- Payload -> [CmmLit] -- The full closure -mkStaticClosureFields info_tbl ccs caf_refs payload +mkStaticClosureFields info_tbl ccs caf_refs has_srt payload = mkStaticClosure info_lbl ccs payload padding static_link_field saved_info_field where @@ -178,8 +179,10 @@ mkStaticClosureFields info_tbl ccs caf_refs payload | otherwise = ASSERT(null payload) [mkIntCLit 0] static_link_field - | is_caf || staticClosureNeedsLink info_tbl = [static_link_value] - | otherwise = [] + | is_caf || staticClosureNeedsLink has_srt info_tbl + = [static_link_value] + | otherwise + = [] saved_info_field | is_caf = [mkIntCLit 0] @@ -335,11 +338,12 @@ entryHeapCheck cl_info offset nodeSet arity args code args' = map (CmmReg . CmmLocal) args setN = case nodeSet of - Just n -> mkAssign nodeReg (CmmReg $ CmmLocal n) + Just n -> mkNop -- No need to assign R1, it already + -- points to the closure Nothing -> mkAssign nodeReg $ CmmLit (CmmLabel $ staticClosureLabel cl_info) - {- Thunks: Set R1 = node, jump GCEnter1 + {- Thunks: jump GCEnter1 Function (fast): Set R1 = node, jump GCFun Function (slow): Set R1 = node, call generic_gc -} gc_call upd = setN <*> gc_lbl upd @@ -354,7 +358,10 @@ entryHeapCheck cl_info offset nodeSet arity args code - GC calls, but until then this fishy code works -} updfr_sz <- getUpdFrameOff - heapCheck True (gc_call updfr_sz) code + + loop_id <- newLabelC + emitLabel loop_id + heapCheck True (gc_call updfr_sz <*> mkBranch loop_id) code {- -- This code is slightly outdated now and we could easily keep the above @@ -400,21 +407,29 @@ entryHeapCheck cl_info offset nodeSet arity args code -} --------------------------------------------------------------- --- A heap/stack check at in a case alternative +-- ------------------------------------------------------------ +-- A heap/stack check in a case alternative altHeapCheck :: [LocalReg] -> FCode a -> FCode a altHeapCheck regs code + = do loop_id <- newLabelC + emitLabel loop_id + altHeapCheckReturnsTo regs loop_id code + +altHeapCheckReturnsTo :: [LocalReg] -> Label -> FCode a -> FCode a +altHeapCheckReturnsTo regs retry_lbl code = do updfr_sz <- getUpdFrameOff - heapCheck False (gc_call updfr_sz) code + gc_call_code <- gc_call updfr_sz + heapCheck False (gc_call_code <*> mkBranch retry_lbl) code where reg_exprs = map (CmmReg . CmmLocal) regs + -- Note [stg_gc arguments] gc_call sp = case rts_label regs of - Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp - Nothing -> mkCall generic_gc (GC, GC) [] [] sp + Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp (0,[]) + Nothing -> mkCall generic_gc (GC, GC) [] [] sp (0,[]) rts_label [reg] | isGcPtrType ty = Just (mkGcLabel "stg_gc_unpt_r1") @@ -432,6 +447,23 @@ altHeapCheck regs code rts_label _ = Nothing +-- Note [stg_gc arguments] +-- It might seem that we could avoid passing the arguments to the +-- stg_gc function, because they are already in the right registers. +-- While this is usually the case, it isn't always. Sometimes the +-- code generator has cleverly avoided the eval in a case, e.g. in +-- ffi/should_run/4221.hs we found +-- +-- case a_r1mb of z +-- FunPtr x y -> ... +-- +-- where a_r1mb is bound a top-level constructor, and is known to be +-- evaluated. The codegen just assigns x, y and z, and continues; +-- R1 is never assigned. +-- +-- So we'll have to rely on optimisations to eliminatethese +-- assignments where possible. + -- | The generic GC procedure; no params, no results generic_gc :: CmmExpr @@ -447,7 +479,7 @@ heapCheck checkStack do_gc code = getHeapUsage $ \ hpHw -> -- Emit heap checks, but be sure to do it lazily so -- that the conditionals on hpHw don't cause a black hole - do { emit $ do_checks checkStack hpHw do_gc + do { codeOnly $ do_checks checkStack hpHw do_gc ; tickyAllocHeap hpHw ; doGranAllocate hpHw ; setRealHp hpHw @@ -456,22 +488,25 @@ heapCheck checkStack do_gc code do_checks :: Bool -- Should we check the stack? -> WordOff -- Heap headroom -> CmmAGraph -- What to do on failure - -> CmmAGraph -do_checks checkStack alloc do_gc - = withFreshLabel "gc" $ \ loop_id -> - withFreshLabel "gc" $ \ gc_id -> - mkLabel loop_id - <*> (let hpCheck = if alloc == 0 then mkNop - else mkAssign hpReg bump_hp <*> - mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id) - in if checkStack - then mkCmmIfThenElse sp_oflo (mkBranch gc_id) hpCheck - else hpCheck) - <*> mkComment (mkFastString "outOfLine should follow:") - <*> outOfLine (mkLabel gc_id - <*> mkComment (mkFastString "outOfLine here") - <*> do_gc - <*> mkBranch loop_id) + -> FCode () +do_checks checkStack alloc do_gc = do + gc_id <- newLabelC + hp_check <- if alloc == 0 + then return mkNop + else do + ifthen <- mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id) + return (mkAssign hpReg bump_hp <*> ifthen) + + if checkStack + then emit =<< mkCmmIfThenElse sp_oflo (mkBranch gc_id) hp_check + else emit hp_check + + emit $ mkComment (mkFastString "outOfLine should follow:") + + emitOutOfLine gc_id $ + mkComment (mkFastString "outOfLine here") <*> + do_gc -- this is expected to jump back somewhere + -- Test for stack pointer exhaustion, then -- bump heap pointer, and test for heap exhaustion -- Note that we don't move the heap pointer unless the diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs index 86986efdfa..9593af1f50 100644 --- a/compiler/codeGen/StgCmmLayout.hs +++ b/compiler/codeGen/StgCmmLayout.hs @@ -15,7 +15,7 @@ module StgCmmLayout ( mkArgDescr, - emitCall, emitReturn, + emitCall, emitReturn, adjustHpBackwards, emitClosureProcAndInfoTable, emitClosureAndInfoTable, @@ -41,10 +41,12 @@ import StgCmmEnv import StgCmmTicky import StgCmmMonad import StgCmmUtils +import StgCmmProf import MkGraph import SMRep import Cmm +import CmmUtils import CLabel import StgSyn import Id @@ -52,6 +54,7 @@ import Name import TyCon ( PrimRep(..) ) import BasicTypes ( RepArity ) import StaticFlags +import Module import Constants import Util @@ -63,38 +66,60 @@ import FastString -- Call and return sequences ------------------------------------------------------------------------ -emitReturn :: [CmmExpr] -> FCode () --- Return multiple values to the sequel +-- | Return multiple values to the sequel +-- +-- If the sequel is @Return@ +-- +-- > return (x,y) +-- +-- If the sequel is @AssignTo [p,q]@ +-- +-- > p=x; q=y; -- --- If the sequel is Return --- return (x,y) --- If the sequel is AssignTo [p,q] --- p=x; q=y; +emitReturn :: [CmmExpr] -> FCode () emitReturn results = do { sequel <- getSequel; ; updfr_off <- getUpdFrameOff - ; emit $ mkComment $ mkFastString ("emitReturn: " ++ show sequel) + ; emitComment $ mkFastString ("emitReturn: " ++ show sequel) ; case sequel of Return _ -> do { adjustHpBackwards ; emit (mkReturnSimple results updfr_off) } AssignTo regs adjust -> do { if adjust then adjustHpBackwards else return () - ; emit (mkMultiAssign regs results) } + ; emitMultiAssign regs results } } + +-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@, +-- using the call/return convention @conv@, passing @args@, and +-- returning the results to the current sequel. +-- emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode () --- (cgCall fun args) makes a call to the entry-code of 'fun', --- passing 'args', and returning the results to the current sequel -emitCall convs@(callConv, _) fun args +emitCall convs fun args + = emitCallWithExtraStack convs fun args noExtraStack + + +-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the +-- entry-code of @fun@, using the call/return convention @conv@, +-- passing @args@, pushing some extra stack frames described by +-- @stack@, and returning the results to the current sequel. +-- +emitCallWithExtraStack + :: (Convention, Convention) -> CmmExpr -> [CmmExpr] + -> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode () +emitCallWithExtraStack convs@(callConv, _) fun args extra_stack = do { adjustHpBackwards ; sequel <- getSequel ; updfr_off <- getUpdFrameOff - ; emit $ mkComment $ mkFastString ("emitCall: " ++ show sequel) - ; case sequel of - Return _ -> emit (mkForeignJump callConv fun args updfr_off) - AssignTo res_regs _ -> emit (mkCall fun convs res_regs args updfr_off) - } + ; emitComment $ mkFastString ("emitCallWithExtraStack: " ++ show sequel) + ; case sequel of + Return _ -> + emit $ mkForeignJumpExtra callConv fun args updfr_off extra_stack + AssignTo res_regs _ -> do + emit =<< mkCall fun convs res_regs args updfr_off extra_stack + } + adjustHpBackwards :: FCode () -- This function adjusts and heap pointers just before a tail call or @@ -127,59 +152,137 @@ adjustHpBackwards -- Making calls: directCall and slowCall ------------------------------------------------------------------------- +-- General plan is: +-- - we'll make *one* fast call, either to the function itself +-- (directCall) or to stg_ap_<pat>_fast (slowCall) +-- Any left-over arguments will be pushed on the stack, +-- +-- e.g. Sp[old+8] = arg1 +-- Sp[old+16] = arg2 +-- Sp[old+32] = stg_ap_pp_info +-- R2 = arg3 +-- R3 = arg4 +-- call f() return to Nothing updfr_off: 32 + + directCall :: CLabel -> RepArity -> [StgArg] -> FCode () -- (directCall f n args) -- calls f(arg1, ..., argn), and applies the result to the remaining args -- The function f has arity n, and there are guaranteed at least n args -- Both arity and args include void args directCall lbl arity stg_args - = do { cmm_args <- getNonVoidArgAmodes stg_args - ; direct_call "directCall" lbl arity cmm_args (argsReps stg_args) } + = do { argreps <- getArgRepsAmodes stg_args + ; direct_call "directCall" lbl arity argreps } + slowCall :: CmmExpr -> [StgArg] -> FCode () -- (slowCall fun args) applies fun to args, returning the results to Sequel slowCall fun stg_args - = do { cmm_args <- getNonVoidArgAmodes stg_args - ; slow_call fun cmm_args (argsReps stg_args) } + = do { dflags <- getDynFlags + ; argsreps <- getArgRepsAmodes stg_args + ; let (rts_fun, arity) = slowCallPattern (map fst argsreps) + ; call <- getCode $ direct_call "slow_call" + (mkRtsApFastLabel rts_fun) arity argsreps + ; emitComment $ mkFastString ("slow_call for " ++ + showSDoc dflags (ppr fun) ++ + " with pat " ++ unpackFS rts_fun) + ; emit (mkAssign nodeReg fun <*> call) + } + -------------- -direct_call :: String -> CLabel -> RepArity -> [CmmExpr] -> [ArgRep] -> FCode () --- NB1: (length args) may be less than (length reps), because --- the args exclude the void ones --- NB2: 'arity' refers to the *reps* -direct_call caller lbl arity args reps - | debugIsOn && arity > length reps -- Too few args +direct_call :: String -> CLabel -> RepArity -> [(ArgRep,Maybe CmmExpr)] -> FCode () +direct_call caller lbl arity args + | debugIsOn && arity > length args -- Too few args = do -- Caller should ensure that there enough args! - pprPanic "direct_call" (text caller <+> ppr arity - <+> ppr lbl <+> ppr (length reps) - <+> ppr args <+> ppr reps ) - - | null rest_reps -- Precisely the right number of arguments - = emitCall (NativeDirectCall, NativeReturn) target args - - | otherwise -- Over-saturated call - = ASSERT( arity == length initial_reps ) - do { pap_id <- newTemp gcWord - ; withSequel (AssignTo [pap_id] True) - (emitCall (NativeDirectCall, NativeReturn) target fast_args) - ; slow_call (CmmReg (CmmLocal pap_id)) - rest_args rest_reps } + pprPanic "direct_call" $ + text caller <+> ppr arity <+> + ppr lbl <+> ppr (length args) <+> + ppr (map snd args) <+> ppr (map fst args) + + | null rest_args -- Precisely the right number of arguments + = emitCall (NativeDirectCall, NativeReturn) target (nonVArgs args) + + | otherwise -- Note [over-saturated calls] + = emitCallWithExtraStack (NativeDirectCall, NativeReturn) + target (nonVArgs fast_args) (mkStkOffsets stack_args) where target = CmmLit (CmmLabel lbl) - (initial_reps, rest_reps) = splitAt arity reps - arg_arity = count isNonV initial_reps - (fast_args, rest_args) = splitAt arg_arity args + (fast_args, rest_args) = splitAt arity args + stack_args = slowArgs rest_args --------------- -slow_call :: CmmExpr -> [CmmExpr] -> [ArgRep] -> FCode () -slow_call fun args reps - = do dflags <- getDynFlags - call <- getCode $ direct_call "slow_call" (mkRtsApFastLabel rts_fun) arity args reps - emit $ mkComment $ mkFastString ("slow_call for " ++ showSDoc dflags (ppr fun) ++ - " with pat " ++ unpackFS rts_fun) - emit (mkAssign nodeReg fun <*> call) + +-- When constructing calls, it is easier to keep the ArgReps and the +-- CmmExprs zipped together. However, a void argument has no +-- representation, so we need to use Maybe CmmExpr (the alternative of +-- using zeroCLit or even undefined would work, but would be ugly). +-- +getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)] +getArgRepsAmodes = mapM getArgRepAmode + where getArgRepAmode arg + | V <- rep = return (V, Nothing) + | otherwise = do expr <- getArgAmode (NonVoid arg) + return (rep, Just expr) + where rep = toArgRep (argPrimRep arg) + +nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr] +nonVArgs [] = [] +nonVArgs ((_,Nothing) : args) = nonVArgs args +nonVArgs ((_,Just arg) : args) = arg : nonVArgs args + +{- +Note [over-saturated calls] + +The natural thing to do for an over-saturated call would be to call +the function with the correct number of arguments, and then apply the +remaining arguments to the value returned, e.g. + + f a b c d (where f has arity 2) + --> + r = call f(a,b) + call r(c,d) + +but this entails + - saving c and d on the stack + - making a continuation info table + - at the continuation, loading c and d off the stack into regs + - finally, call r + +Note that since there are a fixed number of different r's +(e.g. stg_ap_pp_fast), we can also pre-compile continuations +that correspond to each of them, rather than generating a fresh +one for each over-saturated call. + +Not only does this generate much less code, it is faster too. We will +generate something like: + +Sp[old+16] = c +Sp[old+24] = d +Sp[old+32] = stg_ap_pp_info +call f(a,b) -- usual calling convention + +For the purposes of the CmmCall node, we count this extra stack as +just more arguments that we are passing on the stack (cml_args). +-} + +-- | 'slowArgs' takes a list of function arguments and prepares them for +-- pushing on the stack for "extra" arguments to a function which requires +-- fewer arguments than we currently have. +slowArgs :: [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)] +slowArgs [] = [] +slowArgs args -- careful: reps contains voids (V), but args does not + | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest_args + | otherwise = this_pat ++ slowArgs rest_args where - (rts_fun, arity) = slowCallPattern reps + (arg_pat, n) = slowCallPattern (map fst args) + (call_args, rest_args) = splitAt n args + + stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat + this_pat = (N, Just (mkLblExpr stg_ap_pat)) : call_args + save_cccs = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)] + save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs") + + -- These cases were found to cover about 99% of all slow calls: slowCallPattern :: [ArgRep] -> (FastString, RepArity) @@ -202,6 +305,30 @@ slowCallPattern [] = (fsLit "stg_ap_0", 0) ------------------------------------------------------------------------- +-- Fix the byte-offsets of a bunch of things to push on the stack + +-- This is used for pushing slow-call continuations. +-- See Note [over-saturated calls]. + +mkStkOffsets + :: [(ArgRep, Maybe CmmExpr)] -- things to make offsets for + -> ( ByteOff -- OUTPUTS: Topmost allocated word + , [(CmmExpr, ByteOff)] ) -- things with offsets (voids filtered out) +mkStkOffsets things + = loop 0 [] (reverse things) + where + loop offset offs [] = (offset,offs) + loop offset offs ((_,Nothing):things) = loop offset offs things + -- ignore Void arguments + loop offset offs ((rep,Just thing):things) + = loop thing_off ((thing, thing_off):offs) things + where + thing_off = offset + argRepSizeW rep * wORD_SIZE + -- offset of thing is offset+size, because we're + -- growing the stack *downwards* as the offsets increase. + + +------------------------------------------------------------------------- -- Classifying arguments: ArgRep ------------------------------------------------------------------------- @@ -237,10 +364,7 @@ isNonV :: ArgRep -> Bool isNonV V = False isNonV _ = True -argsReps :: [StgArg] -> [ArgRep] -argsReps = map (toArgRep . argPrimRep) - -argRepSizeW :: ArgRep -> WordOff -- Size in words +argRepSizeW :: ArgRep -> WordOff -- Size in words argRepSizeW N = 1 argRepSizeW P = 1 argRepSizeW F = 1 diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs index 4eea38e22c..cc9919a4a0 100644 --- a/compiler/codeGen/StgCmmMonad.hs +++ b/compiler/codeGen/StgCmmMonad.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE GADTs #-} ----------------------------------------------------------------------------- -- -- Monad for Stg to C-- code generation @@ -16,16 +17,21 @@ module StgCmmMonad ( FCode, -- type - initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs, + initC, runC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs, returnFC, fixC, fixC_, nopC, whenC, newUnique, newUniqSupply, + newLabelC, emitLabel, + emit, emitDecl, emitProc, emitProcWithConvention, emitSimpleProc, + emitOutOfLine, emitAssign, emitStore, emitComment, getCmm, cgStmtsToBlocks, getCodeR, getCode, getHeapUsage, - forkClosureBody, forkStatics, forkAlts, forkProc, codeOnly, + mkCmmIfThenElse, mkCmmIfThen, mkCall, mkCmmCall, mkSafeCall, + + forkClosureBody, forkStatics, forkAlts, forkProc, codeOnly, ConTagZ, @@ -69,12 +75,12 @@ import VarEnv import OrdList import Unique import UniqSupply -import FastString(sLit) +import FastString import Outputable import Control.Monad import Data.List -import Prelude hiding( sequence ) +import Prelude hiding( sequence, succ ) import qualified Prelude( sequence ) infixr 9 `thenC` -- Right-associative! @@ -95,12 +101,12 @@ instance Monad FCode where {-# INLINE thenFC #-} {-# INLINE returnFC #-} -initC :: DynFlags -> Module -> FCode a -> IO a -initC dflags mod (FCode code) - = do { uniqs <- mkSplitUniqSupply 'c' - ; case code (initCgInfoDown dflags mod) (initCgState uniqs) of - (res, _) -> return res - } +initC :: IO CgState +initC = do { uniqs <- mkSplitUniqSupply 'c' + ; return (initCgState uniqs) } + +runC :: DynFlags -> Module -> CgState -> FCode a -> (a,CgState) +runC dflags mod st (FCode code) = code (initCgInfoDown dflags mod) st returnFC :: a -> FCode a returnFC val = FCode (\_info_down state -> (val, state)) @@ -270,6 +276,8 @@ data HeapUsage = type VirtualHpOffset = WordOff + + initCgState :: UniqSupply -> CgState initCgState uniqs = MkCgState { cgs_stmts = mkNop, cgs_tops = nilOL, @@ -308,7 +316,6 @@ initHpUsage = HeapUsage { virtHp = 0, realHp = 0 } maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw } - -------------------------------------------------------- -- Operators for getting and setting the state and "info_down". -------------------------------------------------------- @@ -591,6 +598,33 @@ getHeapUsage fcode -- ---------------------------------------------------------------------------- -- Combinators for emitting code +emitCgStmt :: CgStmt -> FCode () +emitCgStmt stmt + = do { state <- getState + ; setState $ state { cgs_stmts = cgs_stmts state `snocOL` stmt } + } + +emitLabel :: BlockId -> FCode () +emitLabel id = emitCgStmt (CgLabel id) + +emitComment :: FastString -> FCode () +#if 0 /* def DEBUG */ +emitComment s = emitCgStmt (CgStmt (CmmComment s)) +#else +emitComment _ = return () +#endif + +emitAssign :: CmmReg -> CmmExpr -> FCode () +emitAssign l r = emitCgStmt (CgStmt (CmmAssign l r)) + +emitStore :: CmmExpr -> CmmExpr -> FCode () +emitStore l r = emitCgStmt (CgStmt (CmmStore l r)) + + +newLabelC :: FCode BlockId +newLabelC = do { u <- newUnique + ; return $ mkBlockId u } + emit :: CmmAGraph -> FCode () emit ag = do { state <- getState @@ -601,6 +635,9 @@ emitDecl decl = do { state <- getState ; setState $ state { cgs_tops = cgs_tops state `snocOL` decl } } +emitOutOfLine :: BlockId -> CmmAGraph -> FCode () +emitOutOfLine l stmts = emitCgStmt (CgFork l stmts) + emitProcWithConvention :: Convention -> CmmInfoTable -> CLabel -> [CmmFormal] -> CmmAGraph -> FCode () emitProcWithConvention conv info lbl args blocks @@ -629,6 +666,55 @@ getCmm code ; setState $ state2 { cgs_tops = cgs_tops state1 } ; return (fromOL (cgs_tops state2)) } + +mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> FCode CmmAGraph +mkCmmIfThenElse e tbranch fbranch = do + endif <- newLabelC + tid <- newLabelC + fid <- newLabelC + return $ mkCbranch e tid fid <*> + mkLabel tid <*> tbranch <*> mkBranch endif <*> + mkLabel fid <*> fbranch <*> mkLabel endif + +mkCmmIfThen :: CmmExpr -> CmmAGraph -> FCode CmmAGraph +mkCmmIfThen e tbranch = do + endif <- newLabelC + tid <- newLabelC + return $ mkCbranch e tid endif <*> + mkLabel tid <*> tbranch <*> mkLabel endif + + +mkCall :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmActual] + -> UpdFrameOffset -> (ByteOff,[(CmmExpr,ByteOff)]) -> FCode CmmAGraph +mkCall f (callConv, retConv) results actuals updfr_off extra_stack = do + k <- newLabelC + let area = Young k + (off, copyin) = copyInOflow retConv area results + copyout = mkCallReturnsTo f callConv actuals k off updfr_off extra_stack + return (copyout <*> mkLabel k <*> copyin) + +mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmActual] -> UpdFrameOffset + -> FCode CmmAGraph +mkCmmCall f results actuals updfr_off + = mkCall f (NativeDirectCall, NativeReturn) results actuals updfr_off (0,[]) + + +mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] + -> UpdFrameOffset -> Bool + -> FCode CmmAGraph +mkSafeCall t fs as upd i = do + k <- newLabelC + let (_off, copyout) = copyInOflow NativeReturn (Young k) fs + -- see Note [safe foreign call convention] + return + ( mkStore (CmmStackSlot (Young k) (widthInBytes wordWidth)) + (CmmLit (CmmBlock k)) + <*> mkLast (CmmForeignCall { tgt=t, res=fs, args=as, succ=k + , updfr=upd, intrbl=i }) + <*> mkLabel k + <*> copyout + ) + -- ---------------------------------------------------------------------------- -- CgStmts @@ -640,4 +726,3 @@ cgStmtsToBlocks :: CmmAGraph -> FCode CmmGraph cgStmtsToBlocks stmts = do { us <- newUniqSupply ; return (initUs_ us (lgraphOfAGraph stmts)) } - diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index efa234b5a6..bd783a3b30 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -14,7 +14,9 @@ -- for details module StgCmmPrim ( - cgOpApp + cgOpApp, + cgPrimOp -- internal(ish), used by cgCase to get code for a + -- comparison without also turning it into a Bool. ) where #include "HsVersions.h" @@ -67,14 +69,9 @@ cgOpApp :: StgOp -- The op -- Foreign calls cgOpApp (StgFCallOp fcall _) stg_args res_ty - = do { (res_regs, res_hints) <- newUnboxedTupleRegs res_ty - -- Choose result regs r1, r2 - -- Note [Foreign call results] - ; cgForeignCall res_regs res_hints fcall stg_args - -- r1, r2 = foo( x, y ) - ; emitReturn (map (CmmReg . CmmLocal) res_regs) } - -- return (r1, r2) - + = cgForeignCall fcall stg_args res_ty + -- Note [Foreign call results] + -- tagToEnum# is special: we need to pull the constructor -- out of the table, and perform an appropriate return. @@ -229,23 +226,23 @@ emitPrimOp [res] SparkOp [arg] [(tmp2,NoHint)] (CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "newSpark")))) [(CmmReg (CmmGlobal BaseReg), AddrHint), ((CmmReg (CmmLocal tmp)), AddrHint)] - emit (mkAssign (CmmLocal res) (CmmReg (CmmLocal tmp))) + emitAssign (CmmLocal res) (CmmReg (CmmLocal tmp)) emitPrimOp [res] GetCCSOfOp [arg] - = emit (mkAssign (CmmLocal res) val) + = emitAssign (CmmLocal res) val where val | opt_SccProfilingOn = costCentreFrom (cmmUntag arg) | otherwise = CmmLit zeroCLit emitPrimOp [res] GetCurrentCCSOp [_dummy_arg] - = emit (mkAssign (CmmLocal res) curCCS) + = emitAssign (CmmLocal res) curCCS emitPrimOp [res] ReadMutVarOp [mutv] - = emit (mkAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord)) + = emitAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord) emitPrimOp [] WriteMutVarOp [mutv,var] = do - emit (mkStore (cmmOffsetW mutv fixedHdrSize) var) + emitStore (cmmOffsetW mutv fixedHdrSize) var emitCCall [{-no results-}] (CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) @@ -269,32 +266,32 @@ emitPrimOp res@[] TouchOp args@[_arg] -- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a) emitPrimOp [res] ByteArrayContents_Char [arg] - = emit (mkAssign (CmmLocal res) (cmmOffsetB arg arrWordsHdrSize)) + = emitAssign (CmmLocal res) (cmmOffsetB arg arrWordsHdrSize) -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn) emitPrimOp [res] StableNameToIntOp [arg] - = emit (mkAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize bWord)) + = emitAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize bWord) -- #define eqStableNamezh(r,sn1,sn2) \ -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn)) emitPrimOp [res] EqStableNameOp [arg1,arg2] - = emit (mkAssign (CmmLocal res) (CmmMachOp mo_wordEq [ + = emitAssign (CmmLocal res) (CmmMachOp mo_wordEq [ cmmLoadIndexW arg1 fixedHdrSize bWord, cmmLoadIndexW arg2 fixedHdrSize bWord - ])) + ]) emitPrimOp [res] ReallyUnsafePtrEqualityOp [arg1,arg2] - = emit (mkAssign (CmmLocal res) (CmmMachOp mo_wordEq [arg1,arg2])) + = emitAssign (CmmLocal res) (CmmMachOp mo_wordEq [arg1,arg2]) -- #define addrToHValuezh(r,a) r=(P_)a emitPrimOp [res] AddrToAnyOp [arg] - = emit (mkAssign (CmmLocal res) arg) + = emitAssign (CmmLocal res) arg -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info)) -- Note: argument may be tagged! emitPrimOp [res] DataToTagOp [arg] - = emit (mkAssign (CmmLocal res) (getConstrTag (cmmUntag arg))) + = emitAssign (CmmLocal res) (getConstrTag (cmmUntag arg)) {- Freezing arrays-of-ptrs requires changing an info table, for the benefit of the generational collector. It needs to scavenge mutable @@ -317,7 +314,7 @@ emitPrimOp [res] UnsafeFreezeArrayArrayOp [arg] -- #define unsafeFreezzeByteArrayzh(r,a) r=(a) emitPrimOp [res] UnsafeFreezeByteArrayOp [arg] - = emit (mkAssign (CmmLocal res) arg) + = emitAssign (CmmLocal res) arg -- Copying pointer arrays @@ -497,11 +494,11 @@ emitPrimOp [res] PopCntOp [w] = emitPopCntCall res w wordWidth -- The rest just translate straightforwardly emitPrimOp [res] op [arg] | nopOp op - = emit (mkAssign (CmmLocal res) arg) + = emitAssign (CmmLocal res) arg | Just (mop,rep) <- narrowOp op - = emit (mkAssign (CmmLocal res) $ - CmmMachOp (mop rep wordWidth) [CmmMachOp (mop wordWidth rep) [arg]]) + = emitAssign (CmmLocal res) $ + CmmMachOp (mop rep wordWidth) [CmmMachOp (mop wordWidth rep) [arg]] emitPrimOp r@[res] op args | Just prim <- callishOp op @@ -746,15 +743,15 @@ loadArrPtrsSize addr = CmmLoad (cmmOffsetB addr off) bWord mkBasicIndexedRead :: ByteOff -> Maybe MachOp -> CmmType -> LocalReg -> CmmExpr -> CmmExpr -> FCode () mkBasicIndexedRead off Nothing read_rep res base idx - = emit (mkAssign (CmmLocal res) (cmmLoadIndexOffExpr off read_rep base idx)) + = emitAssign (CmmLocal res) (cmmLoadIndexOffExpr off read_rep base idx) mkBasicIndexedRead off (Just cast) read_rep res base idx - = emit (mkAssign (CmmLocal res) (CmmMachOp cast [ - cmmLoadIndexOffExpr off read_rep base idx])) + = emitAssign (CmmLocal res) (CmmMachOp cast [ + cmmLoadIndexOffExpr off read_rep base idx]) mkBasicIndexedWrite :: ByteOff -> Maybe MachOp -> CmmExpr -> CmmExpr -> CmmExpr -> FCode () mkBasicIndexedWrite off Nothing base idx val - = emit (mkStore (cmmIndexOffExpr off (typeWidth (cmmExprType val)) base idx) val) + = emitStore (cmmIndexOffExpr off (typeWidth (cmmExprType val)) base idx) val mkBasicIndexedWrite off (Just cast) base idx val = mkBasicIndexedWrite off Nothing base idx (CmmMachOp cast [val]) @@ -805,7 +802,7 @@ doCopyMutableByteArrayOp = emitCopyByteArray copy getCode $ emitMemmoveCall dst_p src_p bytes (CmmLit (mkIntCLit 1)), getCode $ emitMemcpyCall dst_p src_p bytes (CmmLit (mkIntCLit 1)) ] - emit $ mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall + emit =<< mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall emitCopyByteArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()) @@ -875,7 +872,7 @@ doCopyMutableArrayOp = emitCopyArray copy getCode $ emitMemmoveCall dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE)), getCode $ emitMemcpyCall dst_p src_p bytes (CmmLit (mkIntCLit wORD_SIZE)) ] - emit $ mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall + emit =<< mkCmmIfThenElse (cmmEqWord src dst) moveCall cpyCall emitCopyArray :: (CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> CmmExpr -> FCode ()) diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs index 6a53317385..9ff4d0be07 100644 --- a/compiler/codeGen/StgCmmProf.hs +++ b/compiler/codeGen/StgCmmProf.hs @@ -103,7 +103,7 @@ initUpdFrameProf :: CmmExpr -> FCode () -- Initialise the profiling field of an update frame initUpdFrameProf frame_amode = ifProfiling $ -- frame->header.prof.ccs = CCCS - emit (mkStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS) + emitStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0) -- is unnecessary because it is not used anyhow. @@ -143,7 +143,7 @@ saveCurrentCostCentre = return Nothing | otherwise = do { local_cc <- newTemp ccType - ; emit (mkAssign (CmmLocal local_cc) curCCS) + ; emitAssign (CmmLocal local_cc) curCCS ; return (Just local_cc) } restoreCurrentCostCentre :: Maybe LocalReg -> FCode () @@ -338,9 +338,9 @@ ldvEnter cl_ptr -- if (era > 0) { -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | -- era | LDV_STATE_USE } - emit (mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit]) + emit =<< mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit]) (mkStore ldv_wd new_ldv_wd) - mkNop) + mkNop where -- don't forget to substract node's tag ldv_wd = ldvWord cl_ptr diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs index d0432315ab..698bf32709 100644 --- a/compiler/codeGen/StgCmmTicky.hs +++ b/compiler/codeGen/StgCmmTicky.hs @@ -180,7 +180,7 @@ registerTickyCtr :: CLabel -> FCode () -- ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */ -- f_ct.registeredp = 1 } registerTickyCtr ctr_lbl - = emit (mkCmmIfThen test (catAGraphs register_stmts)) + = emit =<< mkCmmIfThen test (catAGraphs register_stmts) where -- krc: code generator doesn't handle Not, so we test for Eq 0 instead test = CmmMachOp (MO_Eq wordWidth) @@ -352,7 +352,7 @@ bumpHistogram _lbl _n bumpHistogramE :: LitString -> CmmExpr -> FCode () bumpHistogramE lbl n = do t <- newTemp cLong - emit (mkAssign (CmmLocal t) n) + emitAssign (CmmLocal t) n emit (mkCmmIfThen (CmmMachOp (MO_U_Le cLongWidth) [CmmReg (CmmLocal t), eight]) (mkAssign (CmmLocal t) eight)) emit (addToMem cLong diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index bb4a653c05..7609cfe38d 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -18,12 +18,11 @@ module StgCmmUtils ( emitDataLits, mkDataLits, emitRODataLits, mkRODataLits, emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult, emitRtsCallGen, - assignTemp, newTemp, withTemp, + assignTemp, newTemp, newUnboxedTupleRegs, - mkMultiAssign, mkCmmSwitch, mkCmmLitSwitch, - emitSwitch, + emitMultiAssign, emitCmmLitSwitch, emitSwitch, tagToClosure, mkTaggedObjectLoad, @@ -72,6 +71,7 @@ import Module import Literal import Digraph import ListSetOps +import VarSet import Util import Unique import DynFlags @@ -204,14 +204,14 @@ emitRtsCallGen emitRtsCallGen res pkg fun args _vols safe = do { updfr_off <- getUpdFrameOff ; emit caller_save - ; emit $ call updfr_off + ; call updfr_off ; emit caller_load } where call updfr_off = if safe then - mkCmmCall fun_expr res' args' updfr_off + emit =<< mkCmmCall fun_expr res' args' updfr_off else - mkUnsafeCall (ForeignTarget fun_expr + emit $ mkUnsafeCall (ForeignTarget fun_expr (ForeignConvention CCallConv arg_hints res_hints)) res' args' (args', arg_hints) = unzip args (res', res_hints) = unzip res @@ -441,7 +441,7 @@ assignTemp :: CmmExpr -> FCode LocalReg assignTemp (CmmReg (CmmLocal reg)) = return reg assignTemp e = do { uniq <- newUnique ; let reg = LocalReg uniq (cmmExprType e) - ; emit (mkAssign (CmmLocal reg) e) + ; emitAssign (CmmLocal reg) e ; return reg } newTemp :: CmmType -> FCode LocalReg @@ -471,10 +471,10 @@ newUnboxedTupleRegs res_ty ------------------------------------------------------------------------- --- mkMultiAssign +-- emitMultiAssign ------------------------------------------------------------------------- -mkMultiAssign :: [LocalReg] -> [CmmExpr] -> CmmAGraph +emitMultiAssign :: [LocalReg] -> [CmmExpr] -> FCode () -- Emit code to perform the assignments in the -- input simultaneously, using temporary variables when necessary. @@ -489,14 +489,13 @@ type Stmt = (LocalReg, CmmExpr) -- r := e -- s1 assigns to something s2 uses -- that is, if s1 should *follow* s2 in the final order -mkMultiAssign [] [] = mkNop -mkMultiAssign [reg] [rhs] = mkAssign (CmmLocal reg) rhs -mkMultiAssign regs rhss = ASSERT( equalLength regs rhss ) - unscramble ([1..] `zip` (regs `zip` rhss)) +emitMultiAssign [] [] = return () +emitMultiAssign [reg] [rhs] = emitAssign (CmmLocal reg) rhs +emitMultiAssign regs rhss = ASSERT( equalLength regs rhss ) + unscramble ([1..] `zip` (regs `zip` rhss)) -unscramble :: [Vrtx] -> CmmAGraph -unscramble vertices - = catAGraphs (map do_component components) +unscramble :: [Vrtx] -> FCode () +unscramble vertices = mapM_ do_component components where edges :: [ (Vrtx, Key, [Key]) ] edges = [ (vertex, key1, edges_from stmt1) @@ -511,19 +510,19 @@ unscramble vertices -- do_components deal with one strongly-connected component -- Not cyclic, or singleton? Just do it - do_component :: SCC Vrtx -> CmmAGraph - do_component (AcyclicSCC (_,stmt)) = mk_graph stmt + do_component :: SCC Vrtx -> FCode () + do_component (AcyclicSCC (_,stmt)) = mk_graph stmt do_component (CyclicSCC []) = panic "do_component" do_component (CyclicSCC [(_,stmt)]) = mk_graph stmt -- Cyclic? Then go via temporaries. Pick one to -- break the loop and try again with the rest. - do_component (CyclicSCC ((_,first_stmt) : rest)) - = withUnique $ \u -> + do_component (CyclicSCC ((_,first_stmt) : rest)) = do + u <- newUnique let (to_tmp, from_tmp) = split u first_stmt - in mk_graph to_tmp - <*> unscramble rest - <*> mk_graph from_tmp + mk_graph to_tmp + unscramble rest + mk_graph from_tmp split :: Unique -> Stmt -> (Stmt, Stmt) split uniq (reg, rhs) @@ -532,8 +531,8 @@ unscramble vertices rep = cmmExprType rhs tmp = LocalReg uniq rep - mk_graph :: Stmt -> CmmAGraph - mk_graph (reg, rhs) = mkAssign (CmmLocal reg) rhs + mk_graph :: Stmt -> FCode () + mk_graph (reg, rhs) = emitAssign (CmmLocal reg) rhs mustFollow :: Stmt -> Stmt -> Bool (reg, _) `mustFollow` (_, rhs) = CmmLocal reg `regUsedIn` rhs @@ -551,7 +550,7 @@ emitSwitch :: CmmExpr -- Tag to switch on -> FCode () emitSwitch tag_expr branches mb_deflt lo_tag hi_tag = do { dflags <- getDynFlags - ; emit (mkCmmSwitch (via_C dflags) tag_expr branches mb_deflt lo_tag hi_tag) } + ; mkCmmSwitch (via_C dflags) tag_expr branches mb_deflt lo_tag hi_tag } where via_C dflags | HscC <- hscTarget dflags = True | otherwise = False @@ -563,38 +562,40 @@ mkCmmSwitch :: Bool -- True <=> never generate a conditional tree -> Maybe CmmAGraph -- Default branch (if any) -> ConTagZ -> ConTagZ -- Min and Max possible values; behaviour -- outside this range is undefined - -> CmmAGraph + -> FCode () -- First, two rather common cases in which there is no work to do -mkCmmSwitch _ _ [] (Just code) _ _ = code -mkCmmSwitch _ _ [(_,code)] Nothing _ _ = code +mkCmmSwitch _ _ [] (Just code) _ _ = emit code +mkCmmSwitch _ _ [(_,code)] Nothing _ _ = emit code -- Right, off we go -mkCmmSwitch via_C tag_expr branches mb_deflt lo_tag hi_tag - = withFreshLabel "switch join" $ \ join_lbl -> - label_default join_lbl mb_deflt $ \ mb_deflt -> - label_branches join_lbl branches $ \ branches -> - assignTemp' tag_expr $ \tag_expr' -> +mkCmmSwitch via_C tag_expr branches mb_deflt lo_tag hi_tag = do + join_lbl <- newLabelC + mb_deflt_lbl <- label_default join_lbl mb_deflt + branches_lbls <- label_branches join_lbl branches + tag_expr' <- assignTemp' tag_expr - mk_switch tag_expr' (sortBy (comparing fst) branches) mb_deflt - lo_tag hi_tag via_C - -- Sort the branches before calling mk_switch - <*> mkLabel join_lbl + emit =<< mk_switch tag_expr' (sortBy (comparing fst) branches) mb_deflt_lbl + lo_tag hi_tag via_C + + -- Sort the branches before calling mk_switch + + emitLabel join_lbl mk_switch :: CmmExpr -> [(ConTagZ, BlockId)] -> Maybe BlockId -> ConTagZ -> ConTagZ -> Bool - -> CmmAGraph + -> FCode CmmAGraph -- SINGLETON TAG RANGE: no case analysis to do mk_switch _tag_expr [(tag, lbl)] _ lo_tag hi_tag _via_C | lo_tag == hi_tag = ASSERT( tag == lo_tag ) - mkBranch lbl + return (mkBranch lbl) -- SINGLETON BRANCH, NO DEFAULT: no case analysis to do mk_switch _tag_expr [(_tag,lbl)] Nothing _ _ _ - = mkBranch lbl + = return (mkBranch lbl) -- The simplifier might have eliminated a case -- so we may have e.g. case xs of -- [] -> e @@ -603,7 +604,7 @@ mk_switch _tag_expr [(_tag,lbl)] Nothing _ _ _ -- SINGLETON BRANCH: one equality check to do mk_switch tag_expr [(tag,lbl)] (Just deflt) _ _ _ - = mkCbranch cond deflt lbl + = return (mkCbranch cond deflt lbl) where cond = cmmNeWord tag_expr (CmmLit (mkIntCLit tag)) -- We have lo_tag < hi_tag, but there's only one branch, @@ -636,30 +637,34 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C arms :: [Maybe BlockId] arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]] in - mkSwitch (cmmOffset tag_expr (- real_lo_tag)) arms + return (mkSwitch (cmmOffset tag_expr (- real_lo_tag)) arms) -- if we can knock off a bunch of default cases with one if, then do so | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches - = mkCmmIfThenElse + = do stmts <- mk_switch tag_expr branches mb_deflt + lowest_branch hi_tag via_C + mkCmmIfThenElse (cmmULtWord tag_expr (CmmLit (mkIntCLit lowest_branch))) (mkBranch deflt) - (mk_switch tag_expr branches mb_deflt - lowest_branch hi_tag via_C) + stmts | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches - = mkCmmIfThenElse + = do stmts <- mk_switch tag_expr branches mb_deflt + lo_tag highest_branch via_C + mkCmmIfThenElse (cmmUGtWord tag_expr (CmmLit (mkIntCLit highest_branch))) (mkBranch deflt) - (mk_switch tag_expr branches mb_deflt - lo_tag highest_branch via_C) + stmts | otherwise -- Use an if-tree - = mkCmmIfThenElse + = do lo_stmts <- mk_switch tag_expr lo_branches mb_deflt + lo_tag (mid_tag-1) via_C + hi_stmts <- mk_switch tag_expr hi_branches mb_deflt + mid_tag hi_tag via_C + mkCmmIfThenElse (cmmUGeWord tag_expr (CmmLit (mkIntCLit mid_tag))) - (mk_switch tag_expr hi_branches mb_deflt - mid_tag hi_tag via_C) - (mk_switch tag_expr lo_branches mb_deflt - lo_tag (mid_tag-1) via_C) + hi_stmts + lo_stmts -- we test (e >= mid_tag) rather than (e < mid_tag), because -- the former works better when e is a comparison, and there -- are two tags 0 & 1 (mid_tag == 1). In this case, the code @@ -714,30 +719,31 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C is_lo (t,_) = t < mid_tag -------------- -mkCmmLitSwitch :: CmmExpr -- Tag to switch on +emitCmmLitSwitch :: CmmExpr -- Tag to switch on -> [(Literal, CmmAGraph)] -- Tagged branches -> CmmAGraph -- Default branch (always) - -> CmmAGraph -- Emit the code + -> FCode () -- Emit the code -- Used for general literals, whose size might not be a word, -- where there is always a default case, and where we don't know -- the range of values for certain. For simplicity we always generate a tree. -- -- ToDo: for integers we could do better here, perhaps by generalising -- mk_switch and using that. --SDM 15/09/2004 -mkCmmLitSwitch _scrut [] deflt = deflt -mkCmmLitSwitch scrut branches deflt - = assignTemp' scrut $ \ scrut' -> - withFreshLabel "switch join" $ \ join_lbl -> - label_code join_lbl deflt $ \ deflt -> - label_branches join_lbl branches $ \ branches -> - mk_lit_switch scrut' deflt (sortBy (comparing fst) branches) - <*> mkLabel join_lbl +emitCmmLitSwitch _scrut [] deflt = emit deflt +emitCmmLitSwitch scrut branches deflt = do + scrut' <- assignTemp' scrut + join_lbl <- newLabelC + deflt_lbl <- label_code join_lbl deflt + branches_lbls <- label_branches join_lbl branches + emit =<< mk_lit_switch scrut' deflt_lbl + (sortBy (comparing fst) branches_lbls) + emitLabel join_lbl mk_lit_switch :: CmmExpr -> BlockId -> [(Literal,BlockId)] - -> CmmAGraph + -> FCode CmmAGraph mk_lit_switch scrut deflt [(lit,blk)] - = mkCbranch (CmmMachOp ne [scrut, CmmLit cmm_lit]) deflt blk + = return (mkCbranch (CmmMachOp ne [scrut, CmmLit cmm_lit]) deflt blk) where cmm_lit = mkSimpleLit lit cmm_ty = cmmLitType cmm_lit @@ -745,9 +751,9 @@ mk_lit_switch scrut deflt [(lit,blk)] ne = if isFloatType cmm_ty then MO_F_Ne rep else MO_Ne rep mk_lit_switch scrut deflt_blk_id branches - = mkCmmIfThenElse cond - (mk_lit_switch scrut deflt_blk_id lo_branches) - (mk_lit_switch scrut deflt_blk_id hi_branches) + = do lo_blk <- mk_lit_switch scrut deflt_blk_id lo_branches + hi_blk <- mk_lit_switch scrut deflt_blk_id hi_branches + mkCmmIfThenElse cond lo_blk hi_blk where n_branches = length branches (mid_lit,_) = branches !! (n_branches `div` 2) @@ -761,49 +767,42 @@ mk_lit_switch scrut deflt_blk_id branches -------------- -label_default :: BlockId -> Maybe CmmAGraph - -> (Maybe BlockId -> CmmAGraph) - -> CmmAGraph -label_default _ Nothing thing_inside - = thing_inside Nothing -label_default join_lbl (Just code) thing_inside - = label_code join_lbl code $ \ lbl -> - thing_inside (Just lbl) +label_default :: BlockId -> Maybe CmmAGraph -> FCode (Maybe BlockId) +label_default _ Nothing + = return Nothing +label_default join_lbl (Just code) + = do lbl <- label_code join_lbl code + return (Just lbl) -------------- -label_branches :: BlockId -> [(a,CmmAGraph)] - -> ([(a,BlockId)] -> CmmAGraph) - -> CmmAGraph -label_branches _join_lbl [] thing_inside - = thing_inside [] -label_branches join_lbl ((tag,code):branches) thing_inside - = label_code join_lbl code $ \ lbl -> - label_branches join_lbl branches $ \ branches' -> - thing_inside ((tag,lbl):branches') +label_branches :: BlockId -> [(a,CmmAGraph)] -> FCode [(a,BlockId)] +label_branches _join_lbl [] + = return [] +label_branches join_lbl ((tag,code):branches) + = do lbl <- label_code join_lbl code + branches' <- label_branches join_lbl branches + return ((tag,lbl):branches') -------------- -label_code :: BlockId -> CmmAGraph -> (BlockId -> CmmAGraph) -> CmmAGraph --- (label_code J code fun) +label_code :: BlockId -> CmmAGraph -> FCode BlockId +-- label_code J code -- generates --- [L: code; goto J] fun L -label_code join_lbl code thing_inside - = withFreshLabel "switch" $ \lbl -> - outOfLine (mkLabel lbl <*> code <*> mkBranch join_lbl) - <*> thing_inside lbl - +-- [L: code; goto J] +-- and returns L +label_code join_lbl code = do + lbl <- newLabelC + emitOutOfLine lbl (code <*> mkBranch join_lbl) + return lbl -------------- -assignTemp' :: CmmExpr -> (CmmExpr -> CmmAGraph) -> CmmAGraph -assignTemp' e thing_inside - | isTrivialCmmExpr e = thing_inside e - | otherwise = withTemp (cmmExprType e) $ \ lreg -> - let reg = CmmLocal lreg in - mkAssign reg e <*> thing_inside (CmmReg reg) - -withTemp :: CmmType -> (LocalReg -> CmmAGraph) -> CmmAGraph -withTemp rep thing_inside - = withUnique $ \uniq -> thing_inside (LocalReg uniq rep) - +assignTemp' :: CmmExpr -> FCode CmmExpr +assignTemp' e + | isTrivialCmmExpr e = return e + | otherwise = do + lreg <- newTemp (cmmExprType e) + let reg = CmmLocal lreg + emitAssign reg e + return (CmmReg reg) ------------------------------------------------------------------------- -- @@ -811,36 +810,13 @@ withTemp rep thing_inside -- ------------------------------------------------------------------------- --- There is just one SRT for each top level binding; all the nested --- bindings use sub-sections of this SRT. The label is passed down to --- the nested bindings via the monad. - -getSRTInfo :: SRT -> FCode C_SRT -getSRTInfo (SRTEntries {}) = panic "getSRTInfo" - -getSRTInfo (SRT off len bmp) - | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape] - = do { id <- newUnique - -- ; top_srt <- getSRTLabel - ; let srt_desc_lbl = mkLargeSRTLabel id - -- JD: We're not constructing and emitting SRTs in the back end, - -- which renders this code wrong (it now names a now-non-existent label). - -- ; emitRODataLits srt_desc_lbl - -- ( cmmLabelOffW top_srt off - -- : mkWordCLit (fromIntegral len) - -- : map mkWordCLit bmp) - ; return (C_SRT srt_desc_lbl 0 srt_escape) } - - | otherwise - = do { top_srt <- getSRTLabel - ; return (C_SRT top_srt off (fromIntegral (head bmp))) } - -- The fromIntegral converts to StgHalfWord - -getSRTInfo NoSRT - = -- TODO: Should we panic in this case? - -- Someone obviously thinks there should be an SRT - return NoC_SRT - +-- | Returns 'True' if there is a non-empty SRT, or 'False' otherwise +-- NB. the SRT attached to an StgBind is still used in the new codegen +-- to decide whether we need a static link field on a static closure +-- or not. +getSRTInfo :: SRT -> FCode Bool +getSRTInfo (SRTEntries vs) = return (not (isEmptyVarSet vs)) +getSRTInfo _ = return False srt_escape :: StgHalfWord srt_escape = -1 |