diff options
Diffstat (limited to 'compiler/codeGen')
37 files changed, 8254 insertions, 496 deletions
diff --git a/compiler/codeGen/CgBindery.lhs b/compiler/codeGen/CgBindery.lhs index 66776930c5..1928308a31 100644 --- a/compiler/codeGen/CgBindery.lhs +++ b/compiler/codeGen/CgBindery.lhs @@ -286,7 +286,7 @@ getCgIdInfo id name = idName id in if isExternalName name then do - let ext_lbl = CmmLit (CmmLabel (mkClosureLabel name)) + let ext_lbl = CmmLit (CmmLabel (mkClosureLabel name $ idCafInfo id)) return (stableIdInfo id ext_lbl (mkLFImported id)) else if isVoidArg (idCgRep id) then @@ -447,10 +447,7 @@ bindNewToTemp id return temp_reg where uniq = getUnique id - temp_reg = LocalReg uniq (argMachRep (idCgRep id)) kind - kind = if isFollowableArg (idCgRep id) - then GCKindPtr - else GCKindNonPtr + temp_reg = LocalReg uniq (argMachRep (idCgRep id)) lf_info = mkLFArgument id -- Always used of things we -- know nothing about diff --git a/compiler/codeGen/CgCallConv.hs b/compiler/codeGen/CgCallConv.hs index 752769f4e3..87c69b6331 100644 --- a/compiler/codeGen/CgCallConv.hs +++ b/compiler/codeGen/CgCallConv.hs @@ -39,7 +39,6 @@ import CgUtils import CgMonad import SMRep -import MachOp import Cmm import CLabel @@ -149,7 +148,7 @@ mkLiveness :: Name -> Int -> Bitmap -> FCode Liveness mkLiveness name size bits | size > mAX_SMALL_BITMAP_SIZE -- Bitmap does not fit in one word = do { let lbl = mkBitmapLabel (getUnique name) - ; emitRODataLits lbl ( mkWordCLit (fromIntegral size) + ; emitRODataLits "mkLiveness" lbl ( mkWordCLit (fromIntegral size) : map mkWordCLit bits) ; return (BigLiveness lbl) } @@ -196,7 +195,7 @@ mkRegLiveness regs ptrs nptrs all_non_ptrs = 0xff reg_bits [] = 0 - reg_bits ((id, VanillaReg i) : regs) | isFollowableArg (idCgRep id) + reg_bits ((id, VanillaReg i _) : regs) | isFollowableArg (idCgRep id) = (1 `shiftL` (i - 1)) .|. reg_bits regs reg_bits (_ : regs) = reg_bits regs @@ -264,8 +263,8 @@ slowCallPattern _ = panic "CgStackery.slowCallPattern" ------------------------------------------------------------------------- dataReturnConvPrim :: CgRep -> CmmReg -dataReturnConvPrim PtrArg = CmmGlobal (VanillaReg 1) -dataReturnConvPrim NonPtrArg = CmmGlobal (VanillaReg 1) +dataReturnConvPrim PtrArg = CmmGlobal (VanillaReg 1 VGcPtr) +dataReturnConvPrim NonPtrArg = CmmGlobal (VanillaReg 1 VNonGcPtr) dataReturnConvPrim LongArg = CmmGlobal (LongReg 1) dataReturnConvPrim FloatArg = CmmGlobal (FloatReg 1) dataReturnConvPrim DoubleArg = CmmGlobal (DoubleReg 1) @@ -288,7 +287,7 @@ getSequelAmode = do { EndOfBlockInfo virt_sp sequel <- getEndOfBlockInfo ; case sequel of OnStack -> do { sp_rel <- getSpRelOffset virt_sp - ; returnFC (CmmLoad sp_rel wordRep) } + ; returnFC (CmmLoad sp_rel bWord) } UpdateCode -> returnFC (CmmLit (CmmLabel mkUpdInfoLabel)) CaseAlts lbl _ _ -> returnFC (CmmLit (CmmLabel lbl)) @@ -361,7 +360,7 @@ assign_regs args supply where go [] acc supply = (acc, []) -- Return the results reversed (doesn't matter) go ((VoidArg,_) : args) acc supply -- Skip void arguments; they aren't passed, and - = go args acc supply -- there's nothign to bind them to + = go args acc supply -- there's nothing to bind them to go ((rep,arg) : args) acc supply = case assign_reg rep supply of Just (reg, supply') -> go args ((arg,reg):acc) supply' @@ -370,9 +369,9 @@ assign_regs args supply assign_reg :: CgRep -> AvailRegs -> Maybe (GlobalReg, AvailRegs) assign_reg FloatArg (vs, f:fs, ds, ls) = Just (FloatReg f, (vs, fs, ds, ls)) assign_reg DoubleArg (vs, fs, d:ds, ls) = Just (DoubleReg d, (vs, fs, ds, ls)) -assign_reg LongArg (vs, fs, ds, l:ls) = Just (LongReg l, (vs, fs, ds, ls)) -assign_reg PtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v, (vs, fs, ds, ls)) -assign_reg NonPtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v, (vs, fs, ds, ls)) +assign_reg LongArg (vs, fs, ds, l:ls) = pprTrace "longArg" (ppr l) $ Just (LongReg l, (vs, fs, ds, ls)) +assign_reg PtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v VGcPtr, (vs, fs, ds, ls)) +assign_reg NonPtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v VNonGcPtr, (vs, fs, ds, ls)) -- PtrArg and NonPtrArg both go in a vanilla register assign_reg other not_enough_regs = Nothing @@ -430,11 +429,11 @@ mkRegTbl_allRegs regs_in_use mkRegTbl' regs_in_use vanillas floats doubles longs = (ok_vanilla, ok_float, ok_double, ok_long) where - ok_vanilla = mapCatMaybes (select VanillaReg) vanillas + ok_vanilla = mapCatMaybes (select (\i -> VanillaReg i VNonGcPtr)) vanillas + -- ptrhood isn't looked at, hence we can use any old rep. ok_float = mapCatMaybes (select FloatReg) floats ok_double = mapCatMaybes (select DoubleReg) doubles ok_long = mapCatMaybes (select LongReg) longs - -- rep isn't looked at, hence we can use any old rep. select :: (Int -> GlobalReg) -> Int{-cand-} -> Maybe Int -- one we've unboxed the Int, we make a GlobalReg diff --git a/compiler/codeGen/CgCase.lhs b/compiler/codeGen/CgCase.lhs index 49c782e12a..859b2208fe 100644 --- a/compiler/codeGen/CgCase.lhs +++ b/compiler/codeGen/CgCase.lhs @@ -36,7 +36,6 @@ import ClosureInfo import SMRep import CmmUtils import Cmm -import MachOp import StgSyn import StaticFlags @@ -164,8 +163,8 @@ cgCase (StgOpApp op@(StgFCallOp fcall _) args _) do -- *must* be an unboxed tuple alt. -- exactly like the cgInlinePrimOp case for unboxed tuple alts.. { res_tmps <- mapFCs bindNewToTemp non_void_res_ids - ; let res_hints = map (typeHint.idType) non_void_res_ids - ; cgForeignCall (zipWith CmmKinded res_tmps res_hints) fcall args live_in_alts + ; let res_hints = map (typeForeignHint.idType) non_void_res_ids + ; cgForeignCall (zipWith CmmHinted res_tmps res_hints) fcall args live_in_alts ; cgExpr rhs } where (_, res_ids, _, rhs) = head alts @@ -340,7 +339,7 @@ cgInlinePrimOp primop args bndr (AlgAlt tycon) live_in_alts alts (_,e) <- getArgAmode arg return e do_enum_primop primop - = do tmp <- newNonPtrTemp wordRep + = do tmp <- newTemp bWord cgPrimOp [tmp] primop args live_in_alts returnFC (CmmReg (CmmLocal tmp)) @@ -612,6 +611,6 @@ restoreCurrentCostCentre Nothing _freeit = nopC restoreCurrentCostCentre (Just slot) freeit = do { sp_rel <- getSpRelOffset slot ; whenC freeit (freeStackSlots [slot]) - ; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel wordRep)) } + ; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel bWord)) } \end{code} diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs index 80949e7513..b7f9f3b7dc 100644 --- a/compiler/codeGen/CgClosure.lhs +++ b/compiler/codeGen/CgClosure.lhs @@ -38,7 +38,6 @@ import CgCallConv import CgUtils import ClosureInfo import SMRep -import MachOp import Cmm import CmmUtils import CLabel @@ -85,7 +84,7 @@ cgTopRhsClosure id ccs binder_info upd_flag args body = do ; mod_name <- getModuleName ; let descr = closureDescription mod_name name closure_info = mkClosureInfo True id lf_info 0 0 srt_info descr - closure_label = mkLocalClosureLabel name + closure_label = mkLocalClosureLabel name $ idCafInfo id cg_id_info = stableIdInfo id (mkLblExpr closure_label) lf_info closure_rep = mkStaticClosureFields closure_info ccs True [] @@ -259,6 +258,7 @@ closureCodeBody binder_info cl_info cc [{- No args i.e. thunk -}] body = do -- in update frame CAF/DICT functions will be -- subsumed by this enclosing cc { enterCostCentre cl_info cc body + ; stmtsC [CmmComment $ mkFastString $ showSDoc $ ppr body] ; cgExpr body } } @@ -282,7 +282,7 @@ closureCodeBody binder_info cl_info cc args body (sp_top, stk_args) = mkVirtStkOffsets vSp other_args -- Allocate the global ticky counter - ; let ticky_ctr_lbl = mkRednCountsLabel (closureName cl_info) + ; let ticky_ctr_lbl = mkRednCountsLabel (closureName cl_info) (clHasCafRefs cl_info) ; emitTickyCounter cl_info args sp_top -- ...and establish the ticky-counter @@ -355,7 +355,8 @@ mkSlowEntryCode cl_info reg_args | otherwise = return noStmts where name = closureName cl_info - slow_lbl = mkSlowEntryLabel name + has_caf_refs = clHasCafRefs cl_info + slow_lbl = mkSlowEntryLabel name has_caf_refs load_stmts = mkStmts load_assts `plusStmts` mkStmts [stk_adj_pop, jump_to_entry] save_stmts = oneStmt stk_adj_push `plusStmts` mkStmts save_assts @@ -372,13 +373,13 @@ mkSlowEntryCode cl_info reg_args (argMachRep rep)) save_assts = zipWithEqual "mk_save" mk_save reps_w_regs stk_offsets - mk_save (rep,reg) offset = ASSERT( argMachRep rep == globalRegRep reg ) + mk_save (rep,reg) offset = ASSERT( argMachRep rep `cmmEqType` globalRegType reg ) CmmStore (cmmRegOffW spReg offset) (CmmReg (CmmGlobal reg)) stk_adj_pop = CmmAssign spReg (cmmRegOffW spReg final_stk_offset) stk_adj_push = CmmAssign spReg (cmmRegOffW spReg (- final_stk_offset)) - jump_to_entry = CmmJump (mkLblExpr (enterLocalIdLabel name)) [] + jump_to_entry = CmmJump (mkLblExpr (enterLocalIdLabel name has_caf_refs)) [] \end{code} @@ -565,7 +566,7 @@ link_caf cl_info is_upd = do -- so that the garbage collector can find them -- This must be done *before* the info table pointer is overwritten, -- because the old info table ptr is needed for reversion - ; emitRtsCallWithVols (sLit "newCAF") [CmmKinded (CmmReg nodeReg) PtrHint] [node] False + ; emitRtsCallWithVols (sLit "newCAF") [CmmHinted (CmmReg nodeReg) AddrHint] [node] False -- node is live, so save it. -- Overwrite the closure with a (static) indirection diff --git a/compiler/codeGen/CgCon.lhs b/compiler/codeGen/CgCon.lhs index ff012ef4cf..b22e56f70c 100644 --- a/compiler/codeGen/CgCon.lhs +++ b/compiler/codeGen/CgCon.lhs @@ -47,6 +47,7 @@ import Constants import TyCon import DataCon import Id +import IdInfo import Type import PrelInfo import Outputable @@ -82,7 +83,7 @@ cgTopRhsCon id con args ; let name = idName id lf_info = mkConLFInfo con - closure_label = mkClosureLabel name + closure_label = mkClosureLabel name $ idCafInfo id caffy = any stgArgHasCafRefs args (closure_info, amodes_w_offsets) = layOutStaticConstr con amodes closure_rep = mkStaticClosureFields @@ -142,7 +143,8 @@ at all. \begin{code} buildDynCon binder cc con [] = returnFC (taggedStableIdInfo binder - (mkLblExpr (mkClosureLabel (dataConName con))) + (mkLblExpr (mkClosureLabel (dataConName con) + (idCafInfo binder))) (mkConLFInfo con) con) \end{code} @@ -174,7 +176,7 @@ buildDynCon binder cc con [arg_amode] , (_, CmmLit (CmmInt val _)) <- arg_amode , let val_int = (fromIntegral val) :: Int , val_int <= mAX_INTLIKE && val_int >= mIN_INTLIKE - = do { let intlike_lbl = mkRtsDataLabel (sLit "stg_INTLIKE_closure") + = do { let intlike_lbl = mkRtsGcPtrLabel (sLit "stg_INTLIKE_closure") offsetW = (val_int - mIN_INTLIKE) * (fixedHdrSize + 1) -- INTLIKE closures consist of a header and one word payload intlike_amode = CmmLit (cmmLabelOffW intlike_lbl offsetW) @@ -185,7 +187,7 @@ buildDynCon binder cc con [arg_amode] , (_, CmmLit (CmmInt val _)) <- arg_amode , let val_int = (fromIntegral val) :: Int , val_int <= mAX_CHARLIKE && val_int >= mIN_CHARLIKE - = do { let charlike_lbl = mkRtsDataLabel (sLit "stg_CHARLIKE_closure") + = do { let charlike_lbl = mkRtsGcPtrLabel (sLit "stg_CHARLIKE_closure") offsetW = (val_int - mIN_CHARLIKE) * (fixedHdrSize + 1) -- CHARLIKE closures consist of a header and one word payload charlike_amode = CmmLit (cmmLabelOffW charlike_lbl offsetW) @@ -401,9 +403,8 @@ cgTyCon tycon -- code appears to put it before --- NR 16 Aug 2007 ; extra <- if isEnumerationTyCon tycon then do - tbl <- getCmm (emitRODataLits (mkLocalClosureTableLabel - (tyConName tycon)) - [ CmmLabelOff (mkLocalClosureLabel (dataConName con)) (tagForCon con) + tbl <- getCmm (emitRODataLits "cgTyCon" (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs) + [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs) (tagForCon con) | con <- tyConDataCons tycon]) return [tbl] else diff --git a/compiler/codeGen/CgExpr.lhs b/compiler/codeGen/CgExpr.lhs index f22071e2c5..3b75267385 100644 --- a/compiler/codeGen/CgExpr.lhs +++ b/compiler/codeGen/CgExpr.lhs @@ -37,7 +37,7 @@ import CgHpc import CgUtils import ClosureInfo import Cmm -import MachOp +import CmmUtils import VarSet import Literal import PrimOp @@ -48,6 +48,7 @@ import Maybes import ListSetOps import BasicTypes import Util +import FastString import Outputable \end{code} @@ -128,18 +129,15 @@ cgExpr (StgOpApp (StgFCallOp fcall _) stg_args res_ty) = do | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes, nonVoidArg rep] - arg_tmps <- sequence [ - if isFollowableArg (typeCgRep (stgArgType stg_arg)) - then assignPtrTemp arg - else assignNonPtrTemp arg - | (arg, stg_arg) <- arg_exprs] - let arg_hints = zipWith CmmKinded arg_tmps (map (typeHint.stgArgType) stg_args) + arg_tmps <- sequence [ assignTemp arg + | (arg, stg_arg) <- arg_exprs] + let arg_hints = zipWith CmmHinted arg_tmps (map (typeForeignHint.stgArgType) stg_args) {- Now, allocate some result regs. -} (res_reps,res_regs,res_hints) <- newUnboxedTupleRegs res_ty ccallReturnUnboxedTuple (zip res_reps (map (CmmReg . CmmLocal) res_regs)) $ - emitForeignCall (zipWith CmmKinded res_regs res_hints) fcall + emitForeignCall (zipWith CmmHinted res_regs res_hints) fcall arg_hints emptyVarSet{-no live vars-} -- tagToEnum# is special: we need to pull the constructor out of the table, @@ -148,10 +146,7 @@ cgExpr (StgOpApp (StgFCallOp fcall _) stg_args res_ty) = do cgExpr (StgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty) = ASSERT(isEnumerationTyCon tycon) do { (rep,amode) <- getArgAmode arg - ; amode' <- if isFollowableArg rep - then assignPtrTemp amode - else assignNonPtrTemp amode - -- We're going to use it twice, + ; amode' <- assignTemp amode -- We're going to use it twice, -- so save in a temp if non-trivial ; stmtC (CmmAssign nodeReg (tagToClosure tycon amode')) ; performReturn emitReturnInstr } @@ -173,9 +168,7 @@ cgExpr x@(StgOpApp op@(StgPrimOp primop) args res_ty) performReturn emitReturnInstr | ReturnsPrim rep <- result_info - = do res <- if isFollowableArg (typeCgRep res_ty) - then newPtrTemp (argMachRep (typeCgRep res_ty)) - else newNonPtrTemp (argMachRep (typeCgRep res_ty)) + = do res <- newTemp (typeCmmType res_ty) cgPrimOp [res] primop args emptyVarSet performPrimReturn (primRepToCgRep rep) (CmmReg (CmmLocal res)) @@ -186,9 +179,7 @@ cgExpr x@(StgOpApp op@(StgPrimOp primop) args res_ty) | ReturnsAlg tycon <- result_info, isEnumerationTyCon tycon -- c.f. cgExpr (...TagToEnumOp...) - = do tag_reg <- if isFollowableArg (typeCgRep res_ty) - then newPtrTemp wordRep - else newNonPtrTemp wordRep + = do tag_reg <- newTemp bWord -- The tag is a word cgPrimOp [tag_reg] primop args emptyVarSet stmtC (CmmAssign nodeReg (tagToClosure tycon @@ -455,16 +446,14 @@ cgLetNoEscapeRhs full_live_in_rhss rhs_eob_info maybe_cc_slot rec binder Little helper for primitives that return unboxed tuples. \begin{code} -newUnboxedTupleRegs :: Type -> FCode ([CgRep], [LocalReg], [MachHint]) +newUnboxedTupleRegs :: Type -> FCode ([CgRep], [LocalReg], [ForeignHint]) newUnboxedTupleRegs res_ty = let ty_args = tyConAppArgs (repType res_ty) - (reps,hints) = unzip [ (rep, typeHint ty) | ty <- ty_args, + (reps,hints) = unzip [ (rep, typeForeignHint ty) | ty <- ty_args, let rep = typeCgRep ty, nonVoidArg rep ] - make_new_temp rep = if isFollowableArg rep - then newPtrTemp (argMachRep rep) - else newNonPtrTemp (argMachRep rep) + make_new_temp rep = newTemp (argMachRep rep) in do regs <- mapM make_new_temp reps return (reps,regs,hints) diff --git a/compiler/codeGen/CgForeignCall.hs b/compiler/codeGen/CgForeignCall.hs index b3d779e182..6e338061b4 100644 --- a/compiler/codeGen/CgForeignCall.hs +++ b/compiler/codeGen/CgForeignCall.hs @@ -34,7 +34,6 @@ import TysPrim import CLabel import Cmm import CmmUtils -import MachOp import SMRep import ForeignCall import ClosureInfo @@ -49,7 +48,7 @@ import Control.Monad -- Code generation for Foreign Calls cgForeignCall - :: CmmFormals -- where to put the results + :: HintedCmmFormals -- where to put the results -> ForeignCall -- the op -> [StgArg] -- arguments -> StgLiveVars -- live vars, in case we need to save them @@ -63,16 +62,16 @@ cgForeignCall results fcall stg_args live | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes, nonVoidArg rep] - arg_hints = zipWith CmmKinded - arg_exprs (map (typeHint.stgArgType) stg_args) + arg_hints = zipWith CmmHinted + arg_exprs (map (typeForeignHint.stgArgType) stg_args) -- in emitForeignCall results fcall arg_hints live emitForeignCall - :: CmmFormals -- where to put the results + :: HintedCmmFormals -- where to put the results -> ForeignCall -- the op - -> [CmmKinded CmmExpr] -- arguments + -> [CmmHinted CmmExpr] -- arguments -> StgLiveVars -- live vars, in case we need to save them -> Code @@ -86,18 +85,18 @@ emitForeignCall results (CCall (CCallSpec target cconv safety)) args live = case target of StaticTarget lbl -> (args, CmmLit (CmmLabel (mkForeignLabel lbl call_size False))) - DynamicTarget -> case args of (CmmKinded fn _):rest -> (rest, fn) + DynamicTarget -> case args of (CmmHinted fn _):rest -> (rest, fn) -- in the stdcall calling convention, the symbol needs @size appended -- to it, where size is the total number of bytes of arguments. We -- attach this info to the CLabel here, and the CLabel pretty printer -- will generate the suffix when the label is printed. call_size - | StdCallConv <- cconv = Just (sum (map (arg_size.cmmExprRep.kindlessCmm) args)) + | StdCallConv <- cconv = Just (sum (map (arg_size.cmmExprType.hintlessCmm) args)) | otherwise = Nothing -- ToDo: this might not be correct for 64-bit API - arg_size rep = max (machRepByteWidth rep) wORD_SIZE + arg_size rep = max (widthInBytes (typeWidth rep)) wORD_SIZE emitForeignCall _ (DNCall _) _ _ = panic "emitForeignCall: DNCall" @@ -106,9 +105,9 @@ emitForeignCall _ (DNCall _) _ _ -- alternative entry point, used by CmmParse emitForeignCall' :: Safety - -> CmmFormals -- where to put the results + -> HintedCmmFormals -- where to put the results -> CmmCallTarget -- the op - -> [CmmKinded CmmExpr] -- arguments + -> [CmmHinted CmmExpr] -- arguments -> Maybe [GlobalReg] -- live vars, in case we need to save them -> C_SRT -- the SRT of the calls continuation -> CmmReturnInfo @@ -124,8 +123,8 @@ emitForeignCall' safety results target args vols srt ret | otherwise = do -- Both 'id' and 'new_base' are GCKindNonPtr because they're -- RTS only objects and are not subject to garbage collection - id <- newNonPtrTemp wordRep - new_base <- newNonPtrTemp (cmmRegRep (CmmGlobal BaseReg)) + id <- newTemp bWord + new_base <- newTemp (cmmRegType (CmmGlobal BaseReg)) temp_args <- load_args_into_temps args temp_target <- load_target_into_temp target let (caller_save, caller_load) = callerSaveVolatileRegs vols @@ -134,16 +133,16 @@ emitForeignCall' safety results target args vols srt ret -- The CmmUnsafe arguments are only correct because this part -- of the code hasn't been moved into the CPS pass yet. -- Once that happens, this function will just emit a (CmmSafe srt) call, - -- and the CPS will will be the one to convert that + -- and the CPS will be the one to convert that -- to this sequence of three CmmUnsafe calls. stmtC (CmmCall (CmmCallee suspendThread CCallConv) - [ CmmKinded id PtrHint ] - [ CmmKinded (CmmReg (CmmGlobal BaseReg)) PtrHint ] + [ CmmHinted id AddrHint ] + [ CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint ] CmmUnsafe ret) stmtC (CmmCall temp_target results temp_args CmmUnsafe ret) stmtC (CmmCall (CmmCallee resumeThread CCallConv) - [ CmmKinded new_base PtrHint ] - [ CmmKinded (CmmReg (CmmLocal id)) PtrHint ] + [ CmmHinted new_base AddrHint ] + [ CmmHinted (CmmReg (CmmLocal id)) AddrHint ] CmmUnsafe ret) -- Assign the result to BaseReg: we -- might now have a different Capability! @@ -163,9 +162,9 @@ resumeThread = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "resumeThread"))) -- This is a HACK; really it should be done in the back end, but -- it's easier to generate the temporaries here. load_args_into_temps = mapM arg_assign_temp - where arg_assign_temp (CmmKinded e hint) = do + where arg_assign_temp (CmmHinted e hint) = do tmp <- maybe_assign_temp e - return (CmmKinded tmp hint) + return (CmmHinted tmp hint) load_target_into_temp (CmmCallee expr conv) = do tmp <- maybe_assign_temp expr @@ -179,7 +178,7 @@ maybe_assign_temp e -- don't use assignTemp, it uses its own notion of "trivial" -- expressions, which are wrong here. -- this is a NonPtr because it only duplicates an existing - reg <- newNonPtrTemp (cmmExprRep e) --TODO FIXME NOW + reg <- newTemp (cmmExprType e) --TODO FIXME NOW stmtC (CmmAssign (CmmLocal reg) e) return (CmmReg (CmmLocal reg)) @@ -201,13 +200,13 @@ emitSaveThreadState = do emitCloseNursery = stmtC $ CmmStore nursery_bdescr_free (cmmOffsetW stgHp 1) emitLoadThreadState = do - tso <- newNonPtrTemp wordRep -- TODO FIXME NOW + tso <- newTemp bWord -- TODO FIXME NOW stmtsC [ -- tso = CurrentTSO; CmmAssign (CmmLocal tso) stgCurrentTSO, -- Sp = tso->sp; CmmAssign sp (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_SP) - wordRep), + bWord), -- SpLim = tso->stack + RESERVED_STACK_WORDS; CmmAssign spLim (cmmOffsetW (cmmOffset (CmmReg (CmmLocal tso)) tso_STACK) rESERVED_STACK_WORDS) @@ -216,21 +215,21 @@ emitLoadThreadState = do -- and load the current cost centre stack from the TSO when profiling: when opt_SccProfilingOn $ stmtC (CmmStore curCCSAddr - (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) wordRep)) + (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord)) emitOpenNursery = stmtsC [ -- Hp = CurrentNursery->free - 1; - CmmAssign hp (cmmOffsetW (CmmLoad nursery_bdescr_free wordRep) (-1)), + CmmAssign hp (cmmOffsetW (CmmLoad nursery_bdescr_free gcWord) (-1)), -- HpLim = CurrentNursery->start + -- CurrentNursery->blocks*BLOCK_SIZE_W - 1; CmmAssign hpLim (cmmOffsetExpr - (CmmLoad nursery_bdescr_start wordRep) + (CmmLoad nursery_bdescr_start bWord) (cmmOffset (CmmMachOp mo_wordMul [ - CmmMachOp (MO_S_Conv I32 wordRep) - [CmmLoad nursery_bdescr_blocks I32], + CmmMachOp (MO_SS_Conv W32 wordWidth) + [CmmLoad nursery_bdescr_blocks b32], CmmLit (mkIntCLit bLOCK_SIZE) ]) (-1) diff --git a/compiler/codeGen/CgHeapery.lhs b/compiler/codeGen/CgHeapery.lhs index 66d41d3d96..252989105c 100644 --- a/compiler/codeGen/CgHeapery.lhs +++ b/compiler/codeGen/CgHeapery.lhs @@ -42,9 +42,9 @@ import ClosureInfo import SMRep import Cmm -import MachOp import CmmUtils import Id +import IdInfo import DataCon import TyCon import CostCentre @@ -191,7 +191,7 @@ mkStaticClosureFields cl_info ccs caf_refs payload = mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_field where - info_lbl = infoTableLabelFromCI cl_info + info_lbl = infoTableLabelFromCI cl_info $ clHasCafRefs cl_info -- CAFs must have consistent layout, regardless of whether they -- are actually updatable or not. The layout of a CAF is: @@ -226,7 +226,6 @@ mkStaticClosureFields cl_info ccs caf_refs payload | caf_refs = mkIntCLit 0 | otherwise = mkIntCLit 1 - mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit] mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_field @@ -245,14 +244,14 @@ mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_fi padLitToWord :: CmmLit -> [CmmLit] padLitToWord lit = lit : padding pad_length - where rep = cmmLitRep lit - pad_length = wORD_SIZE - machRepByteWidth rep :: Int + where width = typeWidth (cmmLitType lit) + pad_length = wORD_SIZE - widthInBytes width :: Int padding n | n <= 0 = [] - | n `rem` 2 /= 0 = CmmInt 0 I8 : padding (n-1) - | n `rem` 4 /= 0 = CmmInt 0 I16 : padding (n-2) - | n `rem` 8 /= 0 = CmmInt 0 I32 : padding (n-4) - | otherwise = CmmInt 0 I64 : padding (n-8) + | n `rem` 2 /= 0 = CmmInt 0 W8 : padding (n-1) + | n `rem` 4 /= 0 = CmmInt 0 W16 : padding (n-2) + | n `rem` 8 /= 0 = CmmInt 0 W32 : padding (n-4) + | otherwise = CmmInt 0 W64 : padding (n-8) \end{code} %************************************************************************ @@ -309,7 +308,7 @@ hpStkCheck cl_info is_fun reg_save_code code -- Strictly speaking, we should tag node here. But if -- node doesn't point to the closure, the code for the closure -- cannot depend on the value of R1 anyway, so we're safe. - closure_lbl = closureLabelFromCI cl_info + closure_lbl = closureLabelFromCI cl_info (clHasCafRefs cl_info) full_save_code = node_asst `plusStmts` reg_save_code @@ -410,7 +409,7 @@ unbxTupleHeapCheck regs ptrs nptrs fail_code code ; code } where full_fail_code = fail_code `plusStmts` oneStmt assign_liveness - assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9)) -- Ho ho ho! + assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9 VNonGcPtr)) -- Ho ho ho! (CmmLit (mkWordCLit liveness)) liveness = mkRegLiveness regs ptrs nptrs rts_label = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_ut"))) @@ -495,10 +494,8 @@ hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code hpChkGen bytes liveness reentry = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assigns stg_gc_gen where - assigns = mkStmts [ - CmmAssign (CmmGlobal (VanillaReg 9)) liveness, - CmmAssign (CmmGlobal (VanillaReg 10)) reentry - ] + assigns = mkStmts [ mk_vanilla_assignment 9 liveness, + mk_vanilla_assignment 10 reentry ] -- a heap check where R1 points to the closure to enter on return, and -- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP). @@ -511,10 +508,12 @@ stkChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code stkChkGen bytes liveness reentry = do_checks' bytes (CmmLit (mkIntCLit 0)) True False assigns stg_gc_gen where - assigns = mkStmts [ - CmmAssign (CmmGlobal (VanillaReg 9)) liveness, - CmmAssign (CmmGlobal (VanillaReg 10)) reentry - ] + assigns = mkStmts [ mk_vanilla_assignment 9 liveness, + mk_vanilla_assignment 10 reentry ] + +mk_vanilla_assignment :: Int -> CmmExpr -> CmmStmt +mk_vanilla_assignment n e + = CmmAssign (CmmGlobal (VanillaReg n (vgcFlag (cmmExprType e)))) e stkChkNodePoints :: CmmExpr -> Code stkChkNodePoints bytes @@ -554,7 +553,8 @@ allocDynClosure cl_info use_cc blame_cc amodes_with_offsets -- Remember, virtHp points to last allocated word, -- ie 1 *before* the info-ptr word of new object. - info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info)) + info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info + (clHasCafRefs cl_info))) hdr_w_offsets = initDynHdr info_ptr use_cc `zip` [0..] -- SAY WHAT WE ARE ABOUT TO DO diff --git a/compiler/codeGen/CgHpc.hs b/compiler/codeGen/CgHpc.hs index 0d0fdb1183..768a307e3a 100644 --- a/compiler/codeGen/CgHpc.hs +++ b/compiler/codeGen/CgHpc.hs @@ -18,7 +18,6 @@ module CgHpc (cgTickBox, initHpc, hpcTable) where import Cmm import CLabel import Module -import MachOp import CmmUtils import CgUtils import CgMonad @@ -35,14 +34,14 @@ import Data.Word cgTickBox :: Module -> Int -> Code cgTickBox mod n = do - let tick_box = (cmmIndex I64 + let tick_box = (cmmIndex W64 (CmmLit $ CmmLabel $ mkHpcTicksLabel $ mod) (fromIntegral n) ) stmtsC [ CmmStore tick_box - (CmmMachOp (MO_Add I64) - [ CmmLoad tick_box I64 - , CmmLit (CmmInt 1 I64) + (CmmMachOp (MO_Add W64) + [ CmmLoad tick_box b64 + , CmmLit (CmmInt 1 W64) ]) ] @@ -56,7 +55,7 @@ hpcTable this_mod (HpcInfo hpc_tickCount _) = do ] emitData Data $ [ CmmDataLabel (mkHpcTicksLabel this_mod) ] ++ - [ CmmStaticLit (CmmInt 0 I64) + [ CmmStaticLit (CmmInt 0 W64) | _ <- take hpc_tickCount [0::Int ..] ] where @@ -70,24 +69,24 @@ hpcTable this_mod (NoHpcInfo {}) = error "TODO: impossible" initHpc :: Module -> HpcInfo -> Code initHpc this_mod (HpcInfo tickCount hashNo) - = do { id <- newNonPtrTemp wordRep -- TODO FIXME NOW + = do { id <- newTemp bWord ; emitForeignCall' PlayRisky - [CmmKinded id NoHint] + [CmmHinted id NoHint] (CmmCallee (CmmLit $ CmmLabel $ mkForeignLabel mod_alloc Nothing False) CCallConv ) - [ CmmKinded (mkLblExpr mkHpcModuleNameLabel) PtrHint - , CmmKinded (word32 tickCount) NoHint - , CmmKinded (word32 hashNo) NoHint - , CmmKinded (CmmLit $ CmmLabel $ mkHpcTicksLabel $ this_mod) PtrHint + [ CmmHinted (mkLblExpr mkHpcModuleNameLabel) AddrHint + , CmmHinted (word32 tickCount) NoHint + , CmmHinted (word32 hashNo) NoHint + , CmmHinted (CmmLit $ CmmLabel $ mkHpcTicksLabel $ this_mod) AddrHint ] (Just []) NoC_SRT -- No SRT b/c we PlayRisky CmmMayReturn } where - word32 i = CmmLit (CmmInt (fromIntegral (fromIntegral i :: Word32)) I32) + word32 i = CmmLit (CmmInt (fromIntegral (fromIntegral i :: Word32)) W32) mod_alloc = mkFastString "hs_hpc_module" diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs index 14004ceef8..9fbe4fb36d 100644 --- a/compiler/codeGen/CgInfoTbls.hs +++ b/compiler/codeGen/CgInfoTbls.hs @@ -40,7 +40,6 @@ import CgMonad import CmmUtils import Cmm -import MachOp import CLabel import StgSyn import Name @@ -64,13 +63,13 @@ import Outputable -- representation as a list of 'CmmAddr' is handled later -- in the pipeline by 'cmmToRawCmm'. -emitClosureCodeAndInfoTable :: ClosureInfo -> CmmFormalsWithoutKinds -> CgStmts -> Code +emitClosureCodeAndInfoTable :: ClosureInfo -> CmmFormals -> CgStmts -> Code emitClosureCodeAndInfoTable cl_info args body = do { blks <- cgStmtsToBlocks body ; info <- mkCmmInfo cl_info ; emitInfoTableAndCode (infoLblToEntryLbl info_lbl) info args blks } where - info_lbl = infoTableLabelFromCI cl_info + info_lbl = infoTableLabelFromCI cl_info $ clHasCafRefs cl_info -- We keep the *zero-indexed* tag in the srt_len field of the info -- table of a data constructor. @@ -107,17 +106,17 @@ mkCmmInfo cl_info = do LFReEntrant _ arity _ arg_descr -> FunInfo (ptrs, nptrs) srt - (argDescrType arg_descr) (fromIntegral arity) arg_descr - (CmmLabel (mkSlowEntryLabel name)) + (CmmLabel (mkSlowEntryLabel name has_caf_refs)) LFThunk _ _ _ (SelectorThunk offset) _ -> ThunkSelectorInfo (fromIntegral offset) srt LFThunk _ _ _ _ _ -> ThunkInfo (ptrs, nptrs) srt _ -> panic "unexpected lambda form in mkCmmInfo" where - info_lbl = infoTableLabelFromCI cl_info + info_lbl = infoTableLabelFromCI cl_info has_caf_refs + has_caf_refs = clHasCafRefs cl_info cl_type = smRepClosureTypeInt (closureSMRep cl_info) @@ -235,12 +234,9 @@ stack_layout ((off, bind):binds) sizeW | off == sizeW - 1 = (Just stack_bind) : (stack_layout binds (sizeW - rep_size)) where rep_size = cgRepSizeW (cgIdInfoArgRep bind) - stack_bind = LocalReg unique machRep kind + stack_bind = LocalReg unique machRep unique = getUnique (cgIdInfoId bind) machRep = argMachRep (cgIdInfoArgRep bind) - kind = if isFollowableArg (cgIdInfoArgRep bind) - then GCKindPtr - else GCKindNonPtr stack_layout binds@((off, _):_) sizeW | otherwise = Nothing : (stack_layout binds (sizeW - 1)) @@ -344,13 +340,13 @@ stdNonPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE + hALF_WORD_SIZE closureInfoPtr :: CmmExpr -> CmmExpr -- Takes a closure pointer and returns the info table pointer -closureInfoPtr e = CmmLoad e wordRep +closureInfoPtr e = CmmLoad e bWord entryCode :: CmmExpr -> CmmExpr -- Takes an info pointer (the first word of a closure) -- and returns its entry code entryCode e | tablesNextToCode = e - | otherwise = CmmLoad e wordRep + | otherwise = CmmLoad e bWord getConstrTag :: CmmExpr -> CmmExpr -- Takes a closure pointer, and return the *zero-indexed* @@ -358,7 +354,7 @@ getConstrTag :: CmmExpr -> CmmExpr -- This lives in the SRT field of the info table -- (constructors don't need SRTs). getConstrTag closure_ptr - = CmmMachOp (MO_U_Conv halfWordRep wordRep) [infoTableConstrTag info_table] + = CmmMachOp (MO_UU_Conv halfWordWidth wordWidth) [infoTableConstrTag info_table] where info_table = infoTable (closureInfoPtr closure_ptr) @@ -366,7 +362,7 @@ cmmGetClosureType :: CmmExpr -> CmmExpr -- Takes a closure pointer, and return the closure type -- obtained from the info table cmmGetClosureType closure_ptr - = CmmMachOp (MO_U_Conv halfWordRep wordRep) [infoTableClosureType info_table] + = CmmMachOp (MO_UU_Conv halfWordWidth wordWidth) [infoTableClosureType info_table] where info_table = infoTable (closureInfoPtr closure_ptr) @@ -387,21 +383,21 @@ infoTableSrtBitmap :: CmmExpr -> CmmExpr -- Takes an info table pointer (from infoTable) and returns the srt_bitmap -- field of the info table infoTableSrtBitmap info_tbl - = CmmLoad (cmmOffsetB info_tbl stdSrtBitmapOffset) halfWordRep + = CmmLoad (cmmOffsetB info_tbl stdSrtBitmapOffset) bHalfWord infoTableClosureType :: CmmExpr -> CmmExpr -- Takes an info table pointer (from infoTable) and returns the closure type -- field of the info table. infoTableClosureType info_tbl - = CmmLoad (cmmOffsetB info_tbl stdClosureTypeOffset) halfWordRep + = CmmLoad (cmmOffsetB info_tbl stdClosureTypeOffset) bHalfWord infoTablePtrs :: CmmExpr -> CmmExpr infoTablePtrs info_tbl - = CmmLoad (cmmOffsetB info_tbl stdPtrsOffset) halfWordRep + = CmmLoad (cmmOffsetB info_tbl stdPtrsOffset) bHalfWord infoTableNonPtrs :: CmmExpr -> CmmExpr infoTableNonPtrs info_tbl - = CmmLoad (cmmOffsetB info_tbl stdNonPtrsOffset) halfWordRep + = CmmLoad (cmmOffsetB info_tbl stdNonPtrsOffset) bHalfWord funInfoTable :: CmmExpr -> CmmExpr -- Takes the info pointer of a function, @@ -427,7 +423,7 @@ funInfoTable info_ptr emitInfoTableAndCode :: CLabel -- Label of entry or ret -> CmmInfo -- ...the info table - -> CmmFormalsWithoutKinds -- ...args + -> CmmFormals -- ...args -> [CmmBasicBlock] -- ...and body -> Code diff --git a/compiler/codeGen/CgMonad.lhs b/compiler/codeGen/CgMonad.lhs index 51c07b213d..e624f4b436 100644 --- a/compiler/codeGen/CgMonad.lhs +++ b/compiler/codeGen/CgMonad.lhs @@ -74,6 +74,7 @@ import BlockId import Cmm import CmmUtils import CLabel +import PprCmm import StgSyn (SRT) import SMRep import Module @@ -746,7 +747,7 @@ emitData sect lits where data_block = CmmData sect lits -emitProc :: CmmInfo -> CLabel -> CmmFormalsWithoutKinds -> [CmmBasicBlock] -> Code +emitProc :: CmmInfo -> CLabel -> CmmFormals -> [CmmBasicBlock] -> Code emitProc info lbl args blocks = do { let proc_block = CmmProc info lbl args (ListGraph blocks) ; state <- getState @@ -767,7 +768,8 @@ getCmm code = do { state1 <- getState ; ((), state2) <- withState code (state1 { cgs_tops = nilOL }) ; setState $ state2 { cgs_tops = cgs_tops state1 } - ; return (Cmm (fromOL (cgs_tops state2))) } + ; return (Cmm (fromOL (cgs_tops state2))) + } -- ---------------------------------------------------------------------------- -- CgStmts diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs index 85a41515e6..05e45b5097 100644 --- a/compiler/codeGen/CgPrimOp.hs +++ b/compiler/codeGen/CgPrimOp.hs @@ -28,7 +28,6 @@ import CgUtils import Cmm import CLabel import CmmUtils -import MachOp import PrimOp import SMRep import Constants @@ -38,7 +37,7 @@ import FastString -- --------------------------------------------------------------------------- -- Code generation for PrimOps -cgPrimOp :: CmmFormalsWithoutKinds -- where to put the results +cgPrimOp :: CmmFormals -- where to put the results -> PrimOp -- the op -> [StgArg] -- arguments -> StgLiveVars -- live vars, in case we need to save them @@ -50,7 +49,7 @@ cgPrimOp results op args live emitPrimOp results op non_void_args live -emitPrimOp :: CmmFormalsWithoutKinds -- where to put the results +emitPrimOp :: CmmFormals -- where to put the results -> PrimOp -- the op -> [CmmExpr] -- arguments -> StgLiveVars -- live vars, in case we need to save them @@ -122,10 +121,10 @@ emitPrimOp [res] ParOp [arg] live -- later, we might want to inline it. vols <- getVolatileRegs live emitForeignCall' PlayRisky - [CmmKinded res NoHint] + [CmmHinted res NoHint] (CmmCallee newspark CCallConv) - [ (CmmKinded (CmmReg (CmmGlobal BaseReg)) PtrHint) - , (CmmKinded arg PtrHint) ] + [ (CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint) + , (CmmHinted arg AddrHint) ] (Just vols) NoC_SRT -- No SRT b/c we do PlayRisky CmmMayReturn @@ -133,7 +132,7 @@ emitPrimOp [res] ParOp [arg] live newspark = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "newSpark"))) emitPrimOp [res] ReadMutVarOp [mutv] live - = stmtC (CmmAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize)) + = stmtC (CmmAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord)) emitPrimOp [] WriteMutVarOp [mutv,var] live = do @@ -143,8 +142,8 @@ emitPrimOp [] WriteMutVarOp [mutv,var] live [{-no results-}] (CmmCallee (CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) CCallConv) - [ (CmmKinded (CmmReg (CmmGlobal BaseReg)) PtrHint) - , (CmmKinded mutv PtrHint) ] + [ (CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint) + , (CmmHinted mutv AddrHint) ] (Just vols) NoC_SRT -- No SRT b/c we do PlayRisky CmmMayReturn @@ -154,7 +153,7 @@ emitPrimOp [] WriteMutVarOp [mutv,var] live emitPrimOp [res] SizeofByteArrayOp [arg] live = stmtC $ CmmAssign (CmmLocal res) (CmmMachOp mo_wordMul [ - cmmLoadIndexW arg fixedHdrSize, + cmmLoadIndexW arg fixedHdrSize bWord, CmmLit (mkIntCLit wORD_SIZE) ]) @@ -174,14 +173,14 @@ emitPrimOp [res] ByteArrayContents_Char [arg] live -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn) emitPrimOp [res] StableNameToIntOp [arg] live - = stmtC (CmmAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize)) + = stmtC (CmmAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize bWord)) -- #define eqStableNamezh(r,sn1,sn2) \ -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn)) emitPrimOp [res] EqStableNameOp [arg1,arg2] live = stmtC (CmmAssign (CmmLocal res) (CmmMachOp mo_wordEq [ - cmmLoadIndexW arg1 fixedHdrSize, - cmmLoadIndexW arg2 fixedHdrSize + cmmLoadIndexW arg1 fixedHdrSize bWord, + cmmLoadIndexW arg2 fixedHdrSize bWord ])) @@ -223,117 +222,117 @@ emitPrimOp [] WriteArrayOp [obj,ix,v] live = doWritePtrArrayOp obj ix v -- IndexXXXoffAddr -emitPrimOp res IndexOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res IndexOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res IndexOffAddrOp_Int args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res IndexOffAddrOp_Word args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res IndexOffAddrOp_Addr args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res IndexOffAddrOp_Float args live = doIndexOffAddrOp Nothing F32 res args -emitPrimOp res IndexOffAddrOp_Double args live = doIndexOffAddrOp Nothing F64 res args -emitPrimOp res IndexOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res IndexOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) I8 res args -emitPrimOp res IndexOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) I16 res args -emitPrimOp res IndexOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) I32 res args -emitPrimOp res IndexOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing I64 res args -emitPrimOp res IndexOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res IndexOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) I16 res args -emitPrimOp res IndexOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res IndexOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing I64 res args +emitPrimOp res IndexOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexOffAddrOp_Int args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Word args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Addr args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Float args live = doIndexOffAddrOp Nothing f32 res args +emitPrimOp res IndexOffAddrOp_Double args live = doIndexOffAddrOp Nothing f64 res args +emitPrimOp res IndexOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res IndexOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res IndexOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res IndexOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing b64 res args +emitPrimOp res IndexOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res IndexOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing b64 res args -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr. -emitPrimOp res ReadOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res ReadOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res ReadOffAddrOp_Int args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res ReadOffAddrOp_Word args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res ReadOffAddrOp_Addr args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res ReadOffAddrOp_Float args live = doIndexOffAddrOp Nothing F32 res args -emitPrimOp res ReadOffAddrOp_Double args live = doIndexOffAddrOp Nothing F64 res args -emitPrimOp res ReadOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res ReadOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) I8 res args -emitPrimOp res ReadOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) I16 res args -emitPrimOp res ReadOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) I32 res args -emitPrimOp res ReadOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing I64 res args -emitPrimOp res ReadOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res ReadOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) I16 res args -emitPrimOp res ReadOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res ReadOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing I64 res args +emitPrimOp res ReadOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadOffAddrOp_Int args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Word args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Addr args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Float args live = doIndexOffAddrOp Nothing f32 res args +emitPrimOp res ReadOffAddrOp_Double args live = doIndexOffAddrOp Nothing f64 res args +emitPrimOp res ReadOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res ReadOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res ReadOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res ReadOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing b64 res args +emitPrimOp res ReadOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res ReadOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing b64 res args -- IndexXXXArray -emitPrimOp res IndexByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res IndexByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res IndexByteArrayOp_Int args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res IndexByteArrayOp_Word args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res IndexByteArrayOp_Addr args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res IndexByteArrayOp_Float args live = doIndexByteArrayOp Nothing F32 res args -emitPrimOp res IndexByteArrayOp_Double args live = doIndexByteArrayOp Nothing F64 res args -emitPrimOp res IndexByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res IndexByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) I8 res args -emitPrimOp res IndexByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) I16 res args -emitPrimOp res IndexByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) I32 res args -emitPrimOp res IndexByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing I64 res args -emitPrimOp res IndexByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res IndexByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) I16 res args -emitPrimOp res IndexByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res IndexByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing I64 res args +emitPrimOp res IndexByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexByteArrayOp_Int args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Word args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Addr args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Float args live = doIndexByteArrayOp Nothing f32 res args +emitPrimOp res IndexByteArrayOp_Double args live = doIndexByteArrayOp Nothing f64 res args +emitPrimOp res IndexByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res IndexByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res IndexByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res IndexByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing b64 res args +emitPrimOp res IndexByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res IndexByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing b64 res args -- ReadXXXArray, identical to IndexXXXArray. -emitPrimOp res ReadByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res ReadByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res ReadByteArrayOp_Int args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res ReadByteArrayOp_Word args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res ReadByteArrayOp_Addr args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res ReadByteArrayOp_Float args live = doIndexByteArrayOp Nothing F32 res args -emitPrimOp res ReadByteArrayOp_Double args live = doIndexByteArrayOp Nothing F64 res args -emitPrimOp res ReadByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res ReadByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) I8 res args -emitPrimOp res ReadByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) I16 res args -emitPrimOp res ReadByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) I32 res args -emitPrimOp res ReadByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing I64 res args -emitPrimOp res ReadByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res ReadByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) I16 res args -emitPrimOp res ReadByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res ReadByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing I64 res args +emitPrimOp res ReadByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadByteArrayOp_Int args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Word args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Addr args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Float args live = doIndexByteArrayOp Nothing f32 res args +emitPrimOp res ReadByteArrayOp_Double args live = doIndexByteArrayOp Nothing f64 res args +emitPrimOp res ReadByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res ReadByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res ReadByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res ReadByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing b64 res args +emitPrimOp res ReadByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res ReadByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing b64 res args -- WriteXXXoffAddr -emitPrimOp res WriteOffAddrOp_Char args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args -emitPrimOp res WriteOffAddrOp_WideChar args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args -emitPrimOp res WriteOffAddrOp_Int args live = doWriteOffAddrOp Nothing wordRep res args -emitPrimOp res WriteOffAddrOp_Word args live = doWriteOffAddrOp Nothing wordRep res args -emitPrimOp res WriteOffAddrOp_Addr args live = doWriteOffAddrOp Nothing wordRep res args -emitPrimOp res WriteOffAddrOp_Float args live = doWriteOffAddrOp Nothing F32 res args -emitPrimOp res WriteOffAddrOp_Double args live = doWriteOffAddrOp Nothing F64 res args -emitPrimOp res WriteOffAddrOp_StablePtr args live = doWriteOffAddrOp Nothing wordRep res args -emitPrimOp res WriteOffAddrOp_Int8 args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args -emitPrimOp res WriteOffAddrOp_Int16 args live = doWriteOffAddrOp (Just mo_WordTo16) I16 res args -emitPrimOp res WriteOffAddrOp_Int32 args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args -emitPrimOp res WriteOffAddrOp_Int64 args live = doWriteOffAddrOp Nothing I64 res args -emitPrimOp res WriteOffAddrOp_Word8 args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args -emitPrimOp res WriteOffAddrOp_Word16 args live = doWriteOffAddrOp (Just mo_WordTo16) I16 res args -emitPrimOp res WriteOffAddrOp_Word32 args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args -emitPrimOp res WriteOffAddrOp_Word64 args live = doWriteOffAddrOp Nothing I64 res args +emitPrimOp res WriteOffAddrOp_Char args live = doWriteOffAddrOp (Just mo_WordTo8) b8 res args +emitPrimOp res WriteOffAddrOp_WideChar args live = doWriteOffAddrOp (Just mo_WordTo32) b32 res args +emitPrimOp res WriteOffAddrOp_Int args live = doWriteOffAddrOp Nothing bWord res args +emitPrimOp res WriteOffAddrOp_Word args live = doWriteOffAddrOp Nothing bWord res args +emitPrimOp res WriteOffAddrOp_Addr args live = doWriteOffAddrOp Nothing bWord res args +emitPrimOp res WriteOffAddrOp_Float args live = doWriteOffAddrOp Nothing f32 res args +emitPrimOp res WriteOffAddrOp_Double args live = doWriteOffAddrOp Nothing f64 res args +emitPrimOp res WriteOffAddrOp_StablePtr args live = doWriteOffAddrOp Nothing bWord res args +emitPrimOp res WriteOffAddrOp_Int8 args live = doWriteOffAddrOp (Just mo_WordTo8) b8 res args +emitPrimOp res WriteOffAddrOp_Int16 args live = doWriteOffAddrOp (Just mo_WordTo16) b16 res args +emitPrimOp res WriteOffAddrOp_Int32 args live = doWriteOffAddrOp (Just mo_WordTo32) b32 res args +emitPrimOp res WriteOffAddrOp_Int64 args live = doWriteOffAddrOp Nothing b64 res args +emitPrimOp res WriteOffAddrOp_Word8 args live = doWriteOffAddrOp (Just mo_WordTo8) b8 res args +emitPrimOp res WriteOffAddrOp_Word16 args live = doWriteOffAddrOp (Just mo_WordTo16) b16 res args +emitPrimOp res WriteOffAddrOp_Word32 args live = doWriteOffAddrOp (Just mo_WordTo32) b32 res args +emitPrimOp res WriteOffAddrOp_Word64 args live = doWriteOffAddrOp Nothing b64 res args -- WriteXXXArray -emitPrimOp res WriteByteArrayOp_Char args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args -emitPrimOp res WriteByteArrayOp_WideChar args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args -emitPrimOp res WriteByteArrayOp_Int args live = doWriteByteArrayOp Nothing wordRep res args -emitPrimOp res WriteByteArrayOp_Word args live = doWriteByteArrayOp Nothing wordRep res args -emitPrimOp res WriteByteArrayOp_Addr args live = doWriteByteArrayOp Nothing wordRep res args -emitPrimOp res WriteByteArrayOp_Float args live = doWriteByteArrayOp Nothing F32 res args -emitPrimOp res WriteByteArrayOp_Double args live = doWriteByteArrayOp Nothing F64 res args -emitPrimOp res WriteByteArrayOp_StablePtr args live = doWriteByteArrayOp Nothing wordRep res args -emitPrimOp res WriteByteArrayOp_Int8 args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args -emitPrimOp res WriteByteArrayOp_Int16 args live = doWriteByteArrayOp (Just mo_WordTo16) I16 res args -emitPrimOp res WriteByteArrayOp_Int32 args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args -emitPrimOp res WriteByteArrayOp_Int64 args live = doWriteByteArrayOp Nothing I64 res args -emitPrimOp res WriteByteArrayOp_Word8 args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args -emitPrimOp res WriteByteArrayOp_Word16 args live = doWriteByteArrayOp (Just mo_WordTo16) I16 res args -emitPrimOp res WriteByteArrayOp_Word32 args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args -emitPrimOp res WriteByteArrayOp_Word64 args live = doWriteByteArrayOp Nothing I64 res args +emitPrimOp res WriteByteArrayOp_Char args live = doWriteByteArrayOp (Just mo_WordTo8) b8 res args +emitPrimOp res WriteByteArrayOp_WideChar args live = doWriteByteArrayOp (Just mo_WordTo32) b32 res args +emitPrimOp res WriteByteArrayOp_Int args live = doWriteByteArrayOp Nothing bWord res args +emitPrimOp res WriteByteArrayOp_Word args live = doWriteByteArrayOp Nothing bWord res args +emitPrimOp res WriteByteArrayOp_Addr args live = doWriteByteArrayOp Nothing bWord res args +emitPrimOp res WriteByteArrayOp_Float args live = doWriteByteArrayOp Nothing f32 res args +emitPrimOp res WriteByteArrayOp_Double args live = doWriteByteArrayOp Nothing f64 res args +emitPrimOp res WriteByteArrayOp_StablePtr args live = doWriteByteArrayOp Nothing bWord res args +emitPrimOp res WriteByteArrayOp_Int8 args live = doWriteByteArrayOp (Just mo_WordTo8) b8 res args +emitPrimOp res WriteByteArrayOp_Int16 args live = doWriteByteArrayOp (Just mo_WordTo16) b16 res args +emitPrimOp res WriteByteArrayOp_Int32 args live = doWriteByteArrayOp (Just mo_WordTo32) b32 res args +emitPrimOp res WriteByteArrayOp_Int64 args live = doWriteByteArrayOp Nothing b64 res args +emitPrimOp res WriteByteArrayOp_Word8 args live = doWriteByteArrayOp (Just mo_WordTo8) b8 res args +emitPrimOp res WriteByteArrayOp_Word16 args live = doWriteByteArrayOp (Just mo_WordTo16) b16 res args +emitPrimOp res WriteByteArrayOp_Word32 args live = doWriteByteArrayOp (Just mo_WordTo32) b32 res args +emitPrimOp res WriteByteArrayOp_Word64 args live = doWriteByteArrayOp Nothing b64 res args -- The rest just translate straightforwardly @@ -342,16 +341,16 @@ emitPrimOp [res] op [arg] live = stmtC (CmmAssign (CmmLocal res) arg) | Just (mop,rep) <- narrowOp op - = stmtC (CmmAssign (CmmLocal res) (CmmMachOp (mop rep wordRep) [ - CmmMachOp (mop wordRep rep) [arg]])) + = stmtC (CmmAssign (CmmLocal res) $ + CmmMachOp (mop rep wordWidth) [CmmMachOp (mop wordWidth rep) [arg]]) emitPrimOp [res] op args live | Just prim <- callishOp op = do vols <- getVolatileRegs live emitForeignCall' PlayRisky - [CmmKinded res NoHint] + [CmmHinted res NoHint] (CmmPrim prim) - [CmmKinded a NoHint | a<-args] -- ToDo: hints? + [CmmHinted a NoHint | a<-args] -- ToDo: hints? (Just vols) NoC_SRT -- No SRT b/c we do PlayRisky CmmMayReturn @@ -376,12 +375,13 @@ nopOp _ = False -- These PrimOps turn into double casts -narrowOp Narrow8IntOp = Just (MO_S_Conv, I8) -narrowOp Narrow16IntOp = Just (MO_S_Conv, I16) -narrowOp Narrow32IntOp = Just (MO_S_Conv, I32) -narrowOp Narrow8WordOp = Just (MO_U_Conv, I8) -narrowOp Narrow16WordOp = Just (MO_U_Conv, I16) -narrowOp Narrow32WordOp = Just (MO_U_Conv, I32) +narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width) +narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8) +narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16) +narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32) +narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8) +narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16) +narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32) narrowOp _ = Nothing -- Native word signless ops @@ -412,7 +412,7 @@ translateOp AddrRemOp = Just mo_wordURem -- Native word signed ops translateOp IntMulOp = Just mo_wordMul -translateOp IntMulMayOfloOp = Just (MO_S_MulMayOflo wordRep) +translateOp IntMulMayOfloOp = Just (MO_S_MulMayOflo wordWidth) translateOp IntQuotOp = Just mo_wordSQuot translateOp IntRemOp = Just mo_wordSRem translateOp IntNegOp = Just mo_wordSNeg @@ -445,53 +445,53 @@ translateOp AddrLtOp = Just mo_wordULt -- Char# ops -translateOp CharEqOp = Just (MO_Eq wordRep) -translateOp CharNeOp = Just (MO_Ne wordRep) -translateOp CharGeOp = Just (MO_U_Ge wordRep) -translateOp CharLeOp = Just (MO_U_Le wordRep) -translateOp CharGtOp = Just (MO_U_Gt wordRep) -translateOp CharLtOp = Just (MO_U_Lt wordRep) +translateOp CharEqOp = Just (MO_Eq wordWidth) +translateOp CharNeOp = Just (MO_Ne wordWidth) +translateOp CharGeOp = Just (MO_U_Ge wordWidth) +translateOp CharLeOp = Just (MO_U_Le wordWidth) +translateOp CharGtOp = Just (MO_U_Gt wordWidth) +translateOp CharLtOp = Just (MO_U_Lt wordWidth) -- Double ops -translateOp DoubleEqOp = Just (MO_Eq F64) -translateOp DoubleNeOp = Just (MO_Ne F64) -translateOp DoubleGeOp = Just (MO_S_Ge F64) -translateOp DoubleLeOp = Just (MO_S_Le F64) -translateOp DoubleGtOp = Just (MO_S_Gt F64) -translateOp DoubleLtOp = Just (MO_S_Lt F64) +translateOp DoubleEqOp = Just (MO_F_Eq W64) +translateOp DoubleNeOp = Just (MO_F_Ne W64) +translateOp DoubleGeOp = Just (MO_F_Ge W64) +translateOp DoubleLeOp = Just (MO_F_Le W64) +translateOp DoubleGtOp = Just (MO_F_Gt W64) +translateOp DoubleLtOp = Just (MO_F_Lt W64) -translateOp DoubleAddOp = Just (MO_Add F64) -translateOp DoubleSubOp = Just (MO_Sub F64) -translateOp DoubleMulOp = Just (MO_Mul F64) -translateOp DoubleDivOp = Just (MO_S_Quot F64) -translateOp DoubleNegOp = Just (MO_S_Neg F64) +translateOp DoubleAddOp = Just (MO_F_Add W64) +translateOp DoubleSubOp = Just (MO_F_Sub W64) +translateOp DoubleMulOp = Just (MO_F_Mul W64) +translateOp DoubleDivOp = Just (MO_F_Quot W64) +translateOp DoubleNegOp = Just (MO_F_Neg W64) -- Float ops -translateOp FloatEqOp = Just (MO_Eq F32) -translateOp FloatNeOp = Just (MO_Ne F32) -translateOp FloatGeOp = Just (MO_S_Ge F32) -translateOp FloatLeOp = Just (MO_S_Le F32) -translateOp FloatGtOp = Just (MO_S_Gt F32) -translateOp FloatLtOp = Just (MO_S_Lt F32) +translateOp FloatEqOp = Just (MO_F_Eq W32) +translateOp FloatNeOp = Just (MO_F_Ne W32) +translateOp FloatGeOp = Just (MO_F_Ge W32) +translateOp FloatLeOp = Just (MO_F_Le W32) +translateOp FloatGtOp = Just (MO_F_Gt W32) +translateOp FloatLtOp = Just (MO_F_Lt W32) -translateOp FloatAddOp = Just (MO_Add F32) -translateOp FloatSubOp = Just (MO_Sub F32) -translateOp FloatMulOp = Just (MO_Mul F32) -translateOp FloatDivOp = Just (MO_S_Quot F32) -translateOp FloatNegOp = Just (MO_S_Neg F32) +translateOp FloatAddOp = Just (MO_F_Add W32) +translateOp FloatSubOp = Just (MO_F_Sub W32) +translateOp FloatMulOp = Just (MO_F_Mul W32) +translateOp FloatDivOp = Just (MO_F_Quot W32) +translateOp FloatNegOp = Just (MO_F_Neg W32) -- Conversions -translateOp Int2DoubleOp = Just (MO_S_Conv wordRep F64) -translateOp Double2IntOp = Just (MO_S_Conv F64 wordRep) +translateOp Int2DoubleOp = Just (MO_SF_Conv wordWidth W64) +translateOp Double2IntOp = Just (MO_FS_Conv W64 wordWidth) -translateOp Int2FloatOp = Just (MO_S_Conv wordRep F32) -translateOp Float2IntOp = Just (MO_S_Conv F32 wordRep) +translateOp Int2FloatOp = Just (MO_SF_Conv wordWidth W32) +translateOp Float2IntOp = Just (MO_FS_Conv W32 wordWidth) -translateOp Float2DoubleOp = Just (MO_S_Conv F32 F64) -translateOp Double2FloatOp = Just (MO_S_Conv F64 F32) +translateOp Float2DoubleOp = Just (MO_FF_Conv W32 W64) +translateOp Double2FloatOp = Just (MO_FF_Conv W64 W32) -- Word comparisons masquerading as more exotic things. @@ -540,6 +540,10 @@ callishOp _ = Nothing ------------------------------------------------------------------------------ -- Helpers for translating various minor variants of array indexing. +-- Bytearrays outside the heap; hence non-pointers +doIndexOffAddrOp, doIndexByteArrayOp + :: Maybe MachOp -> CmmType + -> [LocalReg] -> [CmmExpr] -> Code doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx] = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx doIndexOffAddrOp _ _ _ _ @@ -550,10 +554,14 @@ doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx] doIndexByteArrayOp _ _ _ _ = panic "CgPrimOp: doIndexByteArrayOp" +doReadPtrArrayOp :: LocalReg -> CmmExpr -> CmmExpr -> Code doReadPtrArrayOp res addr idx - = mkBasicIndexedRead arrPtrsHdrSize Nothing wordRep res addr idx + = mkBasicIndexedRead arrPtrsHdrSize Nothing gcWord res addr idx +doWriteOffAddrOp, doWriteByteArrayOp + :: Maybe MachOp -> CmmType + -> [LocalReg] -> [CmmExpr] -> Code doWriteOffAddrOp maybe_pre_write_cast rep [] [addr,idx,val] = mkBasicIndexedWrite 0 maybe_pre_write_cast rep addr idx val doWriteOffAddrOp _ _ _ _ @@ -564,17 +572,22 @@ doWriteByteArrayOp maybe_pre_write_cast rep [] [addr,idx,val] doWriteByteArrayOp _ _ _ _ = panic "CgPrimOp: doWriteByteArrayOp" +doWritePtrArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> Code doWritePtrArrayOp addr idx val = do stmtC (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel))) - mkBasicIndexedWrite arrPtrsHdrSize Nothing wordRep addr idx val + mkBasicIndexedWrite arrPtrsHdrSize Nothing bWord addr idx val +mkBasicIndexedRead :: ByteOff -> Maybe MachOp -> CmmType + -> LocalReg -> CmmExpr -> CmmExpr -> Code mkBasicIndexedRead off Nothing read_rep res base idx = stmtC (CmmAssign (CmmLocal res) (cmmLoadIndexOffExpr off read_rep base idx)) mkBasicIndexedRead off (Just cast) read_rep res base idx = stmtC (CmmAssign (CmmLocal res) (CmmMachOp cast [ cmmLoadIndexOffExpr off read_rep base idx])) +mkBasicIndexedWrite :: ByteOff -> Maybe MachOp -> CmmType + -> CmmExpr -> CmmExpr -> CmmExpr -> Code mkBasicIndexedWrite off Nothing write_rep base idx val = stmtC (CmmStore (cmmIndexOffExpr off write_rep base idx) val) mkBasicIndexedWrite off (Just cast) write_rep base idx val @@ -583,11 +596,11 @@ mkBasicIndexedWrite off (Just cast) write_rep base idx val -- ---------------------------------------------------------------------------- -- Misc utils -cmmIndexOffExpr :: ByteOff -> MachRep -> CmmExpr -> CmmExpr -> CmmExpr +cmmIndexOffExpr :: ByteOff -> CmmType -> CmmExpr -> CmmExpr -> CmmExpr cmmIndexOffExpr off rep base idx - = cmmIndexExpr rep (cmmOffsetB base off) idx + = cmmIndexExpr (typeWidth rep) (cmmOffsetB base off) idx -cmmLoadIndexOffExpr :: ByteOff -> MachRep -> CmmExpr -> CmmExpr -> CmmExpr +cmmLoadIndexOffExpr :: ByteOff -> CmmType -> CmmExpr -> CmmExpr -> CmmExpr cmmLoadIndexOffExpr off rep base idx = CmmLoad (cmmIndexOffExpr off rep base idx) rep diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs index c2a8a1bd75..c85beb50aa 100644 --- a/compiler/codeGen/CgProf.hs +++ b/compiler/codeGen/CgProf.hs @@ -45,7 +45,6 @@ import CgMonad import SMRep import Cmm -import MachOp import CmmUtils import CLabel @@ -70,7 +69,7 @@ import Control.Monad -- Expression representing the current cost centre stack curCCS :: CmmExpr -curCCS = CmmLoad curCCSAddr wordRep +curCCS = CmmLoad curCCSAddr bWord -- Address of current CCS variable, for storing into curCCSAddr :: CmmExpr @@ -84,7 +83,7 @@ mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs) costCentreFrom :: CmmExpr -- A closure pointer -> CmmExpr -- The cost centre from that closure -costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) wordRep +costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) bWord staticProfHdr :: CostCentreStack -> [CmmLit] -- The profiling header words in a static closure @@ -122,13 +121,13 @@ profAlloc words ccs = ifProfiling $ stmtC (addToMemE alloc_rep (cmmOffsetB ccs oFFSET_CostCentreStack_mem_alloc) - (CmmMachOp (MO_U_Conv wordRep alloc_rep) $ + (CmmMachOp (MO_UU_Conv wordWidth alloc_rep) $ [CmmMachOp mo_wordSub [words, CmmLit (mkIntCLit profHdrSize)]])) -- subtract the "profiling overhead", which is the -- profiling header in a closure. where - alloc_rep = REP_CostCentreStack_mem_alloc + alloc_rep = typeWidth REP_CostCentreStack_mem_alloc -- ---------------------------------------------------------------------- -- Setting the cost centre in a new closure @@ -162,7 +161,7 @@ emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's) push_em ccs [] = return ccs push_em ccs (cc:rest) = do - tmp <- newNonPtrTemp wordRep -- TODO FIXME NOW + tmp <- newTemp bWord -- TODO FIXME NOW pushCostCentre tmp ccs cc push_em (CmmReg (CmmLocal tmp)) rest @@ -267,7 +266,7 @@ enterCostCentreThunk closure = ifProfiling $ do stmtC $ CmmStore curCCSAddr (costCentreFrom closure) -enter_ccs_fun stack = emitRtsCall (sLit "EnterFunCCS") [CmmKinded stack PtrHint] False +enter_ccs_fun stack = emitRtsCall (sLit "EnterFunCCS") [CmmHinted stack AddrHint] False -- ToDo: vols enter_ccs_fsub = enteringPAP 0 @@ -280,7 +279,7 @@ enter_ccs_fsub = enteringPAP 0 enteringPAP :: Integer -> Code enteringPAP n = stmtC (CmmStore (CmmLit (CmmLabel (mkRtsDataLabel (sLit "entering_PAP")))) - (CmmLit (CmmInt n cIntRep))) + (CmmLit (CmmInt n cIntWidth))) ifProfiling :: Code -> Code ifProfiling code @@ -340,7 +339,7 @@ emitCostCentreStackDecl ccs | otherwise = pprPanic "emitCostCentreStackDecl" (ppr ccs) zero = mkIntCLit 0 -zero64 = CmmInt 0 I64 +zero64 = CmmInt 0 W64 sizeof_ccs_words :: Int sizeof_ccs_words @@ -359,12 +358,12 @@ sizeof_ccs_words emitRegisterCC :: CostCentre -> Code emitRegisterCC cc = do - { tmp <- newNonPtrTemp cIntRep + { tmp <- newTemp cInt ; stmtsC [ CmmStore (cmmOffsetB cc_lit oFFSET_CostCentre_link) - (CmmLoad cC_LIST wordRep), + (CmmLoad cC_LIST bWord), CmmStore cC_LIST cc_lit, - CmmAssign (CmmLocal tmp) (CmmLoad cC_ID cIntRep), + CmmAssign (CmmLocal tmp) (CmmLoad cC_ID cInt), CmmStore (cmmOffsetB cc_lit oFFSET_CostCentre_ccID) (CmmReg (CmmLocal tmp)), CmmStore cC_ID (cmmRegOffB (CmmLocal tmp) 1) ] @@ -378,12 +377,12 @@ emitRegisterCC cc = do emitRegisterCCS :: CostCentreStack -> Code emitRegisterCCS ccs = do - { tmp <- newNonPtrTemp cIntRep + { tmp <- newTemp cInt ; stmtsC [ CmmStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_prevStack) - (CmmLoad cCS_LIST wordRep), + (CmmLoad cCS_LIST bWord), CmmStore cCS_LIST ccs_lit, - CmmAssign (CmmLocal tmp) (CmmLoad cCS_ID cIntRep), + CmmAssign (CmmLocal tmp) (CmmLoad cCS_ID cInt), CmmStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_ccsID) (CmmReg (CmmLocal tmp)), CmmStore cCS_ID (cmmRegOffB (CmmLocal tmp) 1) ] @@ -405,7 +404,7 @@ emitSetCCC :: CostCentre -> Code emitSetCCC cc | not opt_SccProfilingOn = nopC | otherwise = do - tmp <- newNonPtrTemp wordRep -- TODO FIXME NOW + tmp <- newTemp bWord -- TODO FIXME NOW ASSERT( sccAbleCostCentre cc ) pushCostCentre tmp curCCS cc stmtC (CmmStore curCCSAddr (CmmReg (CmmLocal tmp))) @@ -414,14 +413,14 @@ emitSetCCC cc pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> Code pushCostCentre result ccs cc - = emitRtsCallWithResult result PtrHint - (sLit "PushCostCentre") [CmmKinded ccs PtrHint, - CmmKinded (CmmLit (mkCCostCentre cc)) PtrHint] + = emitRtsCallWithResult result AddrHint + (sLit "PushCostCentre") [CmmHinted ccs AddrHint, + CmmHinted (CmmLit (mkCCostCentre cc)) AddrHint] False bumpSccCount :: CmmExpr -> CmmStmt bumpSccCount ccs - = addToMem REP_CostCentreStack_scc_count + = addToMem (typeWidth REP_CostCentreStack_scc_count) (cmmOffsetB ccs oFFSET_CostCentreStack_scc_count) 1 ----------------------------------------------------------------------------- @@ -475,13 +474,13 @@ ldvEnter cl_ptr where -- don't forget to substract node's tag ldv_wd = ldvWord cl_ptr - new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd wordRep) + new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd bWord) (CmmLit (mkWordCLit lDV_CREATE_MASK))) (cmmOrWord loadEra (CmmLit (mkWordCLit lDV_STATE_USE))) loadEra :: CmmExpr -loadEra = CmmMachOp (MO_U_Conv cIntRep wordRep) - [CmmLoad (mkLblExpr (mkRtsDataLabel (sLit "era"))) cIntRep] +loadEra = CmmMachOp (MO_UU_Conv cIntWidth wordWidth) + [CmmLoad (mkLblExpr (mkRtsDataLabel $ sLit("era"))) cInt] ldvWord :: CmmExpr -> CmmExpr -- Takes the address of a closure, and returns diff --git a/compiler/codeGen/CgStackery.lhs b/compiler/codeGen/CgStackery.lhs index b8db38d4ed..d6d9e5cfad 100644 --- a/compiler/codeGen/CgStackery.lhs +++ b/compiler/codeGen/CgStackery.lhs @@ -274,7 +274,6 @@ to reflect the frame pushed. \begin{code} pushUpdateFrame :: CmmExpr -> Code -> Code - pushUpdateFrame updatee code = do { when debugIsOn $ do diff --git a/compiler/codeGen/CgTailCall.lhs b/compiler/codeGen/CgTailCall.lhs index 475196abba..4f890998ae 100644 --- a/compiler/codeGen/CgTailCall.lhs +++ b/compiler/codeGen/CgTailCall.lhs @@ -41,6 +41,7 @@ import Type import Id import StgSyn import PrimOp +import FastString import Outputable import Control.Monad @@ -116,7 +117,7 @@ performTailCall fun_info arg_amodes pending_assts ; EndOfBlockInfo sp _ <- getEndOfBlockInfo ; this_pkg <- getThisPackage - ; case (getCallMethod fun_name lf_info (length arg_amodes)) of + ; case (getCallMethod fun_name fun_has_cafs lf_info (length arg_amodes)) of -- Node must always point to things we enter EnterIt -> do @@ -183,8 +184,10 @@ performTailCall fun_info arg_amodes pending_assts } } where - fun_name = idName (cgIdInfoId fun_info) + fun_id = cgIdInfoId fun_info + fun_name = idName fun_id lf_info = cgIdInfoLF fun_info + fun_has_cafs = idCafInfo fun_id untag_node = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg)) -- Test if closure is a constructor maybeSwitchOnCons enterClosure eob diff --git a/compiler/codeGen/CgTicky.hs b/compiler/codeGen/CgTicky.hs index 24947409fe..b23b34caa4 100644 --- a/compiler/codeGen/CgTicky.hs +++ b/compiler/codeGen/CgTicky.hs @@ -52,12 +52,12 @@ import CgMonad import SMRep import Cmm -import MachOp import CmmUtils import CLabel import Name import Id +import IdInfo import StaticFlags import BasicTypes import FastString @@ -106,7 +106,7 @@ emitTickyCounter cl_info args on_stk ] } where name = closureName cl_info - ticky_ctr_label = mkRednCountsLabel name + ticky_ctr_label = mkRednCountsLabel name NoCafRefs arg_descr = map (showTypeCategory . idType) args fun_descr mod_name = ppr_for_ticky_name mod_name name @@ -172,13 +172,13 @@ registerTickyCtr ctr_lbl = emitIf test (stmtsC register_stmts) where -- krc: code generator doesn't handle Not, so we test for Eq 0 instead - test = CmmMachOp (MO_Eq wordRep) + test = CmmMachOp (MO_Eq wordWidth) [CmmLoad (CmmLit (cmmLabelOffB ctr_lbl - oFFSET_StgEntCounter_registeredp)) wordRep, + oFFSET_StgEntCounter_registeredp)) bWord, CmmLit (mkIntCLit 0)] register_stmts = [ CmmStore (CmmLit (cmmLabelOffB ctr_lbl oFFSET_StgEntCounter_link)) - (CmmLoad ticky_entry_ctrs wordRep) + (CmmLoad ticky_entry_ctrs bWord) , CmmStore ticky_entry_ctrs (mkLblExpr ctr_lbl) , CmmStore (CmmLit (cmmLabelOffB ctr_lbl oFFSET_StgEntCounter_registeredp)) @@ -288,13 +288,13 @@ tickyAllocHeap hp if hp == 0 then [] -- Inside the stmtC to avoid control else [ -- dependency on the argument -- Bump the allcoation count in the StgEntCounter - addToMem REP_StgEntCounter_allocs + addToMem (typeWidth REP_StgEntCounter_allocs) (CmmLit (cmmLabelOffB ticky_ctr oFFSET_StgEntCounter_allocs)) hp, -- Bump ALLOC_HEAP_ctr - addToMemLbl cLongRep (mkRtsDataLabel (sLit "ALLOC_HEAP_ctr")) 1, - -- Bump ALLOC_HEAP_tot - addToMemLbl cLongRep (mkRtsDataLabel (sLit "ALLOC_HEAP_tot")) hp] } + addToMemLbl cLongWidth (mkRtsDataLabel $ sLit "ALLOC_HEAP_ctr") 1, + -- Bump ALLOC_HEAP_tot + addToMemLbl cLongWidth (mkRtsDataLabel $ sLit "ALLOC_HEAP_tot") hp] } -- ----------------------------------------------------------------------------- -- Ticky utils @@ -304,7 +304,7 @@ ifTicky code | opt_DoTickyProfiling = code | otherwise = nopC -addToMemLbl :: MachRep -> CLabel -> Int -> CmmStmt +addToMemLbl :: Width -> CLabel -> Int -> CmmStmt addToMemLbl rep lbl n = addToMem rep (CmmLit (CmmLabel lbl)) n -- All the ticky-ticky counters are declared "unsigned long" in C @@ -313,27 +313,28 @@ bumpTickyCounter lbl = bumpTickyCounter' (cmmLabelOffB (mkRtsDataLabel lbl) 0) bumpTickyCounter' :: CmmLit -> Code -- krc: note that we're incrementing the _entry_count_ field of the ticky counter -bumpTickyCounter' lhs = stmtC (addToMem cLongRep (CmmLit lhs) 1) - -addToMemLong = addToMem cLongRep +bumpTickyCounter' lhs = stmtC (addToMemLong (CmmLit lhs) 1) bumpHistogram :: LitString -> Int -> Code bumpHistogram lbl n --- = bumpHistogramE lbl (CmmLit (CmmInt (fromIntegral n) cLongRep)) +-- = bumpHistogramE lbl (CmmLit (CmmInt (fromIntegral n) cLong)) = return () -- TEMP SPJ Apr 07 bumpHistogramE :: LitString -> CmmExpr -> Code bumpHistogramE lbl n - = do t <- newNonPtrTemp cLongRep + = do t <- newTemp cLong stmtC (CmmAssign (CmmLocal t) n) - emitIf (CmmMachOp (MO_U_Le cLongRep) [CmmReg (CmmLocal t), eight]) $ + emitIf (CmmMachOp (MO_U_Le cLongWidth) [CmmReg (CmmLocal t), eight]) $ stmtC (CmmAssign (CmmLocal t) eight) - stmtC (addToMemLong (cmmIndexExpr cLongRep + stmtC (addToMemLong (cmmIndexExpr cLongWidth (CmmLit (CmmLabel (mkRtsDataLabel lbl))) (CmmReg (CmmLocal t))) 1) where - eight = CmmLit (CmmInt 8 cLongRep) + eight = CmmLit (CmmInt 8 cLongWidth) + +------------------------------------------------------------------ +addToMemLong = addToMem cLongWidth ------------------------------------------------------------------ -- Showing the "type category" for ticky-ticky profiling diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index 4de3537788..fd49cb7182 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -20,8 +20,7 @@ module CgUtils ( emitRODataLits, mkRODataLits, emitIf, emitIfThenElse, emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult, - assignNonPtrTemp, newNonPtrTemp, - assignPtrTemp, newPtrTemp, + assignTemp, newTemp, emitSimultaneously, emitSwitch, emitLitSwitch, tagToClosure, @@ -47,7 +46,7 @@ module CgUtils ( packHalfWordsCLit, blankWord, - getSRTInfo + getSRTInfo, clHasCafRefs ) where #include "HsVersions.h" @@ -58,13 +57,13 @@ import CgMonad import TyCon import DataCon import Id +import IdInfo import Constants import SMRep import PprCmm ( {- instances -} ) import Cmm import CLabel import CmmUtils -import MachOp import ForeignCall import ClosureInfo import StgSyn (SRT(..)) @@ -103,24 +102,24 @@ cgLit (MachStr s) = mkByteStringCLit (bytesFS s) cgLit other_lit = return (mkSimpleLit other_lit) mkSimpleLit :: Literal -> CmmLit -mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordRep +mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordWidth mkSimpleLit MachNullAddr = zeroCLit -mkSimpleLit (MachInt i) = CmmInt i wordRep -mkSimpleLit (MachInt64 i) = CmmInt i I64 -mkSimpleLit (MachWord i) = CmmInt i wordRep -mkSimpleLit (MachWord64 i) = CmmInt i I64 -mkSimpleLit (MachFloat r) = CmmFloat r F32 -mkSimpleLit (MachDouble r) = CmmFloat r F64 +mkSimpleLit (MachInt i) = CmmInt i wordWidth +mkSimpleLit (MachInt64 i) = CmmInt i W64 +mkSimpleLit (MachWord i) = CmmInt i wordWidth +mkSimpleLit (MachWord64 i) = CmmInt i W64 +mkSimpleLit (MachFloat r) = CmmFloat r W32 +mkSimpleLit (MachDouble r) = CmmFloat r W64 mkSimpleLit (MachLabel fs ms) = CmmLabel (mkForeignLabel fs ms is_dyn) where is_dyn = False -- ToDo: fix me mkLtOp :: Literal -> MachOp -- On signed literals we must do a signed comparison -mkLtOp (MachInt _) = MO_S_Lt wordRep -mkLtOp (MachFloat _) = MO_S_Lt F32 -mkLtOp (MachDouble _) = MO_S_Lt F64 -mkLtOp lit = MO_U_Lt (cmmLitRep (mkSimpleLit lit)) +mkLtOp (MachInt _) = MO_S_Lt wordWidth +mkLtOp (MachFloat _) = MO_F_Lt W32 +mkLtOp (MachDouble _) = MO_F_Lt W64 +mkLtOp lit = MO_U_Lt (typeWidth (cmmLitType (mkSimpleLit lit))) --------------------------------------------------- @@ -151,7 +150,7 @@ cmmOffsetLitB = cmmOffsetLit cmmOffsetExprW :: CmmExpr -> CmmExpr -> CmmExpr -- The second arg is a *word* offset; need to change it to bytes cmmOffsetExprW e (CmmLit (CmmInt n _)) = cmmOffsetW e (fromInteger n) -cmmOffsetExprW e wd_off = cmmIndexExpr wordRep e wd_off +cmmOffsetExprW e wd_off = cmmIndexExpr wordWidth e wd_off cmmOffsetW :: CmmExpr -> WordOff -> CmmExpr cmmOffsetW e n = cmmOffsetB e (wORD_SIZE * n) @@ -165,9 +164,8 @@ cmmOffsetLitW lit wd_off = cmmOffsetLitB lit (wORD_SIZE * wd_off) cmmLabelOffW :: CLabel -> WordOff -> CmmLit cmmLabelOffW lbl wd_off = cmmLabelOffB lbl (wORD_SIZE * wd_off) -cmmLoadIndexW :: CmmExpr -> Int -> CmmExpr -cmmLoadIndexW base off - = CmmLoad (cmmOffsetW base off) wordRep +cmmLoadIndexW :: CmmExpr -> Int -> CmmType -> CmmExpr +cmmLoadIndexW base off ty = CmmLoad (cmmOffsetW base off) ty ----------------------- cmmNeWord, cmmEqWord, cmmOrWord, cmmAndWord :: CmmExpr -> CmmExpr -> CmmExpr @@ -184,7 +182,7 @@ cmmSubWord e1 e2 = CmmMachOp mo_wordSub [e1, e2] cmmNegate :: CmmExpr -> CmmExpr cmmNegate (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep) -cmmNegate e = CmmMachOp (MO_S_Neg (cmmExprRep e)) [e] +cmmNegate e = CmmMachOp (MO_S_Neg (cmmExprWidth e)) [e] blankWord :: CmmStatic blankWord = CmmUninitialised wORD_SIZE @@ -244,7 +242,7 @@ dataConTagZ con = dataConTag con - fIRST_TAG -- Making literals mkWordCLit :: StgWord -> CmmLit -mkWordCLit wd = CmmInt (fromIntegral wd) wordRep +mkWordCLit wd = CmmInt (fromIntegral wd) wordWidth packHalfWordsCLit :: (Integral a, Integral b) => a -> b -> CmmLit -- Make a single word literal in which the lower_half_word is @@ -267,18 +265,18 @@ packHalfWordsCLit lower_half_word upper_half_word -- -------------------------------------------------------------------------- -addToMem :: MachRep -- rep of the counter +addToMem :: Width -- rep of the counter -> CmmExpr -- Address -> Int -- What to add (a word) -> CmmStmt -addToMem rep ptr n = addToMemE rep ptr (CmmLit (CmmInt (toInteger n) rep)) +addToMem width ptr n = addToMemE width ptr (CmmLit (CmmInt (toInteger n) width)) -addToMemE :: MachRep -- rep of the counter +addToMemE :: Width -- rep of the counter -> CmmExpr -- Address -> CmmExpr -- What to add (a word-typed expression) -> CmmStmt -addToMemE rep ptr n - = CmmStore ptr (CmmMachOp (MO_Add rep) [CmmLoad ptr rep, n]) +addToMemE width ptr n + = CmmStore ptr (CmmMachOp (MO_Add width) [CmmLoad ptr (cmmBits width), n]) ------------------------------------------------------------------------- -- @@ -289,9 +287,9 @@ addToMemE rep ptr n tagToClosure :: TyCon -> CmmExpr -> CmmExpr tagToClosure tycon tag - = CmmLoad (cmmOffsetExprW closure_tbl tag) wordRep + = CmmLoad (cmmOffsetExprW closure_tbl tag) gcWord where closure_tbl = CmmLit (CmmLabel lbl) - lbl = mkClosureTableLabel (tyConName tycon) + lbl = mkClosureTableLabel (tyConName tycon) NoCafRefs ------------------------------------------------------------------------- -- @@ -334,24 +332,24 @@ emitIfThenElse cond then_part else_part ; labelC join_id } -emitRtsCall :: LitString -> [CmmKinded CmmExpr] -> Bool -> Code +emitRtsCall :: LitString -> [CmmHinted CmmExpr] -> Bool -> Code emitRtsCall fun args safe = emitRtsCall' [] fun args Nothing safe -- The 'Nothing' says "save all global registers" -emitRtsCallWithVols :: LitString -> [CmmKinded CmmExpr] -> [GlobalReg] -> Bool -> Code +emitRtsCallWithVols :: LitString -> [CmmHinted CmmExpr] -> [GlobalReg] -> Bool -> Code emitRtsCallWithVols fun args vols safe = emitRtsCall' [] fun args (Just vols) safe -emitRtsCallWithResult :: LocalReg -> MachHint -> LitString - -> [CmmKinded CmmExpr] -> Bool -> Code +emitRtsCallWithResult :: LocalReg -> ForeignHint -> LitString + -> [CmmHinted CmmExpr] -> Bool -> Code emitRtsCallWithResult res hint fun args safe - = emitRtsCall' [CmmKinded res hint] fun args Nothing safe + = emitRtsCall' [CmmHinted res hint] fun args Nothing safe -- Make a call to an RTS C procedure emitRtsCall' - :: CmmFormals + :: [CmmHinted LocalReg] -> LitString - -> [CmmKinded CmmExpr] + -> [CmmHinted CmmExpr] -> Maybe [GlobalReg] -> Bool -- True <=> CmmSafe call -> Code @@ -393,7 +391,8 @@ callerSaveVolatileRegs vols = (caller_save, caller_load) vol_list = case vols of Nothing -> all_of_em; Just regs -> regs - all_of_em = [ VanillaReg n | n <- [0..mAX_Vanilla_REG] ] + all_of_em = [ VanillaReg n VNonGcPtr | n <- [0..mAX_Vanilla_REG] ] + -- The VNonGcPtr is a lie, but I don't think it matters ++ [ FloatReg n | n <- [0..mAX_Float_REG] ] ++ [ DoubleReg n | n <- [0..mAX_Double_REG] ] ++ [ LongReg n | n <- [0..mAX_Long_REG] ] @@ -407,7 +406,7 @@ callerSaveVolatileRegs vols = (caller_save, caller_load) callerRestoreGlobalReg reg next | callerSaves reg = CmmAssign (CmmGlobal reg) - (CmmLoad (get_GlobalReg_addr reg) (globalRegRep reg)) + (CmmLoad (get_GlobalReg_addr reg) (globalRegType reg)) : next | otherwise = next @@ -423,14 +422,14 @@ callerSaveVolatileRegs vols = (caller_save, caller_load) get_GlobalReg_addr :: GlobalReg -> CmmExpr get_GlobalReg_addr BaseReg = regTableOffset 0 get_GlobalReg_addr mid = get_Regtable_addr_from_offset - (globalRegRep mid) (baseRegOffset mid) + (globalRegType mid) (baseRegOffset mid) -- Calculate a literal representing an offset into the register table. -- Used when we don't have an actual BaseReg to offset from. regTableOffset n = CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r + n)) -get_Regtable_addr_from_offset :: MachRep -> Int -> CmmExpr +get_Regtable_addr_from_offset :: CmmType -> Int -> CmmExpr get_Regtable_addr_from_offset rep offset = #ifdef REG_Base CmmRegOff (CmmGlobal BaseReg) offset @@ -448,28 +447,28 @@ callerSaves :: GlobalReg -> Bool callerSaves BaseReg = True #endif #ifdef CALLER_SAVES_R1 -callerSaves (VanillaReg 1) = True +callerSaves (VanillaReg 1 _) = True #endif #ifdef CALLER_SAVES_R2 -callerSaves (VanillaReg 2) = True +callerSaves (VanillaReg 2 _) = True #endif #ifdef CALLER_SAVES_R3 -callerSaves (VanillaReg 3) = True +callerSaves (VanillaReg 3 _) = True #endif #ifdef CALLER_SAVES_R4 -callerSaves (VanillaReg 4) = True +callerSaves (VanillaReg 4 _) = True #endif #ifdef CALLER_SAVES_R5 -callerSaves (VanillaReg 5) = True +callerSaves (VanillaReg 5 _) = True #endif #ifdef CALLER_SAVES_R6 -callerSaves (VanillaReg 6) = True +callerSaves (VanillaReg 6 _) = True #endif #ifdef CALLER_SAVES_R7 -callerSaves (VanillaReg 7) = True +callerSaves (VanillaReg 7 _) = True #endif #ifdef CALLER_SAVES_R8 -callerSaves (VanillaReg 8) = True +callerSaves (VanillaReg 8 _) = True #endif #ifdef CALLER_SAVES_F1 callerSaves (FloatReg 1) = True @@ -518,16 +517,16 @@ callerSaves _ = False baseRegOffset :: GlobalReg -> Int -baseRegOffset (VanillaReg 1) = oFFSET_StgRegTable_rR1 -baseRegOffset (VanillaReg 2) = oFFSET_StgRegTable_rR2 -baseRegOffset (VanillaReg 3) = oFFSET_StgRegTable_rR3 -baseRegOffset (VanillaReg 4) = oFFSET_StgRegTable_rR4 -baseRegOffset (VanillaReg 5) = oFFSET_StgRegTable_rR5 -baseRegOffset (VanillaReg 6) = oFFSET_StgRegTable_rR6 -baseRegOffset (VanillaReg 7) = oFFSET_StgRegTable_rR7 -baseRegOffset (VanillaReg 8) = oFFSET_StgRegTable_rR8 -baseRegOffset (VanillaReg 9) = oFFSET_StgRegTable_rR9 -baseRegOffset (VanillaReg 10) = oFFSET_StgRegTable_rR10 +baseRegOffset (VanillaReg 1 _) = oFFSET_StgRegTable_rR1 +baseRegOffset (VanillaReg 2 _) = oFFSET_StgRegTable_rR2 +baseRegOffset (VanillaReg 3 _) = oFFSET_StgRegTable_rR3 +baseRegOffset (VanillaReg 4 _) = oFFSET_StgRegTable_rR4 +baseRegOffset (VanillaReg 5 _) = oFFSET_StgRegTable_rR5 +baseRegOffset (VanillaReg 6 _) = oFFSET_StgRegTable_rR6 +baseRegOffset (VanillaReg 7 _) = oFFSET_StgRegTable_rR7 +baseRegOffset (VanillaReg 8 _) = oFFSET_StgRegTable_rR8 +baseRegOffset (VanillaReg 9 _) = oFFSET_StgRegTable_rR9 +baseRegOffset (VanillaReg 10 _) = oFFSET_StgRegTable_rR10 baseRegOffset (FloatReg 1) = oFFSET_StgRegTable_rF1 baseRegOffset (FloatReg 2) = oFFSET_StgRegTable_rF2 baseRegOffset (FloatReg 3) = oFFSET_StgRegTable_rF3 @@ -565,15 +564,15 @@ mkDataLits :: CLabel -> [CmmLit] -> GenCmmTop CmmStatic info graph mkDataLits lbl lits = CmmData Data (CmmDataLabel lbl : map CmmStaticLit lits) -emitRODataLits :: CLabel -> [CmmLit] -> Code +emitRODataLits :: String -> CLabel -> [CmmLit] -> Code -- Emit a read-only data block -emitRODataLits lbl lits +emitRODataLits caller lbl lits = emitData section (CmmDataLabel lbl : map CmmStaticLit lits) - where section | any needsRelocation lits = RelocatableReadOnlyData - | otherwise = ReadOnlyData - needsRelocation (CmmLabel _) = True - needsRelocation (CmmLabelOff _ _) = True - needsRelocation _ = False + where section | any needsRelocation lits = RelocatableReadOnlyData + | otherwise = ReadOnlyData + needsRelocation (CmmLabel _) = True + needsRelocation (CmmLabelOff _ _) = True + needsRelocation _ = False mkRODataLits :: CLabel -> [CmmLit] -> GenCmmTop CmmStatic info graph mkRODataLits lbl lits @@ -602,30 +601,17 @@ mkByteStringCLit bytes -- ------------------------------------------------------------------------- -assignNonPtrTemp :: CmmExpr -> FCode CmmExpr --- For a non-trivial expression, e, create a local --- variable and assign the expression to it -assignNonPtrTemp e - | isTrivialCmmExpr e = return e - | otherwise = do { reg <- newNonPtrTemp (cmmExprRep e) - ; stmtC (CmmAssign (CmmLocal reg) e) - ; return (CmmReg (CmmLocal reg)) } - -assignPtrTemp :: CmmExpr -> FCode CmmExpr +assignTemp :: CmmExpr -> FCode CmmExpr -- For a non-trivial expression, e, create a local -- variable and assign the expression to it -assignPtrTemp e +assignTemp e | isTrivialCmmExpr e = return e - | otherwise = do { reg <- newPtrTemp (cmmExprRep e) + | otherwise = do { reg <- newTemp (cmmExprType e) ; stmtC (CmmAssign (CmmLocal reg) e) ; return (CmmReg (CmmLocal reg)) } -newNonPtrTemp :: MachRep -> FCode LocalReg -newNonPtrTemp rep = do { uniq <- newUnique; return (LocalReg uniq rep GCKindNonPtr) } - -newPtrTemp :: MachRep -> FCode LocalReg -newPtrTemp rep = do { uniq <- newUnique; return (LocalReg uniq rep GCKindPtr) } - +newTemp :: CmmType -> FCode LocalReg +newTemp rep = do { uniq <- newUnique; return (LocalReg uniq rep) } ------------------------------------------------------------------------- -- @@ -727,7 +713,7 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C -- if we can knock off a bunch of default cases with one if, then do so | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches - = do { (assign_tag, tag_expr') <- assignNonPtrTemp' tag_expr + = do { (assign_tag, tag_expr') <- assignTemp' tag_expr ; let cond = cmmULtWord tag_expr' (CmmLit (mkIntCLit lowest_branch)) branch = CmmCondBranch cond deflt ; stmts <- mk_switch tag_expr' branches mb_deflt @@ -736,7 +722,7 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C } | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches - = do { (assign_tag, tag_expr') <- assignNonPtrTemp' tag_expr + = do { (assign_tag, tag_expr') <- assignTemp' tag_expr ; let cond = cmmUGtWord tag_expr' (CmmLit (mkIntCLit highest_branch)) branch = CmmCondBranch cond deflt ; stmts <- mk_switch tag_expr' branches mb_deflt @@ -745,7 +731,7 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C } | otherwise -- Use an if-tree - = do { (assign_tag, tag_expr') <- assignNonPtrTemp' tag_expr + = do { (assign_tag, tag_expr') <- assignTemp' tag_expr -- To avoid duplication ; lo_stmts <- mk_switch tag_expr' lo_branches mb_deflt lo_tag (mid_tag-1) via_C @@ -810,9 +796,9 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C is_lo (t,_) = t < mid_tag -assignNonPtrTemp' e +assignTemp' e | isTrivialCmmExpr e = return (CmmNop, e) - | otherwise = do { reg <- newNonPtrTemp (cmmExprRep e) + | otherwise = do { reg <- newTemp (cmmExprType e) ; return (CmmAssign (CmmLocal reg) e, CmmReg (CmmLocal reg)) } emitLitSwitch :: CmmExpr -- Tag to switch on @@ -828,7 +814,7 @@ emitLitSwitch :: CmmExpr -- Tag to switch on emitLitSwitch scrut [] deflt = emitCgStmts deflt emitLitSwitch scrut branches deflt_blk - = do { scrut' <- assignNonPtrTemp scrut + = do { scrut' <- assignTemp scrut ; deflt_blk_id <- forkCgStmts deflt_blk ; blk <- mk_lit_switch scrut' deflt_blk_id (sortLe le branches) ; emitCgStmts blk } @@ -842,8 +828,9 @@ mk_lit_switch scrut deflt_blk_id [(lit,blk)] = return (consCgStmt if_stmt blk) where cmm_lit = mkSimpleLit lit - rep = cmmLitRep cmm_lit - cond = CmmMachOp (MO_Ne rep) [scrut, CmmLit cmm_lit] + rep = cmmLitType cmm_lit + ne = if isFloatType rep then MO_F_Ne else MO_Ne + cond = CmmMachOp (ne (typeWidth rep)) [scrut, CmmLit cmm_lit] if_stmt = CmmCondBranch cond deflt_blk_id mk_lit_switch scrut deflt_blk_id branches @@ -920,11 +907,11 @@ doSimultaneously1 vertices ; stmtC from_temp } go_via_temp (CmmAssign dest src) - = do { tmp <- newNonPtrTemp (cmmRegRep dest) -- TODO FIXME NOW if the pair of assignments move across a call this will be wrong + = do { tmp <- newTemp (cmmRegType dest) -- TODO FIXME NOW if the pair of assignments move across a call this will be wrong ; stmtC (CmmAssign (CmmLocal tmp) src) ; return (CmmAssign dest (CmmReg (CmmLocal tmp))) } go_via_temp (CmmStore dest src) - = do { tmp <- newNonPtrTemp (cmmExprRep src) -- TODO FIXME NOW if the pair of assignemnts move across a call this will be wrong + = do { tmp <- newTemp (cmmExprType src) -- TODO FIXME NOW if the pair of assignemnts move across a call this will be wrong ; stmtC (CmmAssign (CmmLocal tmp) src) ; return (CmmStore dest (CmmReg (CmmLocal tmp))) } in @@ -932,7 +919,7 @@ doSimultaneously1 vertices mustFollow :: CmmStmt -> CmmStmt -> Bool CmmAssign reg _ `mustFollow` stmt = anySrc (reg `regUsedIn`) stmt -CmmStore loc e `mustFollow` stmt = anySrc (locUsedIn loc (cmmExprRep e)) stmt +CmmStore loc e `mustFollow` stmt = anySrc (locUsedIn loc (cmmExprType e)) stmt CmmNop `mustFollow` stmt = False CmmComment _ `mustFollow` stmt = False @@ -952,7 +939,7 @@ reg `regUsedIn` CmmReg reg' = reg == reg' reg `regUsedIn` CmmRegOff reg' _ = reg == reg' reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es -locUsedIn :: CmmExpr -> MachRep -> CmmExpr -> Bool +locUsedIn :: CmmExpr -> CmmType -> CmmExpr -> Bool -- (locUsedIn a r e) checks whether writing to r[a] could affect the value of -- 'e'. Returns True if it's not sure. locUsedIn loc rep (CmmLit _) = False @@ -961,7 +948,7 @@ locUsedIn loc rep (CmmReg reg') = False locUsedIn loc rep (CmmRegOff reg' _) = False locUsedIn loc rep (CmmMachOp _ es) = any (locUsedIn loc rep) es -possiblySameLoc :: CmmExpr -> MachRep -> CmmExpr -> MachRep -> Bool +possiblySameLoc :: CmmExpr -> CmmType -> CmmExpr -> CmmType -> Bool -- Assumes that distinct registers (eg Hp, Sp) do not -- point to the same location, nor any offset thereof. possiblySameLoc (CmmReg r1) rep1 (CmmReg r2) rep2 = r1==r2 @@ -970,8 +957,8 @@ possiblySameLoc (CmmRegOff r1 0) rep1 (CmmReg r2) rep2 = r1==r2 possiblySameLoc (CmmRegOff r1 start1) rep1 (CmmRegOff r2 start2) rep2 = r1==r2 && end1 > start2 && end2 > start1 where - end1 = start1 + machRepByteWidth rep1 - end2 = start2 + machRepByteWidth rep2 + end1 = start1 + widthInBytes (typeWidth rep1) + end2 = start2 + widthInBytes (typeWidth rep2) possiblySameLoc l1 rep1 (CmmLit _) rep2 = False possiblySameLoc l1 rep1 l2 rep2 = True -- Conservative @@ -999,7 +986,7 @@ getSRTInfo = do | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape] -> do id <- newUnique let srt_desc_lbl = mkLargeSRTLabel id - emitRODataLits srt_desc_lbl + emitRODataLits "getSRTInfo" srt_desc_lbl ( cmmLabelOffW srt_lbl off : mkWordCLit (fromIntegral len) : map mkWordCLit bmp) @@ -1011,3 +998,9 @@ getSRTInfo = do -- The fromIntegral converts to StgHalfWord srt_escape = (-1) :: StgHalfWord + +clHasCafRefs :: ClosureInfo -> CafInfo +clHasCafRefs (ClosureInfo {closureSRT = srt}) = + case srt of NoC_SRT -> NoCafRefs + _ -> MayHaveCafRefs +clHasCafRefs (ConInfo {}) = NoCafRefs diff --git a/compiler/codeGen/ClosureInfo.lhs b/compiler/codeGen/ClosureInfo.lhs index dcb41b4cc4..df32299c2a 100644 --- a/compiler/codeGen/ClosureInfo.lhs +++ b/compiler/codeGen/ClosureInfo.lhs @@ -37,7 +37,7 @@ module ClosureInfo ( slopSize, closureName, infoTableLabelFromCI, - closureLabelFromCI, closureSRT, + closureLabelFromCI, closureLFInfo, isLFThunk,closureSMRep, closureUpdReqd, closureNeedsUpdSpace, closureIsThunk, closureSingleEntry, closureReEntrant, isConstrClosure_maybe, @@ -76,6 +76,7 @@ import Packages import PackageConfig import StaticFlags import Id +import IdInfo import DataCon import Name import OccName @@ -576,28 +577,29 @@ data CallMethod Int -- Its arity getCallMethod :: Name -- Function being applied + -> CafInfo -- Can it refer to CAF's? -> LambdaFormInfo -- Its info -> Int -- Number of available arguments -> CallMethod -getCallMethod name lf_info n_args +getCallMethod name _ lf_info n_args | nodeMustPointToIt lf_info && opt_Parallel = -- If we're parallel, then we must always enter via node. -- The reason is that the closure may have been -- fetched since we allocated it. EnterIt -getCallMethod name (LFReEntrant _ arity _ _) n_args +getCallMethod name caf (LFReEntrant _ arity _ _) n_args | n_args == 0 = ASSERT( arity /= 0 ) ReturnIt -- No args at all | n_args < arity = SlowCall -- Not enough args - | otherwise = DirectEntry (enterIdLabel name) arity + | otherwise = DirectEntry (enterIdLabel name caf) arity -getCallMethod name (LFCon con) n_args +getCallMethod name _ (LFCon con) n_args = ASSERT( n_args == 0 ) ReturnCon con -getCallMethod name (LFThunk _ _ updatable std_form_info is_fun) n_args +getCallMethod name caf (LFThunk _ _ updatable std_form_info is_fun) n_args | is_fun -- it *might* be a function, so we must "call" it (which is -- always safe) = SlowCall -- We cannot just enter it [in eval/apply, the entry code @@ -620,12 +622,12 @@ getCallMethod name (LFThunk _ _ updatable std_form_info is_fun) n_args | otherwise -- Jump direct to code for single-entry thunks = ASSERT( n_args == 0 ) - JumpToIt (thunkEntryLabel name std_form_info updatable) + JumpToIt (thunkEntryLabel name caf std_form_info updatable) -getCallMethod name (LFUnknown True) n_args +getCallMethod name _ (LFUnknown True) n_args = SlowCall -- Might be a function -getCallMethod name (LFUnknown False) n_args +getCallMethod name _ (LFUnknown False) n_args | n_args > 0 = WARN( True, ppr name <+> ppr n_args ) SlowCall -- Note [Unsafe coerce complications] @@ -633,15 +635,15 @@ getCallMethod name (LFUnknown False) n_args | otherwise = EnterIt -- Not a function -getCallMethod name (LFBlackHole _) n_args +getCallMethod name _ (LFBlackHole _) n_args = SlowCall -- Presumably the black hole has by now -- been updated, but we don't know with -- what, so we slow call it -getCallMethod name (LFLetNoEscape 0) n_args +getCallMethod name _ (LFLetNoEscape 0) n_args = JumpToIt (enterReturnPtLabel (nameUnique name)) -getCallMethod name (LFLetNoEscape arity) n_args +getCallMethod name _ (LFLetNoEscape arity) n_args | n_args == arity = DirectEntry (enterReturnPtLabel (nameUnique name)) arity | otherwise = pprPanic "let-no-escape: " (ppr name <+> ppr arity) @@ -882,10 +884,10 @@ isToplevClosure _ = False Label generation. \begin{code} -infoTableLabelFromCI :: ClosureInfo -> CLabel +infoTableLabelFromCI :: ClosureInfo -> CafInfo -> CLabel infoTableLabelFromCI (ClosureInfo { closureName = name, closureLFInfo = lf_info, - closureSMRep = rep }) + closureSMRep = rep }) caf = case lf_info of LFBlackHole info -> info @@ -895,32 +897,32 @@ infoTableLabelFromCI (ClosureInfo { closureName = name, LFThunk _ _ upd_flag (ApThunk arity) _ -> mkApInfoTableLabel upd_flag arity - LFThunk{} -> mkLocalInfoTableLabel name + LFThunk{} -> mkLocalInfoTableLabel name caf - LFReEntrant _ _ _ _ -> mkLocalInfoTableLabel name + LFReEntrant _ _ _ _ -> mkLocalInfoTableLabel name caf other -> panic "infoTableLabelFromCI" infoTableLabelFromCI (ConInfo { closureCon = con, - closureSMRep = rep }) - | isStaticRep rep = mkStaticInfoTableLabel name - | otherwise = mkConInfoTableLabel name + closureSMRep = rep }) caf + | isStaticRep rep = mkStaticInfoTableLabel name caf + | otherwise = mkConInfoTableLabel name caf where name = dataConName con -- ClosureInfo for a closure (as opposed to a constructor) is always local -closureLabelFromCI (ClosureInfo { closureName = nm }) = mkLocalClosureLabel nm -closureLabelFromCI _ = panic "closureLabelFromCI" +closureLabelFromCI (ClosureInfo { closureName = nm }) caf = mkLocalClosureLabel nm caf +closureLabelFromCI _ _ = panic "closureLabelFromCI" -- thunkEntryLabel is a local help function, not exported. It's used from both -- entryLabelFromCI and getCallMethod. -thunkEntryLabel thunk_id (ApThunk arity) is_updatable +thunkEntryLabel thunk_id _ (ApThunk arity) is_updatable = enterApLabel is_updatable arity -thunkEntryLabel thunk_id (SelectorThunk offset) upd_flag +thunkEntryLabel thunk_id _ (SelectorThunk offset) upd_flag = enterSelectorLabel upd_flag offset -thunkEntryLabel thunk_id _ is_updatable - = enterIdLabel thunk_id +thunkEntryLabel thunk_id caf _ is_updatable + = enterIdLabel thunk_id caf enterApLabel is_updatable arity | tablesNextToCode = mkApInfoTableLabel is_updatable arity diff --git a/compiler/codeGen/CodeGen.lhs b/compiler/codeGen/CodeGen.lhs index 4221342d4f..14d745780d 100644 --- a/compiler/codeGen/CodeGen.lhs +++ b/compiler/codeGen/CodeGen.lhs @@ -38,7 +38,6 @@ import CLabel import Cmm import CmmUtils import PprCmm -import MachOp import StgSyn import PrelNames @@ -51,6 +50,7 @@ import CostCentre import Id import Name import OccName +import Outputable import TyCon import Module import ErrUtils @@ -198,7 +198,7 @@ mkModuleInit way cost_centre_info this_mod main_mod imported_mods hpc_info jump_to_init = stmtC (CmmJump (mkLblExpr real_init_lbl) []) - mod_reg_val = CmmLoad (mkLblExpr moduleRegdLabel) wordRep + mod_reg_val = CmmLoad (mkLblExpr moduleRegdLabel) bWord -- Main refers to GHC.TopHandler.runIO, so make sure we call the -- init function for GHC.TopHandler. @@ -224,7 +224,7 @@ mkModuleInit way cost_centre_info this_mod main_mod imported_mods hpc_info -- The return-code pops the work stack by -- incrementing Sp, and then jumpd to the popped item ret_code = stmtsC [ CmmAssign spReg (cmmRegOffW spReg 1) - , CmmJump (CmmLoad (cmmRegOffW spReg (-1)) wordRep) [] ] + , CmmJump (CmmLoad (cmmRegOffW spReg (-1)) bWord) [] ] rec_descent_init = if opt_SccProfilingOn || isHpcUsed hpc_info @@ -303,8 +303,8 @@ mkSRT these (id,[]) = nopC mkSRT these (id,ids) = do { ids <- mapFCs remap ids ; id <- remap id - ; emitRODataLits (mkSRTLabel (idName id)) - (map (CmmLabel . mkClosureLabel . idName) ids) + ; emitRODataLits "CodeGen.mkSRT" (mkSRTLabel (idName id) (idCafInfo id)) + (map (\id -> CmmLabel $ mkClosureLabel (idName id) (idCafInfo id)) ids) } where -- Sigh, better map all the ids against the environment in @@ -326,7 +326,7 @@ cgTopRhs bndr (StgRhsCon cc con args) cgTopRhs bndr (StgRhsClosure cc bi fvs upd_flag srt args body) = ASSERT(null fvs) -- There should be no free variables - setSRTLabel (mkSRTLabel (idName bndr)) $ + setSRTLabel (mkSRTLabel (idName bndr) (idCafInfo bndr)) $ setSRT srt $ forkStatics (cgTopRhsClosure bndr cc bi upd_flag args body) \end{code} diff --git a/compiler/codeGen/SMRep.lhs b/compiler/codeGen/SMRep.lhs index 28d17079e5..987562c364 100644 --- a/compiler/codeGen/SMRep.lhs +++ b/compiler/codeGen/SMRep.lhs @@ -24,14 +24,15 @@ module SMRep ( -- Argument/return representations CgRep(..), nonVoidArg, - argMachRep, primRepToCgRep, primRepHint, + argMachRep, primRepToCgRep, +-- Temp primRepHint, typeHint, isFollowableArg, isVoidArg, isFloatingArg, is64BitArg, separateByPtrFollowness, cgRepSizeW, cgRepSizeB, retAddrSizeW, - typeCgRep, idCgRep, tyConCgRep, typeHint, + typeCgRep, idCgRep, tyConCgRep, -- Closure repesentation SMRep(..), ClosureType(..), @@ -45,10 +46,10 @@ module SMRep ( #include "../includes/MachDeps.h" +import CmmExpr -- CmmType and friends import Id import Type import TyCon -import MachOp import StaticFlags import Constants import Outputable @@ -136,12 +137,12 @@ instance Outputable CgRep where ppr FloatArg = ptext (sLit "F_") ppr DoubleArg = ptext (sLit "D_") -argMachRep :: CgRep -> MachRep -argMachRep PtrArg = wordRep -argMachRep NonPtrArg = wordRep -argMachRep LongArg = I64 -argMachRep FloatArg = F32 -argMachRep DoubleArg = F64 +argMachRep :: CgRep -> CmmType +argMachRep PtrArg = gcWord +argMachRep NonPtrArg = bWord +argMachRep LongArg = b64 +argMachRep FloatArg = f32 +argMachRep DoubleArg = f64 argMachRep VoidArg = panic "argMachRep:VoidRep" primRepToCgRep :: PrimRep -> CgRep @@ -155,17 +156,6 @@ primRepToCgRep AddrRep = NonPtrArg primRepToCgRep FloatRep = FloatArg primRepToCgRep DoubleRep = DoubleArg -primRepHint :: PrimRep -> MachHint -primRepHint VoidRep = panic "primRepHint:VoidRep" -primRepHint PtrRep = PtrHint -primRepHint IntRep = SignedHint -primRepHint WordRep = NoHint -primRepHint Int64Rep = SignedHint -primRepHint Word64Rep = NoHint -primRepHint AddrRep = PtrHint -- NB! PtrHint, but NonPtrArg -primRepHint FloatRep = FloatHint -primRepHint DoubleRep = FloatHint - idCgRep :: Id -> CgRep idCgRep x = typeCgRep . idType $ x @@ -174,9 +164,6 @@ tyConCgRep = primRepToCgRep . tyConPrimRep typeCgRep :: Type -> CgRep typeCgRep = primRepToCgRep . typePrimRep - -typeHint :: Type -> MachHint -typeHint = primRepHint . typePrimRep \end{code} Whether or not the thing is a pointer that the garbage-collector diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs new file mode 100644 index 0000000000..56cd1d5555 --- /dev/null +++ b/compiler/codeGen/StgCmm.hs @@ -0,0 +1,400 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C-- code generation +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmm ( codeGen ) where + +#define FAST_STRING_NOT_NEEDED +#include "HsVersions.h" + +import StgCmmProf +import StgCmmMonad +import StgCmmEnv +import StgCmmBind +import StgCmmCon +import StgCmmLayout +import StgCmmHeap +import StgCmmUtils +import StgCmmClosure +import StgCmmHpc +import StgCmmTicky + +import MkZipCfgCmm +import Cmm +import CmmUtils +import CLabel +import PprCmm + +import StgSyn +import PrelNames +import DynFlags +import StaticFlags + +import HscTypes +import CostCentre +import Id +import IdInfo +import Type +import DataCon +import Name +import OccName +import TyCon +import Module +import ErrUtils +import Outputable + +codeGen :: DynFlags + -> Module + -> [TyCon] + -> [Module] -- Directly-imported modules + -> CollectedCCs -- (Local/global) cost-centres needing declaring/registering. + -> [(StgBinding,[(Id,[Id])])] -- Bindings to convert, with SRTs + -> HpcInfo + -> IO [CmmZ] -- Output + +codeGen dflags this_mod data_tycons imported_mods + cost_centre_info stg_binds hpc_info + = do { showPass dflags "New CodeGen" + ; let way = buildTag dflags + main_mod = mainModIs dflags + +-- Why? +-- ; mapM_ (\x -> seq x (return ())) data_tycons + + ; code_stuff <- initC dflags this_mod $ do + { cmm_binds <- mapM (getCmm . cgTopBinding dflags) stg_binds + ; cmm_tycons <- mapM cgTyCon data_tycons + ; cmm_init <- getCmm (mkModuleInit way cost_centre_info + this_mod main_mod + imported_mods hpc_info) + ; return (cmm_binds ++ concat cmm_tycons ++ [cmm_init]) + } + -- Put datatype_stuff after code_stuff, because the + -- datatype closure table (for enumeration types) to + -- (say) PrelBase_True_closure, which is defined in + -- code_stuff + + -- N.B. returning '[Cmm]' and not 'Cmm' here makes it + -- possible for object splitting to split up the + -- pieces later. + + ; dumpIfSet_dyn dflags Opt_D_dump_cmmz "New Cmm" (pprCmms code_stuff) + + ; return code_stuff } + + +--------------------------------------------------------------- +-- Top-level bindings +--------------------------------------------------------------- + +{- 'cgTopBinding' is only used for top-level bindings, since they need +to be allocated statically (not in the heap) and need to be labelled. +No unboxed bindings can happen at top level. + +In the code below, the static bindings are accumulated in the +@MkCgState@, and transferred into the ``statics'' slot by @forkStatics@. +This is so that we can write the top level processing in a compositional +style, with the increasing static environment being plumbed as a state +variable. -} + +cgTopBinding :: DynFlags -> (StgBinding,[(Id,[Id])]) -> FCode () +cgTopBinding dflags (StgNonRec id rhs, _srts) + = do { id' <- maybeExternaliseId dflags id + --; mapM_ (mkSRT [id']) srts + ; (id,info) <- cgTopRhs id' rhs + ; addBindC id info -- Add the *un-externalised* Id to the envt, + -- so we find it when we look up occurrences + } + +cgTopBinding dflags (StgRec pairs, _srts) + = do { let (bndrs, rhss) = unzip pairs + ; bndrs' <- mapFCs (maybeExternaliseId dflags) bndrs + ; let pairs' = zip bndrs' rhss + --; mapM_ (mkSRT bndrs') srts + ; fixC (\ new_binds -> do + { addBindsC new_binds + ; mapFCs ( \ (b,e) -> cgTopRhs b e ) pairs' }) + ; return () } + +--mkSRT :: [Id] -> (Id,[Id]) -> FCode () +--mkSRT these (id,ids) +-- | null ids = nopC +-- | otherwise +-- = do { ids <- mapFCs remap ids +-- ; id <- remap id +-- ; emitRODataLits (mkSRTLabel (idName id) (idCafInfo id)) +-- (map (\id -> CmmLabel $ mkClosureLabel (idName id) (idCafInfo id)) ids) +-- } +-- where +-- -- Sigh, better map all the ids against the environment in +-- -- case they've been externalised (see maybeExternaliseId below). +-- remap id = case filter (==id) these of +-- (id':_) -> returnFC id' +-- [] -> do { info <- getCgIdInfo id; return (cgIdInfoId info) } + +-- Urgh! I tried moving the forkStatics call from the rhss of cgTopRhs +-- to enclose the listFCs in cgTopBinding, but that tickled the +-- statics "error" call in initC. I DON'T UNDERSTAND WHY! + +cgTopRhs :: Id -> StgRhs -> FCode (Id, CgIdInfo) + -- The Id is passed along for setting up a binding... + -- It's already been externalised if necessary + +cgTopRhs bndr (StgRhsCon _cc con args) + = forkStatics (cgTopRhsCon bndr con args) + +cgTopRhs bndr (StgRhsClosure cc bi fvs upd_flag srt args body) + = ASSERT(null fvs) -- There should be no free variables + setSRTLabel (mkSRTLabel (idName bndr) (idCafInfo bndr)) $ + forkStatics (cgTopRhsClosure bndr cc bi upd_flag srt args body) + + + +--------------------------------------------------------------- +-- Module initialisation code +--------------------------------------------------------------- + +{- The module initialisation code looks like this, roughly: + + FN(__stginit_Foo) { + JMP_(__stginit_Foo_1_p) + } + + FN(__stginit_Foo_1_p) { + ... + } + + We have one version of the init code with a module version and the + 'way' attached to it. The version number helps to catch cases + where modules are not compiled in dependency order before being + linked: if a module has been compiled since any modules which depend on + it, then the latter modules will refer to a different version in their + init blocks and a link error will ensue. + + The 'way' suffix helps to catch cases where modules compiled in different + ways are linked together (eg. profiled and non-profiled). + + We provide a plain, unadorned, version of the module init code + which just jumps to the version with the label and way attached. The + reason for this is that when using foreign exports, the caller of + startupHaskell() must supply the name of the init function for the "top" + module in the program, and we don't want to require that this name + has the version and way info appended to it. + +We initialise the module tree by keeping a work-stack, + * pointed to by Sp + * that grows downward + * Sp points to the last occupied slot +-} + +mkModuleInit + :: String -- the "way" + -> CollectedCCs -- cost centre info + -> Module + -> Module -- name of the Main module + -> [Module] + -> HpcInfo + -> FCode () +mkModuleInit way cost_centre_info this_mod main_mod imported_mods hpc_info + = do { -- Allocate the static boolean that records if this + -- module has been registered already + emitData Data [CmmDataLabel moduleRegdLabel, + CmmStaticLit zeroCLit] + + ; init_hpc <- initHpc this_mod hpc_info + ; init_prof <- initCostCentres cost_centre_info + + -- We emit a recursive descent module search for all modules + -- and *choose* to chase it in :Main, below. + -- In this way, Hpc enabled modules can interact seamlessly with + -- not Hpc enabled moduled, provided Main is compiled with Hpc. + + ; emitSimpleProc real_init_lbl $ withFreshLabel "ret_block" $ \retId -> catAGraphs + [ check_already_done retId + , init_prof + , init_hpc + , catAGraphs $ map (registerImport way) all_imported_mods + , mkBranch retId ] + -- Make the "plain" procedure jump to the "real" init procedure + ; emitSimpleProc plain_init_lbl jump_to_init + + -- When compiling the module in which the 'main' function lives, + -- (that is, this_mod == main_mod) + -- we inject an extra stg_init procedure for stg_init_ZCMain, for the + -- RTS to invoke. We must consult the -main-is flag in case the + -- user specified a different function to Main.main + + -- Notice that the recursive descent is optional, depending on what options + -- are enabled. + + + ; whenC (this_mod == main_mod) + (emitSimpleProc plain_main_init_lbl rec_descent_init) + } + where + plain_init_lbl = mkPlainModuleInitLabel this_mod + real_init_lbl = mkModuleInitLabel this_mod way + plain_main_init_lbl = mkPlainModuleInitLabel rOOT_MAIN + + jump_to_init = mkJump (mkLblExpr real_init_lbl) [] + + + -- Main refers to GHC.TopHandler.runIO, so make sure we call the + -- init function for GHC.TopHandler. + extra_imported_mods + | this_mod == main_mod = [gHC_TOP_HANDLER] + | otherwise = [] + all_imported_mods = imported_mods ++ extra_imported_mods + + mod_reg_val = CmmLoad (mkLblExpr moduleRegdLabel) bWord + check_already_done retId + = mkCmmIfThenElse (cmmNeWord (CmmLit zeroCLit) mod_reg_val) + (mkLabel retId Nothing <*> mkReturn []) mkNop + <*> -- Set mod_reg to 1 to record that we've been here + mkStore (mkLblExpr moduleRegdLabel) (CmmLit (mkIntCLit 1)) + + -- The return-code pops the work stack by + -- incrementing Sp, and then jumpd to the popped item + ret_code = mkAssign spReg (cmmRegOffW spReg 1) + <*> mkJump (CmmLoad (cmmRegOffW spReg (-1)) bWord) [] + + rec_descent_init = if opt_SccProfilingOn || isHpcUsed hpc_info + then jump_to_init + else ret_code + +----------------------- +registerImport :: String -> Module -> CmmAGraph +registerImport way mod + | mod == gHC_PRIM + = mkNop + | otherwise -- Push the init procedure onto the work stack + = mkCmmCall init_lbl [] [] NoC_SRT + where + init_lbl = mkLblExpr $ mkModuleInitLabel mod way + + + +--------------------------------------------------------------- +-- Generating static stuff for algebraic data types +--------------------------------------------------------------- + +{- [These comments are rather out of date] + +Macro Kind of constructor +CONST_INFO_TABLE@ Zero arity (no info -- compiler uses static closure) +CHARLIKE_INFO_TABLE Charlike (no info -- compiler indexes fixed array) +INTLIKE_INFO_TABLE Intlike; the one macro generates both info tbls +SPEC_INFO_TABLE SPECish, and bigger than or equal to MIN_UPD_SIZE +GEN_INFO_TABLE GENish (hence bigger than or equal to MIN_UPD_SIZE@) + +Possible info tables for constructor con: + +* _con_info: + Used for dynamically let(rec)-bound occurrences of + the constructor, and for updates. For constructors + which are int-like, char-like or nullary, when GC occurs, + the closure tries to get rid of itself. + +* _static_info: + Static occurrences of the constructor macro: STATIC_INFO_TABLE. + +For zero-arity constructors, \tr{con}, we NO LONGER generate a static closure; +it's place is taken by the top level defn of the constructor. + +For charlike and intlike closures there is a fixed array of static +closures predeclared. +-} + +cgTyCon :: TyCon -> FCode [CmmZ] -- All constructors merged together +cgTyCon tycon + = do { constrs <- mapM (getCmm . cgDataCon) (tyConDataCons tycon) + + -- Generate a table of static closures for an enumeration type + -- Put the table after the data constructor decls, because the + -- datatype closure table (for enumeration types) + -- to (say) PrelBase_$wTrue_closure, which is defined in code_stuff + -- Note that the closure pointers are tagged. + + -- N.B. comment says to put table after constructor decls, but + -- code puts it before --- NR 16 Aug 2007 + ; extra <- cgEnumerationTyCon tycon + + ; return (extra ++ constrs) + } + +cgEnumerationTyCon :: TyCon -> FCode [CmmZ] +cgEnumerationTyCon tycon + | isEnumerationTyCon tycon + = do { tbl <- getCmm $ + emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs) + [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs) + (tagForCon con) + | con <- tyConDataCons tycon] + ; return [tbl] } + | otherwise + = return [] + +cgDataCon :: DataCon -> FCode () +-- Generate the entry code, info tables, and (for niladic constructor) +-- the static closure, for a constructor. +cgDataCon data_con + = do { let + -- To allow the debuggers, interpreters, etc to cope with + -- static data structures (ie those built at compile + -- time), we take care that info-table contains the + -- information we need. + (static_cl_info, _) = layOutStaticConstr data_con arg_reps + (dyn_cl_info, arg_things) = layOutDynConstr data_con arg_reps + + emit_info cl_info ticky_code + = do { code_blks <- getCode (mk_code ticky_code) + ; emitClosureCodeAndInfoTable cl_info [] code_blks } + + mk_code ticky_code + = -- NB: We don't set CC when entering data (WDP 94/06) + do { ticky_code + ; ldvEnter (CmmReg nodeReg) + ; tickyReturnOldCon (length arg_things) + ; emitReturn [cmmOffsetB (CmmReg nodeReg) + (tagForCon data_con)] } + -- The case continuation code expects a tagged pointer + + arg_reps :: [(PrimRep, Type)] + arg_reps = [(typePrimRep ty, ty) | ty <- dataConRepArgTys data_con] + + -- Dynamic closure code for non-nullary constructors only + ; whenC (not (isNullaryRepDataCon data_con)) + (emit_info dyn_cl_info tickyEnterDynCon) + + -- Dynamic-Closure first, to reduce forward references + ; emit_info static_cl_info tickyEnterStaticCon } + + +--------------------------------------------------------------- +-- Stuff to support splitting +--------------------------------------------------------------- + +-- If we're splitting the object, we need to externalise all the +-- top-level names (and then make sure we only use the externalised +-- one in any C label we use which refers to this name). + +maybeExternaliseId :: DynFlags -> Id -> FCode Id +maybeExternaliseId dflags id + | dopt Opt_SplitObjs dflags, -- Externalise the name for -split-objs + isInternalName name = do { mod <- getModuleName + ; returnFC (setIdName id (externalise mod)) } + | otherwise = returnFC id + where + externalise mod = mkExternalName uniq mod new_occ loc + name = idName id + uniq = nameUnique name + new_occ = mkLocalOcc uniq (nameOccName name) + loc = nameSrcSpan name + -- We want to conjure up a name that can't clash with any + -- existing name. So we generate + -- Mod_$L243foo + -- where 243 is the unique. diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs new file mode 100644 index 0000000000..0e8d853969 --- /dev/null +++ b/compiler/codeGen/StgCmmBind.hs @@ -0,0 +1,615 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C-- code generation: bindings +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmBind ( + cgTopRhsClosure, + cgBind, + emitBlackHoleCode + ) where + +#include "HsVersions.h" + +import StgCmmMonad +import StgCmmExpr +import StgCmmEnv +import StgCmmCon +import StgCmmHeap +import StgCmmProf +import StgCmmTicky +import StgCmmGran +import StgCmmLayout +import StgCmmUtils +import StgCmmClosure + +import MkZipCfgCmm +import CoreSyn ( AltCon(..) ) +import SMRep +import Cmm +import CmmUtils +import CLabel +import StgSyn +import CostCentre +import Id +import Name +import Module +import ListSetOps +import Util +import BasicTypes +import Constants +import Outputable +import FastString +import Maybes + +import Data.List + +------------------------------------------------------------------------ +-- Top-level bindings +------------------------------------------------------------------------ + +-- For closures bound at top level, allocate in static space. +-- They should have no free variables. + +cgTopRhsClosure :: Id + -> CostCentreStack -- Optional cost centre annotation + -> StgBinderInfo + -> UpdateFlag + -> SRT + -> [Id] -- Args + -> StgExpr + -> FCode (Id, CgIdInfo) + +cgTopRhsClosure id ccs binder_info upd_flag srt args body = do + { -- LAY OUT THE OBJECT + let name = idName id + ; lf_info <- mkClosureLFInfo id TopLevel [] upd_flag args + ; srt_info <- getSRTInfo srt + ; mod_name <- getModuleName + ; let descr = closureDescription mod_name name + closure_info = mkClosureInfo True id lf_info 0 0 srt_info descr + closure_label = mkLocalClosureLabel name (idCafInfo id) + cg_id_info = litIdInfo id lf_info (CmmLabel closure_label) + closure_rep = mkStaticClosureFields closure_info ccs True [] + + -- BUILD THE OBJECT, AND GENERATE INFO TABLE (IF NECESSARY) + ; emitDataLits closure_label closure_rep + ; forkClosureBody $ do + { node <- bindToReg id lf_info + ; closureCodeBody binder_info closure_info + ccs srt_info node args body } + + ; returnFC (id, cg_id_info) } + +------------------------------------------------------------------------ +-- Non-top-level bindings +------------------------------------------------------------------------ + +cgBind :: StgBinding -> FCode () +cgBind (StgNonRec name rhs) + = do { (name, info) <- cgRhs name rhs + ; addBindC name info } + +cgBind (StgRec pairs) + = do { new_binds <- fixC (\ new_binds -> + do { addBindsC new_binds + ; listFCs [ cgRhs b e | (b,e) <- pairs ] }) + ; addBindsC new_binds } + +-------------------- +cgRhs :: Id -> StgRhs -> FCode (Id, CgIdInfo) + -- The Id is passed along so a binding can be set up + +cgRhs name (StgRhsCon maybe_cc con args) + = do { idinfo <- buildDynCon name maybe_cc con args + ; return (name, idinfo) } + +cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body) + = mkRhsClosure name cc bi fvs upd_flag srt args body + +------------------------------------------------------------------------ +-- Non-constructor right hand sides +------------------------------------------------------------------------ + +mkRhsClosure :: Id -> CostCentreStack -> StgBinderInfo + -> [Id] -- Free vars + -> UpdateFlag -> SRT + -> [Id] -- Args + -> StgExpr + -> FCode (Id, CgIdInfo) + +{- mkRhsClosure looks for two special forms of the right-hand side: + a) selector thunks + b) AP thunks + +If neither happens, it just calls mkClosureLFInfo. You might think +that mkClosureLFInfo should do all this, but it seems wrong for the +latter to look at the structure of an expression + +Note [Selectors] +~~~~~~~~~~~~~~~~ +We look at the body of the closure to see if it's a selector---turgid, +but nothing deep. We are looking for a closure of {\em exactly} the +form: + +... = [the_fv] \ u [] -> + case the_fv of + con a_1 ... a_n -> a_i + +Note [Ap thunks] +~~~~~~~~~~~~~~~~ +A more generic AP thunk of the form + + x = [ x_1...x_n ] \.. [] -> x_1 ... x_n + +A set of these is compiled statically into the RTS, so we just use +those. We could extend the idea to thunks where some of the x_i are +global ids (and hence not free variables), but this would entail +generating a larger thunk. It might be an option for non-optimising +compilation, though. + +We only generate an Ap thunk if all the free variables are pointers, +for semi-obvious reasons. + +-} + +---------- Note [Selectors] ------------------ +mkRhsClosure bndr cc bi + [the_fv] -- Just one free var + upd_flag -- Updatable thunk + _srt + [] -- A thunk + body@(StgCase (StgApp scrutinee [{-no args-}]) + _ _ _ _ -- ignore uniq, etc. + (AlgAlt _) + [(DataAlt con, params, _use_mask, + (StgApp selectee [{-no args-}]))]) + | the_fv == scrutinee -- Scrutinee is the only free variable + && maybeToBool maybe_offset -- Selectee is a component of the tuple + && offset_into_int <= mAX_SPEC_SELECTEE_SIZE -- Offset is small enough + = -- NOT TRUE: ASSERT(is_single_constructor) + -- The simplifier may have statically determined that the single alternative + -- is the only possible case and eliminated the others, even if there are + -- other constructors in the datatype. It's still ok to make a selector + -- thunk in this case, because we *know* which constructor the scrutinee + -- will evaluate to. + -- + -- srt is discarded; it must be empty + cgStdThunk bndr cc bi body lf_info [StgVarArg the_fv] + where + lf_info = mkSelectorLFInfo bndr offset_into_int + (isUpdatable upd_flag) + (_, params_w_offsets) = layOutDynConstr con (addIdReps params) + -- Just want the layout + maybe_offset = assocMaybe params_w_offsets selectee + Just the_offset = maybe_offset + offset_into_int = the_offset - fixedHdrSize + +---------- Note [Ap thunks] ------------------ +mkRhsClosure bndr cc bi + fvs + upd_flag + _srt + [] -- No args; a thunk + body@(StgApp fun_id args) + + | args `lengthIs` (arity-1) + && all isFollowableArg (map idCgRep fvs) + && isUpdatable upd_flag + && arity <= mAX_SPEC_AP_SIZE + + -- Ha! an Ap thunk + = cgStdThunk bndr cc bi body lf_info payload + where + lf_info = mkApLFInfo bndr upd_flag arity + -- the payload has to be in the correct order, hence we can't + -- just use the fvs. + payload = StgVarArg fun_id : args + arity = length fvs + +---------- Default case ------------------ +mkRhsClosure bndr cc bi fvs upd_flag srt args body + = do { -- LAY OUT THE OBJECT + -- If the binder is itself a free variable, then don't store + -- it in the closure. Instead, just bind it to Node on entry. + -- NB we can be sure that Node will point to it, because we + -- havn't told mkClosureLFInfo about this; so if the binder + -- _was_ a free var of its RHS, mkClosureLFInfo thinks it *is* + -- stored in the closure itself, so it will make sure that + -- Node points to it... + ; let + is_elem = isIn "cgRhsClosure" + bndr_is_a_fv = bndr `is_elem` fvs + reduced_fvs | bndr_is_a_fv = fvs `minusList` [bndr] + | otherwise = fvs + + + -- MAKE CLOSURE INFO FOR THIS CLOSURE + ; lf_info <- mkClosureLFInfo bndr NotTopLevel fvs upd_flag args + ; mod_name <- getModuleName + ; c_srt <- getSRTInfo srt + ; let name = idName bndr + descr = closureDescription mod_name name + fv_details :: [(Id, VirtualHpOffset)] + (tot_wds, ptr_wds, fv_details) + = mkVirtHeapOffsets (isLFThunk lf_info) + (addIdReps reduced_fvs) + closure_info = mkClosureInfo False -- Not static + bndr lf_info tot_wds ptr_wds + c_srt descr + + -- BUILD ITS INFO TABLE AND CODE + ; forkClosureBody $ do + { -- Bind the binder itself + -- It does no harm to have it in the envt even if + -- it's not a free variable; and we need a reg for it + node <- bindToReg bndr lf_info + + -- Bind the free variables + ; mapCs (bind_fv node) fv_details + + -- And compile the body + ; closureCodeBody bi closure_info cc c_srt node args body } + + -- BUILD THE OBJECT + ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body + ; emit (mkComment $ mkFastString "calling allocDynClosure") + ; tmp <- allocDynClosure closure_info use_cc blame_cc + (mapFst StgVarArg fv_details) + + -- RETURN + ; return (bndr, regIdInfo bndr lf_info tmp) } + where + -- A function closure pointer may be tagged, so we + -- must take it into account when accessing the free variables. + tag = tagForArity (length args) + + bind_fv node (id, off) + = do { reg <- rebindToReg id + ; emit $ mkTaggedObjectLoad reg node off tag } + +------------------------- +cgStdThunk + :: Id + -> CostCentreStack -- Optional cost centre annotation + -> StgBinderInfo -- XXX: not used?? + -> StgExpr + -> LambdaFormInfo + -> [StgArg] -- payload + -> FCode (Id, CgIdInfo) + +cgStdThunk bndr cc _bndr_info body lf_info payload + = do -- AHA! A STANDARD-FORM THUNK + { -- LAY OUT THE OBJECT + mod_name <- getModuleName + ; let (tot_wds, ptr_wds, payload_w_offsets) + = mkVirtHeapOffsets (isLFThunk lf_info) (addArgReps payload) + + descr = closureDescription mod_name (idName bndr) + closure_info = mkClosureInfo False -- Not static + bndr lf_info tot_wds ptr_wds + NoC_SRT -- No SRT for a std-form closure + descr + + ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body + + -- BUILD THE OBJECT + ; tmp <- allocDynClosure closure_info use_cc blame_cc payload_w_offsets + + -- RETURN + ; returnFC (bndr, regIdInfo bndr lf_info tmp) } + +mkClosureLFInfo :: Id -- The binder + -> TopLevelFlag -- True of top level + -> [Id] -- Free vars + -> UpdateFlag -- Update flag + -> [Id] -- Args + -> FCode LambdaFormInfo +mkClosureLFInfo bndr top fvs upd_flag args + | null args = return (mkLFThunk (idType bndr) top fvs upd_flag) + | otherwise = do { arg_descr <- mkArgDescr (idName bndr) args + ; return (mkLFReEntrant top fvs args arg_descr) } + + +------------------------------------------------------------------------ +-- The code for closures} +------------------------------------------------------------------------ + +closureCodeBody :: StgBinderInfo -- XXX: unused? + -> ClosureInfo -- Lots of information about this closure + -> CostCentreStack -- Optional cost centre attached to closure + -> C_SRT + -> LocalReg -- The closure itself; first argument + -- The Id is in scope already, bound to this reg + -> [Id] + -> StgExpr + -> FCode () + +{- There are two main cases for the code for closures. + +* If there are *no arguments*, then the closure is a thunk, and not in + normal form. So it should set up an update frame (if it is + shared). NB: Thunks cannot have a primitive type! + +* If there is *at least one* argument, then this closure is in + normal form, so there is no need to set up an update frame. + + The Macros for GrAnSim are produced at the beginning of the + argSatisfactionCheck (by calling fetchAndReschedule). + There info if Node points to closure is available. -- HWL -} + +closureCodeBody _binder_info cl_info cc srt node args body + | null args -- No args i.e. thunk + = do { code <- getCode $ thunkCode cl_info cc srt node body + ; emitClosureCodeAndInfoTable cl_info [node] code } + +closureCodeBody _binder_info cl_info cc srt node args body + = ASSERT( length args > 0 ) + do { -- Allocate the global ticky counter, + -- and establish the ticky-counter + -- label for this block + let ticky_ctr_lbl = mkRednCountsLabel (closureName cl_info) $ clHasCafRefs cl_info + ; emitTickyCounter cl_info args + ; setTickyCtrLabel ticky_ctr_lbl $ do + +-- -- XXX: no slow-entry code for now +-- -- Emit the slow-entry code +-- { reg_save_code <- mkSlowEntryCode cl_info reg_args + + -- Emit the main entry code + ; let node_points = nodeMustPointToIt (closureLFInfo cl_info) + ; arg_regs <- bindArgsToRegs args + ; blks <- forkProc $ getCode $ do + { enterCostCentre cl_info cc body + ; tickyEnterFun cl_info + ; whenC node_points (ldvEnterClosure cl_info) + ; granYield arg_regs node_points + + -- Main payload + ; entryHeapCheck node arg_regs srt $ + cgExpr body } + + ; emitClosureCodeAndInfoTable cl_info (node:arg_regs) blks + } + +{- +----------------------------------------- +-- The "slow entry" code for a function. This entry point takes its +-- arguments on the stack. It loads the arguments into registers +-- according to the calling convention, and jumps to the function's +-- normal entry point. The function's closure is assumed to be in +-- R1/node. +-- +-- The slow entry point is used in two places: +-- +-- (a) unknown calls: eg. stg_PAP_entry +-- (b) returning from a heap-check failure + +mkSlowEntryCode :: ClosureInfo -> [(Id,GlobalReg)] -> FCode CmmStmts +-- If this function doesn't have a specialised ArgDescr, we need +-- to generate the function's arg bitmap, slow-entry code, and +-- register-save code for the heap-check failure +-- Here, we emit the slow-entry code, and +-- return the register-save assignments +mkSlowEntryCode cl_info reg_args + | Just (_, ArgGen _) <- closureFunInfo cl_info + = do { emitSimpleProc slow_lbl (emitStmts load_stmts) + ; return save_stmts } + | otherwise = return noStmts + where + name = closureName cl_info + slow_lbl = mkSlowEntryLabel name + + load_stmts = mkStmts load_assts `plusStmts` mkStmts [stk_adj_pop, jump_to_entry] + save_stmts = oneStmt stk_adj_push `plusStmts` mkStmts save_assts + + reps_w_regs :: [(CgRep,GlobalReg)] + reps_w_regs = [(idCgRep id, reg) | (id,reg) <- reverse reg_args] + (final_stk_offset, stk_offsets) + = mapAccumL (\off (rep,_) -> (off + cgRepSizeW rep, off)) + 0 reps_w_regs + + load_assts = zipWithEqual "mk_load" mk_load reps_w_regs stk_offsets + mk_load (rep,reg) offset = CmmAssign (CmmGlobal reg) + (CmmLoad (cmmRegOffW spReg offset) + (argMachRep rep)) + + save_assts = zipWithEqual "mk_save" mk_save reps_w_regs stk_offsets + mk_save (rep,reg) offset = ASSERT( argMachRep rep == globalRegType reg ) + CmmStore (cmmRegOffW spReg offset) + (CmmReg (CmmGlobal reg)) + + stk_adj_pop = CmmAssign spReg (cmmRegOffW spReg final_stk_offset) + stk_adj_push = CmmAssign spReg (cmmRegOffW spReg (- final_stk_offset)) + jump_to_entry = CmmJump (mkLblExpr (enterLocalIdLabel name)) [] +-} + +----------------------------------------- +thunkCode :: ClosureInfo -> CostCentreStack -> C_SRT -> LocalReg -> StgExpr -> FCode () +thunkCode cl_info cc srt node body + = do { let node_points = nodeMustPointToIt (closureLFInfo cl_info) + + ; tickyEnterThunk cl_info + ; ldvEnterClosure cl_info -- NB: Node always points when profiling + ; granThunk node_points + + -- Heap overflow check + ; entryHeapCheck node [] srt $ do + { -- Overwrite with black hole if necessary + -- but *after* the heap-overflow check + whenC (blackHoleOnEntry cl_info && node_points) + (blackHoleIt cl_info) + + -- Push update frame + ; setupUpdate cl_info node + + -- We only enter cc after setting up update so + -- that cc of enclosing scope will be recorded + -- in update frame CAF/DICT functions will be + -- subsumed by this enclosing cc + ; enterCostCentre cl_info cc body + + ; cgExpr body } } + + +------------------------------------------------------------------------ +-- Update and black-hole wrappers +------------------------------------------------------------------------ + +blackHoleIt :: ClosureInfo -> FCode () +-- Only called for closures with no args +-- Node points to the closure +blackHoleIt closure_info = emitBlackHoleCode (closureSingleEntry closure_info) + +emitBlackHoleCode :: Bool -> FCode () +emitBlackHoleCode is_single_entry + | eager_blackholing = do + tickyBlackHole (not is_single_entry) + emit (mkStore (CmmReg nodeReg) (CmmLit (CmmLabel bh_lbl))) + | otherwise = + nopC + where + bh_lbl | is_single_entry = mkRtsDataLabel (sLit "stg_SE_BLACKHOLE_info") + | otherwise = mkRtsDataLabel (sLit "stg_BLACKHOLE_info") + + -- If we wanted to do eager blackholing with slop filling, + -- we'd need to do it at the *end* of a basic block, otherwise + -- we overwrite the free variables in the thunk that we still + -- need. We have a patch for this from Andy Cheadle, but not + -- incorporated yet. --SDM [6/2004] + -- + -- Profiling needs slop filling (to support LDV profiling), so + -- currently eager blackholing doesn't work with profiling. + -- + -- Previously, eager blackholing was enabled when ticky-ticky + -- was on. But it didn't work, and it wasn't strictly necessary + -- to bring back minimal ticky-ticky, so now EAGER_BLACKHOLING + -- is unconditionally disabled. -- krc 1/2007 + + eager_blackholing = False + +setupUpdate :: ClosureInfo -> LocalReg -> FCode () + -- Nota Bene: this function does not change Node (even if it's a CAF), + -- so that the cost centre in the original closure can still be + -- extracted by a subsequent enterCostCentre +setupUpdate closure_info node + | closureReEntrant closure_info + = return () + + | not (isStaticClosure closure_info) + = if closureUpdReqd closure_info + then do { tickyPushUpdateFrame; pushUpdateFrame node } + else tickyUpdateFrameOmitted + + | otherwise -- A static closure + = do { tickyUpdateBhCaf closure_info + + ; if closureUpdReqd closure_info + then do -- Blackhole the (updatable) CAF: + { upd_closure <- link_caf closure_info True + ; pushUpdateFrame upd_closure } + else tickyUpdateFrameOmitted + } + +pushUpdateFrame :: LocalReg -> FCode () +pushUpdateFrame cl_reg + = emit (mkAddToContext (mkLblExpr mkUpdInfoLabel) + [CmmReg (CmmLocal cl_reg)]) + +----------------------------------------------------------------------------- +-- Entering a CAF +-- +-- When a CAF is first entered, it creates a black hole in the heap, +-- and updates itself with an indirection to this new black hole. +-- +-- We update the CAF with an indirection to a newly-allocated black +-- hole in the heap. We also set the blocking queue on the newly +-- allocated black hole to be empty. +-- +-- Why do we make a black hole in the heap when we enter a CAF? +-- +-- - for a generational garbage collector, which needs a fast +-- test for whether an updatee is in an old generation or not +-- +-- - for the parallel system, which can implement updates more +-- easily if the updatee is always in the heap. (allegedly). +-- +-- When debugging, we maintain a separate CAF list so we can tell when +-- a CAF has been garbage collected. + +-- newCAF must be called before the itbl ptr is overwritten, since +-- newCAF records the old itbl ptr in order to do CAF reverting +-- (which Hugs needs to do in order that combined mode works right.) +-- + +-- ToDo [Feb 04] This entire link_caf nonsense could all be moved +-- into the "newCAF" RTS procedure, which we call anyway, including +-- the allocation of the black-hole indirection closure. +-- That way, code size would fall, the CAF-handling code would +-- be closer together, and the compiler wouldn't need to know +-- about off_indirectee etc. + +link_caf :: ClosureInfo + -> Bool -- True <=> updatable, False <=> single-entry + -> FCode LocalReg -- Returns amode for closure to be updated +-- To update a CAF we must allocate a black hole, link the CAF onto the +-- CAF list, then update the CAF to point to the fresh black hole. +-- This function returns the address of the black hole, so it can be +-- updated with the new value when available. The reason for all of this +-- is that we only want to update dynamic heap objects, not static ones, +-- so that generational GC is easier. +link_caf cl_info is_upd = do + { -- Alloc black hole specifying CC_HDR(Node) as the cost centre + ; let use_cc = costCentreFrom (CmmReg nodeReg) + blame_cc = use_cc + ; hp_rel <- allocDynClosure bh_cl_info use_cc blame_cc [] + + -- Call the RTS function newCAF to add the CAF to the CafList + -- so that the garbage collector can find them + -- This must be done *before* the info table pointer is overwritten, + -- because the old info table ptr is needed for reversion + ; emitRtsCallWithVols (sLit "newCAF") [(CmmReg nodeReg,AddrHint)] [node] False + -- node is live, so save it. + + -- Overwrite the closure with a (static) indirection + -- to the newly-allocated black hole + ; emit (mkStore (cmmRegOffW nodeReg off_indirectee) (CmmReg (CmmLocal hp_rel)) <*> + mkStore (CmmReg nodeReg) ind_static_info) + + ; return hp_rel } + where + bh_cl_info :: ClosureInfo + bh_cl_info | is_upd = cafBlackHoleClosureInfo cl_info + | otherwise = seCafBlackHoleClosureInfo cl_info + + ind_static_info :: CmmExpr + ind_static_info = mkLblExpr mkIndStaticInfoLabel + + off_indirectee :: WordOff + off_indirectee = fixedHdrSize + oFFSET_StgInd_indirectee*wORD_SIZE + + +------------------------------------------------------------------------ +-- Profiling +------------------------------------------------------------------------ + +-- For "global" data constructors the description is simply occurrence +-- name of the data constructor itself. Otherwise it is determined by +-- @closureDescription@ from the let binding information. + +closureDescription :: Module -- Module + -> Name -- Id of closure binding + -> String + -- Not called for StgRhsCon which have global info tables built in + -- CgConTbls.lhs with a description generated from the data constructor +closureDescription mod_name name + = showSDocDump (char '<' <> + (if isExternalName name + then ppr name -- ppr will include the module name prefix + else pprModule mod_name <> char '.' <> ppr name) <> + char '>') + -- showSDocDump, because we want to see the unique on the Name. + diff --git a/compiler/codeGen/StgCmmBind.hs-boot b/compiler/codeGen/StgCmmBind.hs-boot new file mode 100644 index 0000000000..5840e990c8 --- /dev/null +++ b/compiler/codeGen/StgCmmBind.hs-boot @@ -0,0 +1,6 @@ +module StgCmmBind where + +import StgCmmMonad( FCode ) +import StgSyn( StgBinding ) + +cgBind :: StgBinding -> FCode () diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs new file mode 100644 index 0000000000..c32d7cd857 --- /dev/null +++ b/compiler/codeGen/StgCmmClosure.hs @@ -0,0 +1,1100 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C-- code generation: +-- +-- The types LambdaFormInfo +-- ClosureInfo +-- +-- Nothing monadic in here! +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + + +module StgCmmClosure ( + SMRep, + DynTag, tagForCon, isSmallFamily, + ConTagZ, dataConTagZ, + + ArgDescr(..), Liveness(..), + C_SRT(..), needsSRT, + + isVoidRep, isGcPtrRep, addIdReps, addArgReps, + argPrimRep, + + LambdaFormInfo, -- Abstract + StandardFormInfo, -- ...ditto... + mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo, + mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape, + lfDynTag, + + ClosureInfo, + mkClosureInfo, mkConInfo, maybeIsLFCon, + + closureSize, closureNonHdrSize, + closureGoodStuffSize, closurePtrsSize, + slopSize, + + closureName, infoTableLabelFromCI, + closureLabelFromCI, + closureTypeInfo, + closureLFInfo, isLFThunk,closureSMRep, closureUpdReqd, + closureNeedsUpdSpace, closureIsThunk, + closureSingleEntry, closureReEntrant, isConstrClosure_maybe, + closureFunInfo, isStandardFormThunk, isKnownFun, + funTag, tagForArity, + + enterIdLabel, enterLocalIdLabel, + + nodeMustPointToIt, + CallMethod(..), getCallMethod, + + blackHoleOnEntry, + + getClosureType, + + isToplevClosure, + closureValDescr, closureTypeDescr, -- profiling + + isStaticClosure, + cafBlackHoleClosureInfo, seCafBlackHoleClosureInfo, + + staticClosureNeedsLink, clHasCafRefs + ) where + +#include "../includes/MachDeps.h" + +#define FAST_STRING_NOT_NEEDED +#include "HsVersions.h" + +import ClosureInfo (ArgDescr(..), C_SRT(..), Liveness(..)) + -- XXX temporary becuase FunInfo needs this one + +import StgSyn +import SMRep +import Cmm ( ClosureTypeInfo(..) ) +import CmmExpr + +import CLabel +import StaticFlags +import Id +import IdInfo +import DataCon +import Name +import OccName +import Type +import TypeRep +import TcType +import TyCon +import BasicTypes +import Outputable +import Constants + + +----------------------------------------------------------------------------- +-- Representations +----------------------------------------------------------------------------- + +addIdReps :: [Id] -> [(PrimRep, Id)] +addIdReps ids = [(idPrimRep id, id) | id <- ids] + +addArgReps :: [StgArg] -> [(PrimRep, StgArg)] +addArgReps args = [(argPrimRep arg, arg) | arg <- args] + +argPrimRep :: StgArg -> PrimRep +argPrimRep arg = typePrimRep (stgArgType arg) + +isVoidRep :: PrimRep -> Bool +isVoidRep VoidRep = True +isVoidRep _other = False + +isGcPtrRep :: PrimRep -> Bool +isGcPtrRep PtrRep = True +isGcPtrRep _ = False + + +----------------------------------------------------------------------------- +-- LambdaFormInfo +----------------------------------------------------------------------------- + +-- Information about an identifier, from the code generator's point of +-- view. Every identifier is bound to a LambdaFormInfo in the +-- environment, which gives the code generator enough info to be able to +-- tail call or return that identifier. + +data LambdaFormInfo + = LFReEntrant -- Reentrant closure (a function) + TopLevelFlag -- True if top level + !Int -- Arity. Invariant: always > 0 + !Bool -- True <=> no fvs + ArgDescr -- Argument descriptor (should really be in ClosureInfo) + + | LFThunk -- Thunk (zero arity) + TopLevelFlag + !Bool -- True <=> no free vars + !Bool -- True <=> updatable (i.e., *not* single-entry) + StandardFormInfo + !Bool -- True <=> *might* be a function type + + | LFCon -- A saturated constructor application + DataCon -- The constructor + + | LFUnknown -- Used for function arguments and imported things. + -- We know nothing about this closure. + -- Treat like updatable "LFThunk"... + -- Imported things which we *do* know something about use + -- one of the other LF constructors (eg LFReEntrant for + -- known functions) + !Bool -- True <=> *might* be a function type + -- The False case is good when we want to enter it, + -- because then we know the entry code will do + -- For a function, the entry code is the fast entry point + + | LFUnLifted -- A value of unboxed type; + -- always a value, neeeds evaluation + + | LFLetNoEscape -- See LetNoEscape module for precise description + + | LFBlackHole -- Used for the closures allocated to hold the result + -- of a CAF. We want the target of the update frame to + -- be in the heap, so we make a black hole to hold it. + CLabel -- Flavour (info label, eg CAF_BLACKHOLE_info). + + +------------------------- +-- An ArgDsecr describes the argument pattern of a function + +{- XXX -- imported from old ClosureInfo for now +data ArgDescr + = ArgSpec -- Fits one of the standard patterns + !StgHalfWord -- RTS type identifier ARG_P, ARG_N, ... + + | ArgGen -- General case + Liveness -- Details about the arguments +-} + +{- XXX -- imported from old ClosureInfo for now +------------------------- +-- We represent liveness bitmaps as a Bitmap (whose internal +-- representation really is a bitmap). These are pinned onto case return +-- vectors to indicate the state of the stack for the garbage collector. +-- +-- In the compiled program, liveness bitmaps that fit inside a single +-- word (StgWord) are stored as a single word, while larger bitmaps are +-- stored as a pointer to an array of words. + +data Liveness + = SmallLiveness -- Liveness info that fits in one word + StgWord -- Here's the bitmap + + | BigLiveness -- Liveness info witha a multi-word bitmap + CLabel -- Label for the bitmap +-} + +------------------------- +-- StandardFormInfo tells whether this thunk has one of +-- a small number of standard forms + +data StandardFormInfo + = NonStandardThunk + -- Not of of the standard forms + + | SelectorThunk + -- A SelectorThunk is of form + -- case x of + -- con a1,..,an -> ak + -- and the constructor is from a single-constr type. + WordOff -- 0-origin offset of ak within the "goods" of + -- constructor (Recall that the a1,...,an may be laid + -- out in the heap in a non-obvious order.) + + | ApThunk + -- An ApThunk is of form + -- x1 ... xn + -- The code for the thunk just pushes x2..xn on the stack and enters x1. + -- There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled + -- in the RTS to save space. + Int -- Arity, n + + +------------------------------------------------------ +-- Building LambdaFormInfo +------------------------------------------------------ + +mkLFArgument :: Id -> LambdaFormInfo +mkLFArgument id + | isUnLiftedType ty = LFUnLifted + | might_be_a_function ty = LFUnknown True + | otherwise = LFUnknown False + where + ty = idType id + +------------- +mkLFLetNoEscape :: LambdaFormInfo +mkLFLetNoEscape = LFLetNoEscape + +------------- +mkLFReEntrant :: TopLevelFlag -- True of top level + -> [Id] -- Free vars + -> [Id] -- Args + -> ArgDescr -- Argument descriptor + -> LambdaFormInfo + +mkLFReEntrant top fvs args arg_descr + = LFReEntrant top (length args) (null fvs) arg_descr + +------------- +mkLFThunk :: Type -> TopLevelFlag -> [Id] -> UpdateFlag -> LambdaFormInfo +mkLFThunk thunk_ty top fvs upd_flag + = ASSERT( not (isUpdatable upd_flag) || not (isUnLiftedType thunk_ty) ) + LFThunk top (null fvs) + (isUpdatable upd_flag) + NonStandardThunk + (might_be_a_function thunk_ty) + +-------------- +might_be_a_function :: Type -> Bool +-- Return False only if we are *sure* it's a data type +-- Look through newtypes etc as much as poss +might_be_a_function ty + = case splitTyConApp_maybe (repType ty) of + Just (tc, _) -> not (isDataTyCon tc) + Nothing -> True + +------------- +mkConLFInfo :: DataCon -> LambdaFormInfo +mkConLFInfo con = LFCon con + +------------- +mkSelectorLFInfo :: Id -> Int -> Bool -> LambdaFormInfo +mkSelectorLFInfo id offset updatable + = LFThunk NotTopLevel False updatable (SelectorThunk offset) + (might_be_a_function (idType id)) + +------------- +mkApLFInfo :: Id -> UpdateFlag -> Arity -> LambdaFormInfo +mkApLFInfo id upd_flag arity + = LFThunk NotTopLevel (arity == 0) (isUpdatable upd_flag) (ApThunk arity) + (might_be_a_function (idType id)) + +------------- +mkLFImported :: Id -> LambdaFormInfo +mkLFImported id + | Just con <- isDataConWorkId_maybe id + , isNullaryRepDataCon con + = LFCon con -- An imported nullary constructor + -- We assume that the constructor is evaluated so that + -- the id really does point directly to the constructor + + | arity > 0 + = LFReEntrant TopLevel arity True (panic "arg_descr") + + | otherwise + = mkLFArgument id -- Not sure of exact arity + where + arity = idArity id + +----------------------------------------------------- +-- Dynamic pointer tagging +----------------------------------------------------- + +type ConTagZ = Int -- A *zero-indexed* contructor tag + +type DynTag = Int -- The tag on a *pointer* + -- (from the dynamic-tagging paper) + +{- Note [Data constructor dynamic tags] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The family size of a data type (the number of constructors) +can be either: + * small, if the family size < 2**tag_bits + * big, otherwise. + +Small families can have the constructor tag in the tag bits. +Big families only use the tag value 1 to represent evaluatedness. -} + +isSmallFamily :: Int -> Bool +isSmallFamily fam_size = fam_size <= mAX_PTR_TAG + +-- We keep the *zero-indexed* tag in the srt_len field of the info +-- table of a data constructor. +dataConTagZ :: DataCon -> ConTagZ +dataConTagZ con = dataConTag con - fIRST_TAG + +tagForCon :: DataCon -> DynTag +tagForCon con + | isSmallFamily fam_size = con_tag + 1 + | otherwise = 1 + where + con_tag = dataConTagZ con + fam_size = tyConFamilySize (dataConTyCon con) + +tagForArity :: Int -> DynTag +tagForArity arity | isSmallFamily arity = arity + | otherwise = 0 + +lfDynTag :: LambdaFormInfo -> DynTag +lfDynTag (LFCon con) = tagForCon con +lfDynTag (LFReEntrant _ arity _ _) = tagForArity arity +lfDynTag _other = 0 + + +----------------------------------------------------------------------------- +-- Observing LambdaFormInfo +----------------------------------------------------------------------------- + +------------- +maybeIsLFCon :: LambdaFormInfo -> Maybe DataCon +maybeIsLFCon (LFCon con) = Just con +maybeIsLFCon _ = Nothing + +------------ +isLFThunk :: LambdaFormInfo -> Bool +isLFThunk (LFThunk _ _ _ _ _) = True +isLFThunk (LFBlackHole _) = True + -- return True for a blackhole: this function is used to determine + -- whether to use the thunk header in SMP mode, and a blackhole + -- must have one. +isLFThunk _ = False + + +----------------------------------------------------------------------------- +-- Choosing SM reps +----------------------------------------------------------------------------- + +chooseSMRep + :: Bool -- True <=> static closure + -> LambdaFormInfo + -> WordOff -> WordOff -- Tot wds, ptr wds + -> SMRep + +chooseSMRep is_static lf_info tot_wds ptr_wds + = let + nonptr_wds = tot_wds - ptr_wds + closure_type = getClosureType is_static ptr_wds lf_info + in + GenericRep is_static ptr_wds nonptr_wds closure_type + +-- We *do* get non-updatable top-level thunks sometimes. eg. f = g +-- gets compiled to a jump to g (if g has non-zero arity), instead of +-- messing around with update frames and PAPs. We set the closure type +-- to FUN_STATIC in this case. + +getClosureType :: Bool -> WordOff -> LambdaFormInfo -> ClosureType +getClosureType is_static ptr_wds lf_info + = case lf_info of + LFCon {} | is_static && ptr_wds == 0 -> ConstrNoCaf + | otherwise -> Constr + LFReEntrant {} -> Fun + LFThunk _ _ _ (SelectorThunk {}) _ -> ThunkSelector + LFThunk {} -> Thunk + _ -> panic "getClosureType" + + +----------------------------------------------------------------------------- +-- nodeMustPointToIt +----------------------------------------------------------------------------- + +-- Be sure to see the stg-details notes about these... + +nodeMustPointToIt :: LambdaFormInfo -> Bool +nodeMustPointToIt (LFReEntrant top _ no_fvs _) + = not no_fvs || -- Certainly if it has fvs we need to point to it + isNotTopLevel top + -- If it is not top level we will point to it + -- We can have a \r closure with no_fvs which + -- is not top level as special case cgRhsClosure + -- has been dissabled in favour of let floating + + -- For lex_profiling we also access the cost centre for a + -- non-inherited function i.e. not top level + -- the not top case above ensures this is ok. + +nodeMustPointToIt (LFCon _) = True + + -- Strictly speaking, the above two don't need Node to point + -- to it if the arity = 0. But this is a *really* unlikely + -- situation. If we know it's nil (say) and we are entering + -- it. Eg: let x = [] in x then we will certainly have inlined + -- x, since nil is a simple atom. So we gain little by not + -- having Node point to known zero-arity things. On the other + -- hand, we do lose something; Patrick's code for figuring out + -- when something has been updated but not entered relies on + -- having Node point to the result of an update. SLPJ + -- 27/11/92. + +nodeMustPointToIt (LFThunk _ no_fvs updatable NonStandardThunk _) + = updatable || not no_fvs || opt_SccProfilingOn + -- For the non-updatable (single-entry case): + -- + -- True if has fvs (in which case we need access to them, and we + -- should black-hole it) + -- or profiling (in which case we need to recover the cost centre + -- from inside it) + +nodeMustPointToIt (LFThunk {}) -- Node must point to a standard-form thunk + = True + +nodeMustPointToIt (LFUnknown _) = True +nodeMustPointToIt LFUnLifted = False +nodeMustPointToIt (LFBlackHole _) = True -- BH entry may require Node to point +nodeMustPointToIt LFLetNoEscape = False + +----------------------------------------------------------------------------- +-- getCallMethod +----------------------------------------------------------------------------- + +{- The entry conventions depend on the type of closure being entered, +whether or not it has free variables, and whether we're running +sequentially or in parallel. + +Closure Node Argument Enter +Characteristics Par Req'd Passing Via +------------------------------------------------------------------------------- +Unknown & no & yes & stack & node +Known fun (>1 arg), no fvs & no & no & registers & fast entry (enough args) + & slow entry (otherwise) +Known fun (>1 arg), fvs & no & yes & registers & fast entry (enough args) +0 arg, no fvs \r,\s & no & no & n/a & direct entry +0 arg, no fvs \u & no & yes & n/a & node +0 arg, fvs \r,\s & no & yes & n/a & direct entry +0 arg, fvs \u & no & yes & n/a & node + +Unknown & yes & yes & stack & node +Known fun (>1 arg), no fvs & yes & no & registers & fast entry (enough args) + & slow entry (otherwise) +Known fun (>1 arg), fvs & yes & yes & registers & node +0 arg, no fvs \r,\s & yes & no & n/a & direct entry +0 arg, no fvs \u & yes & yes & n/a & node +0 arg, fvs \r,\s & yes & yes & n/a & node +0 arg, fvs \u & yes & yes & n/a & node +\end{tabular} + +When black-holing, single-entry closures could also be entered via node +(rather than directly) to catch double-entry. -} + +data CallMethod + = EnterIt -- No args, not a function + + | JumpToIt -- A join point + + | ReturnIt -- It's a value (function, unboxed value, + -- or constructor), so just return it. + + | SlowCall -- Unknown fun, or known fun with + -- too few args. + + | DirectEntry -- Jump directly, with args in regs + CLabel -- The code label + Int -- Its arity + +getCallMethod :: Name -- Function being applied + -> CafInfo -- Can it refer to CAF's? + -> LambdaFormInfo -- Its info + -> Int -- Number of available arguments + -> CallMethod + +getCallMethod _name _ lf_info _n_args + | nodeMustPointToIt lf_info && opt_Parallel + = -- If we're parallel, then we must always enter via node. + -- The reason is that the closure may have been + -- fetched since we allocated it. + EnterIt + +getCallMethod name caf (LFReEntrant _ arity _ _) n_args + | n_args == 0 = ASSERT( arity /= 0 ) + ReturnIt -- No args at all + | n_args < arity = SlowCall -- Not enough args + | otherwise = DirectEntry (enterIdLabel name caf) arity + +getCallMethod _name _ LFUnLifted n_args + = ASSERT( n_args == 0 ) ReturnIt + +getCallMethod _name _ (LFCon _) n_args + = ASSERT( n_args == 0 ) ReturnIt + +getCallMethod name caf (LFThunk _ _ updatable std_form_info is_fun) n_args + | is_fun -- it *might* be a function, so we must "call" it (which is always safe) + = SlowCall -- We cannot just enter it [in eval/apply, the entry code + -- is the fast-entry code] + + -- Since is_fun is False, we are *definitely* looking at a data value + | updatable || opt_DoTickyProfiling -- to catch double entry + {- OLD: || opt_SMP + I decided to remove this, because in SMP mode it doesn't matter + if we enter the same thunk multiple times, so the optimisation + of jumping directly to the entry code is still valid. --SDM + -} + = EnterIt + -- We used to have ASSERT( n_args == 0 ), but actually it is + -- possible for the optimiser to generate + -- let bot :: Int = error Int "urk" + -- in (bot `cast` unsafeCoerce Int (Int -> Int)) 3 + -- This happens as a result of the case-of-error transformation + -- So the right thing to do is just to enter the thing + + | otherwise -- Jump direct to code for single-entry thunks + = ASSERT( n_args == 0 ) + DirectEntry (thunkEntryLabel name caf std_form_info updatable) 0 + +getCallMethod _name _ (LFUnknown True) _n_args + = SlowCall -- might be a function + +getCallMethod name _ (LFUnknown False) n_args + = ASSERT2 ( n_args == 0, ppr name <+> ppr n_args ) + EnterIt -- Not a function + +getCallMethod _name _ (LFBlackHole _) _n_args + = SlowCall -- Presumably the black hole has by now + -- been updated, but we don't know with + -- what, so we slow call it + +getCallMethod _name _ LFLetNoEscape _n_args + = JumpToIt + +isStandardFormThunk :: LambdaFormInfo -> Bool +isStandardFormThunk (LFThunk _ _ _ (SelectorThunk _) _) = True +isStandardFormThunk (LFThunk _ _ _ (ApThunk _) _) = True +isStandardFormThunk _other_lf_info = False + +isKnownFun :: LambdaFormInfo -> Bool +isKnownFun (LFReEntrant _ _ _ _) = True +isKnownFun LFLetNoEscape = True +isKnownFun _ = False + +----------------------------------------------------------------------------- +-- staticClosureRequired +----------------------------------------------------------------------------- + +{- staticClosureRequired is never called (hence commented out) + + SimonMar writes (Sept 07) It's an optimisation we used to apply at + one time, I believe, but it got lost probably in the rewrite of + the RTS/code generator. I left that code there to remind me to + look into whether it was worth doing sometime + +{- Avoiding generating entries and info tables + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +At present, for every function we generate all of the following, +just in case. But they aren't always all needed, as noted below: + +[NB1: all of this applies only to *functions*. Thunks always +have closure, info table, and entry code.] + +[NB2: All are needed if the function is *exported*, just to play safe.] + +* Fast-entry code ALWAYS NEEDED + +* Slow-entry code + Needed iff (a) we have any un-saturated calls to the function + OR (b) the function is passed as an arg + OR (c) we're in the parallel world and the function has free vars + [Reason: in parallel world, we always enter functions + with free vars via the closure.] + +* The function closure + Needed iff (a) we have any un-saturated calls to the function + OR (b) the function is passed as an arg + OR (c) if the function has free vars (ie not top level) + + Why case (a) here? Because if the arg-satis check fails, + UpdatePAP stuffs a pointer to the function closure in the PAP. + [Could be changed; UpdatePAP could stuff in a code ptr instead, + but doesn't seem worth it.] + + [NB: these conditions imply that we might need the closure + without the slow-entry code. Here's how. + + f x y = let g w = ...x..y..w... + in + ...(g t)... + + Here we need a closure for g which contains x and y, + but since the calls are all saturated we just jump to the + fast entry point for g, with R1 pointing to the closure for g.] + + +* Standard info table + Needed iff (a) we have any un-saturated calls to the function + OR (b) the function is passed as an arg + OR (c) the function has free vars (ie not top level) + + NB. In the sequential world, (c) is only required so that the function closure has + an info table to point to, to keep the storage manager happy. + If (c) alone is true we could fake up an info table by choosing + one of a standard family of info tables, whose entry code just + bombs out. + + [NB In the parallel world (c) is needed regardless because + we enter functions with free vars via the closure.] + + If (c) is retained, then we'll sometimes generate an info table + (for storage mgr purposes) without slow-entry code. Then we need + to use an error label in the info table to substitute for the absent + slow entry code. +-} + +staticClosureRequired + :: Name + -> StgBinderInfo + -> LambdaFormInfo + -> Bool +staticClosureRequired binder bndr_info + (LFReEntrant top_level _ _ _) -- It's a function + = ASSERT( isTopLevel top_level ) + -- Assumption: it's a top-level, no-free-var binding + not (satCallsOnly bndr_info) + +staticClosureRequired binder other_binder_info other_lf_info = True +-} + +----------------------------------------------------------------------------- +-- Data types for closure information} +----------------------------------------------------------------------------- + + +{- Information about a closure, from the code generator's point of view. + +A ClosureInfo decribes the info pointer of a closure. It has +enough information + a) to construct the info table itself + b) to allocate a closure containing that info pointer (i.e. + it knows the info table label) + +We make a ClosureInfo for + - each let binding (both top level and not) + - each data constructor (for its shared static and + dynamic info tables) +-} + +data ClosureInfo + = ClosureInfo { + closureName :: !Name, -- The thing bound to this closure + closureLFInfo :: !LambdaFormInfo, -- NOTE: not an LFCon (see below) + closureSMRep :: !SMRep, -- representation used by storage mgr + closureSRT :: !C_SRT, -- What SRT applies to this closure + closureType :: !Type, -- Type of closure (ToDo: remove) + closureDescr :: !String -- closure description (for profiling) + } + + -- Constructor closures don't have a unique info table label (they use + -- the constructor's info table), and they don't have an SRT. + | ConInfo { + closureCon :: !DataCon, + closureSMRep :: !SMRep + } + +{- XXX temp imported from old ClosureInfo +-- C_SRT is what StgSyn.SRT gets translated to... +-- we add a label for the table, and expect only the 'offset/length' form + +data C_SRT = NoC_SRT + | C_SRT !CLabel !WordOff !StgHalfWord {-bitmap or escape-} + deriving (Eq) + +instance Outputable C_SRT where + ppr (NoC_SRT) = ptext SLIT("_no_srt_") + ppr (C_SRT label off bitmap) = parens (ppr label <> comma <> ppr off <> comma <> text (show bitmap)) +-} + +needsSRT :: C_SRT -> Bool +needsSRT NoC_SRT = False +needsSRT (C_SRT _ _ _) = True + + +-------------------------------------- +-- Building ClosureInfos +-------------------------------------- + +mkClosureInfo :: Bool -- Is static + -> Id + -> LambdaFormInfo + -> Int -> Int -- Total and pointer words + -> C_SRT + -> String -- String descriptor + -> ClosureInfo +mkClosureInfo is_static id lf_info tot_wds ptr_wds srt_info descr + = ClosureInfo { closureName = name, + closureLFInfo = lf_info, + closureSMRep = sm_rep, + closureSRT = srt_info, + closureType = idType id, + closureDescr = descr } + where + name = idName id + sm_rep = chooseSMRep is_static lf_info tot_wds ptr_wds + +mkConInfo :: Bool -- Is static + -> DataCon + -> Int -> Int -- Total and pointer words + -> ClosureInfo +mkConInfo is_static data_con tot_wds ptr_wds + = ConInfo { closureSMRep = sm_rep, + closureCon = data_con } + where + sm_rep = chooseSMRep is_static (mkConLFInfo data_con) tot_wds ptr_wds + + +-- We need a black-hole closure info to pass to @allocDynClosure@ when we +-- want to allocate the black hole on entry to a CAF. These are the only +-- ways to build an LFBlackHole, maintaining the invariant that it really +-- is a black hole and not something else. + +cafBlackHoleClosureInfo :: ClosureInfo -> ClosureInfo +cafBlackHoleClosureInfo (ClosureInfo { closureName = nm, + closureType = ty }) + = ClosureInfo { closureName = nm, + closureLFInfo = LFBlackHole mkCAFBlackHoleInfoTableLabel, + closureSMRep = BlackHoleRep, + closureSRT = NoC_SRT, + closureType = ty, + closureDescr = "" } +cafBlackHoleClosureInfo _ = panic "cafBlackHoleClosureInfo" + +seCafBlackHoleClosureInfo :: ClosureInfo -> ClosureInfo +seCafBlackHoleClosureInfo (ClosureInfo { closureName = nm, + closureType = ty }) + = ClosureInfo { closureName = nm, + closureLFInfo = LFBlackHole mkSECAFBlackHoleInfoTableLabel, + closureSMRep = BlackHoleRep, + closureSRT = NoC_SRT, + closureType = ty, + closureDescr = "" } +seCafBlackHoleClosureInfo _ = panic "seCafBlackHoleClosureInfo" + +-------------------------------------- +-- Extracting ClosureTypeInfo +-------------------------------------- + +closureTypeInfo :: ClosureInfo -> ClosureTypeInfo +closureTypeInfo cl_info + = case cl_info of + ConInfo { closureCon = con } + -> ConstrInfo (ptrs, nptrs) + (fromIntegral (dataConTagZ con)) + con_name + where + con_name = panic "closureTypeInfo" + -- Was: + -- cstr <- mkByteStringCLit $ dataConIdentity con + -- con_name = makeRelativeRefTo info_lbl cstr + + ClosureInfo { closureName = name, + closureLFInfo = LFReEntrant _ arity _ arg_descr, + closureSRT = srt } + -> FunInfo (ptrs, nptrs) + srt + (fromIntegral arity) + arg_descr + (CmmLabel (mkSlowEntryLabel name (clHasCafRefs cl_info))) + + ClosureInfo { closureLFInfo = LFThunk _ _ _ (SelectorThunk offset) _, + closureSRT = srt } + -> ThunkSelectorInfo (fromIntegral offset) srt + + ClosureInfo { closureLFInfo = LFThunk {}, + closureSRT = srt } + -> ThunkInfo (ptrs, nptrs) srt + + _ -> panic "unexpected lambda form in mkCmmInfo" + where +-- info_lbl = infoTableLabelFromCI cl_info + ptrs = fromIntegral $ closurePtrsSize cl_info + size = fromIntegral $ closureNonHdrSize cl_info + nptrs = size - ptrs + +-------------------------------------- +-- Functions about closure *sizes* +-------------------------------------- + +closureSize :: ClosureInfo -> WordOff +closureSize cl_info = hdr_size + closureNonHdrSize cl_info + where hdr_size | closureIsThunk cl_info = thunkHdrSize + | otherwise = fixedHdrSize + -- All thunks use thunkHdrSize, even if they are non-updatable. + -- this is because we don't have separate closure types for + -- updatable vs. non-updatable thunks, so the GC can't tell the + -- difference. If we ever have significant numbers of non- + -- updatable thunks, it might be worth fixing this. + +closureNonHdrSize :: ClosureInfo -> WordOff +closureNonHdrSize cl_info + = tot_wds + computeSlopSize tot_wds cl_info + where + tot_wds = closureGoodStuffSize cl_info + +closureGoodStuffSize :: ClosureInfo -> WordOff +closureGoodStuffSize cl_info + = let (ptrs, nonptrs) = sizes_from_SMRep (closureSMRep cl_info) + in ptrs + nonptrs + +closurePtrsSize :: ClosureInfo -> WordOff +closurePtrsSize cl_info + = let (ptrs, _) = sizes_from_SMRep (closureSMRep cl_info) + in ptrs + +-- not exported: +sizes_from_SMRep :: SMRep -> (WordOff,WordOff) +sizes_from_SMRep (GenericRep _ ptrs nonptrs _) = (ptrs, nonptrs) +sizes_from_SMRep BlackHoleRep = (0, 0) + +-- Computing slop size. WARNING: this looks dodgy --- it has deep +-- knowledge of what the storage manager does with the various +-- representations... +-- +-- Slop Requirements: every thunk gets an extra padding word in the +-- header, which takes the the updated value. + +slopSize :: ClosureInfo -> WordOff +slopSize cl_info = computeSlopSize payload_size cl_info + where payload_size = closureGoodStuffSize cl_info + +computeSlopSize :: WordOff -> ClosureInfo -> WordOff +computeSlopSize payload_size cl_info + = max 0 (minPayloadSize smrep updatable - payload_size) + where + smrep = closureSMRep cl_info + updatable = closureNeedsUpdSpace cl_info + +closureNeedsUpdSpace :: ClosureInfo -> Bool +-- We leave space for an update if either (a) the closure is updatable +-- or (b) it is a static thunk. This is because a static thunk needs +-- a static link field in a predictable place (after the slop), regardless +-- of whether it is updatable or not. +closureNeedsUpdSpace (ClosureInfo { closureLFInfo = + LFThunk TopLevel _ _ _ _ }) = True +closureNeedsUpdSpace cl_info = closureUpdReqd cl_info + +minPayloadSize :: SMRep -> Bool -> WordOff +minPayloadSize smrep updatable + = case smrep of + BlackHoleRep -> min_upd_size + GenericRep _ _ _ _ | updatable -> min_upd_size + GenericRep True _ _ _ -> 0 -- static + GenericRep False _ _ _ -> mIN_PAYLOAD_SIZE + -- ^^^^^___ dynamic + where + min_upd_size = + ASSERT(mIN_PAYLOAD_SIZE <= sIZEOF_StgSMPThunkHeader) + 0 -- check that we already have enough + -- room for mIN_SIZE_NonUpdHeapObject, + -- due to the extra header word in SMP + +-------------------------------------- +-- Other functions over ClosureInfo +-------------------------------------- + +blackHoleOnEntry :: ClosureInfo -> Bool +-- Static closures are never themselves black-holed. +-- Updatable ones will be overwritten with a CAFList cell, which points to a +-- black hole; +-- Single-entry ones have no fvs to plug, and we trust they don't form part +-- of a loop. + +blackHoleOnEntry ConInfo{} = False +blackHoleOnEntry (ClosureInfo { closureLFInfo = lf_info, closureSMRep = rep }) + | isStaticRep rep + = False -- Never black-hole a static closure + + | otherwise + = case lf_info of + LFReEntrant _ _ _ _ -> False + LFLetNoEscape -> False + LFThunk _ no_fvs updatable _ _ + -> if updatable + then not opt_OmitBlackHoling + else opt_DoTickyProfiling || not no_fvs + -- the former to catch double entry, + -- and the latter to plug space-leaks. KSW/SDM 1999-04. + + _other -> panic "blackHoleOnEntry" -- Should never happen + + +staticClosureNeedsLink :: ClosureInfo -> Bool +-- A static closure needs a link field to aid the GC when traversing +-- the static closure graph. But it only needs such a field if either +-- a) it has an SRT +-- b) it's a constructor with one or more pointer fields +-- In case (b), the constructor's fields themselves play the role +-- of the SRT. +staticClosureNeedsLink (ClosureInfo { closureSRT = srt }) + = needsSRT srt +staticClosureNeedsLink (ConInfo { closureSMRep = sm_rep, closureCon = con }) + = not (isNullaryRepDataCon con) && not_nocaf_constr + where + not_nocaf_constr = + case sm_rep of + GenericRep _ _ _ ConstrNoCaf -> False + _other -> True + +isStaticClosure :: ClosureInfo -> Bool +isStaticClosure cl_info = isStaticRep (closureSMRep cl_info) + +closureUpdReqd :: ClosureInfo -> Bool +closureUpdReqd ClosureInfo{ closureLFInfo = lf_info } = lfUpdatable lf_info +closureUpdReqd ConInfo{} = False + +lfUpdatable :: LambdaFormInfo -> Bool +lfUpdatable (LFThunk _ _ upd _ _) = upd +lfUpdatable (LFBlackHole _) = True + -- Black-hole closures are allocated to receive the results of an + -- alg case with a named default... so they need to be updated. +lfUpdatable _ = False + +closureIsThunk :: ClosureInfo -> Bool +closureIsThunk ClosureInfo{ closureLFInfo = lf_info } = isLFThunk lf_info +closureIsThunk ConInfo{} = False + +closureSingleEntry :: ClosureInfo -> Bool +closureSingleEntry (ClosureInfo { closureLFInfo = LFThunk _ _ upd _ _}) = not upd +closureSingleEntry _ = False + +closureReEntrant :: ClosureInfo -> Bool +closureReEntrant (ClosureInfo { closureLFInfo = LFReEntrant _ _ _ _ }) = True +closureReEntrant _ = False + +isConstrClosure_maybe :: ClosureInfo -> Maybe DataCon +isConstrClosure_maybe (ConInfo { closureCon = data_con }) = Just data_con +isConstrClosure_maybe _ = Nothing + +closureFunInfo :: ClosureInfo -> Maybe (Int, ArgDescr) +closureFunInfo (ClosureInfo { closureLFInfo = lf_info }) = lfFunInfo lf_info +closureFunInfo _ = Nothing + +lfFunInfo :: LambdaFormInfo -> Maybe (Int, ArgDescr) +lfFunInfo (LFReEntrant _ arity _ arg_desc) = Just (arity, arg_desc) +lfFunInfo _ = Nothing + +funTag :: ClosureInfo -> DynTag +funTag (ClosureInfo { closureLFInfo = lf_info }) = lfDynTag lf_info +funTag (ConInfo {}) = panic "funTag" + +isToplevClosure :: ClosureInfo -> Bool +isToplevClosure (ClosureInfo { closureLFInfo = lf_info }) + = case lf_info of + LFReEntrant TopLevel _ _ _ -> True + LFThunk TopLevel _ _ _ _ -> True + _other -> False +isToplevClosure _ = False + +-------------------------------------- +-- Label generation +-------------------------------------- + +infoTableLabelFromCI :: ClosureInfo -> CLabel +infoTableLabelFromCI cl@(ClosureInfo { closureName = name, + closureLFInfo = lf_info }) + = case lf_info of + LFBlackHole info -> info + + LFThunk _ _ upd_flag (SelectorThunk offset) _ -> + mkSelectorInfoLabel upd_flag offset + + LFThunk _ _ upd_flag (ApThunk arity) _ -> + mkApInfoTableLabel upd_flag arity + + LFThunk{} -> mkLocalInfoTableLabel name $ clHasCafRefs cl + + LFReEntrant _ _ _ _ -> mkLocalInfoTableLabel name $ clHasCafRefs cl + + _other -> panic "infoTableLabelFromCI" + +infoTableLabelFromCI cl@(ConInfo { closureCon = con, closureSMRep = rep }) + | isStaticRep rep = mkStaticInfoTableLabel name $ clHasCafRefs cl + | otherwise = mkConInfoTableLabel name $ clHasCafRefs cl + where + name = dataConName con + +-- ClosureInfo for a closure (as opposed to a constructor) is always local +closureLabelFromCI :: ClosureInfo -> CLabel +closureLabelFromCI cl@(ClosureInfo { closureName = nm }) = + mkLocalClosureLabel nm $ clHasCafRefs cl +closureLabelFromCI _ = panic "closureLabelFromCI" + +thunkEntryLabel :: Name -> CafInfo -> StandardFormInfo -> Bool -> CLabel +-- thunkEntryLabel is a local help function, not exported. It's used from both +-- entryLabelFromCI and getCallMethod. +thunkEntryLabel _thunk_id _ (ApThunk arity) upd_flag + = enterApLabel upd_flag arity +thunkEntryLabel _thunk_id _ (SelectorThunk offset) upd_flag + = enterSelectorLabel upd_flag offset +thunkEntryLabel thunk_id c _ _ + = enterIdLabel thunk_id c + +enterApLabel :: Bool -> Arity -> CLabel +enterApLabel is_updatable arity + | tablesNextToCode = mkApInfoTableLabel is_updatable arity + | otherwise = mkApEntryLabel is_updatable arity + +enterSelectorLabel :: Bool -> WordOff -> CLabel +enterSelectorLabel upd_flag offset + | tablesNextToCode = mkSelectorInfoLabel upd_flag offset + | otherwise = mkSelectorEntryLabel upd_flag offset + +enterIdLabel :: Name -> CafInfo -> CLabel +enterIdLabel id c + | tablesNextToCode = mkInfoTableLabel id c + | otherwise = mkEntryLabel id c + +enterLocalIdLabel :: Name -> CafInfo -> CLabel +enterLocalIdLabel id c + | tablesNextToCode = mkLocalInfoTableLabel id c + | otherwise = mkLocalEntryLabel id c + + +-------------------------------------- +-- Profiling +-------------------------------------- + +-- Profiling requires two pieces of information to be determined for +-- each closure's info table --- description and type. + +-- The description is stored directly in the @CClosureInfoTable@ when the +-- info table is built. + +-- The type is determined from the type information stored with the @Id@ +-- in the closure info using @closureTypeDescr@. + +closureValDescr, closureTypeDescr :: ClosureInfo -> String +closureValDescr (ClosureInfo {closureDescr = descr}) + = descr +closureValDescr (ConInfo {closureCon = con}) + = occNameString (getOccName con) + +closureTypeDescr (ClosureInfo { closureType = ty }) + = getTyDescription ty +closureTypeDescr (ConInfo { closureCon = data_con }) + = occNameString (getOccName (dataConTyCon data_con)) + +getTyDescription :: Type -> String +getTyDescription ty + = case (tcSplitSigmaTy ty) of { (_, _, tau_ty) -> + case tau_ty of + TyVarTy _ -> "*" + AppTy fun _ -> getTyDescription fun + FunTy _ res -> '-' : '>' : fun_result res + TyConApp tycon _ -> getOccString tycon + PredTy sty -> getPredTyDescription sty + ForAllTy _ ty -> getTyDescription ty + } + where + fun_result (FunTy _ res) = '>' : fun_result res + fun_result other = getTyDescription other + +getPredTyDescription :: PredType -> String +getPredTyDescription (ClassP cl _) = getOccString cl +getPredTyDescription (IParam ip _) = getOccString (ipNameName ip) +getPredTyDescription (EqPred ty1 _ty2) = getTyDescription ty1 -- Urk? + + +-------------------------------------- +-- SRTs/CAFs +-------------------------------------- + +-- This is horrible, but we need to know whether a closure may have CAFs. +clHasCafRefs :: ClosureInfo -> CafInfo +clHasCafRefs (ClosureInfo {closureSRT = srt}) = + case srt of NoC_SRT -> NoCafRefs + _ -> MayHaveCafRefs +clHasCafRefs (ConInfo {}) = NoCafRefs diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs new file mode 100644 index 0000000000..de1d77ad20 --- /dev/null +++ b/compiler/codeGen/StgCmmCon.hs @@ -0,0 +1,216 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C--: code generation for constructors +-- +-- This module provides the support code for StgCmm to deal with with +-- constructors on the RHSs of let(rec)s. +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmCon ( + cgTopRhsCon, buildDynCon, bindConArgs + ) where + +#include "HsVersions.h" + +import StgSyn +import CoreSyn ( AltCon(..) ) + +import StgCmmMonad +import StgCmmEnv +import StgCmmHeap +import StgCmmUtils +import StgCmmClosure +import StgCmmProf + +import Cmm +import CLabel +import SMRep +import CostCentre +import Constants +import DataCon +import FastString +import Id +import Literal +import PrelInfo +import Outputable +import Util ( lengthIs ) +import Char ( ord ) + + +--------------------------------------------------------------- +-- Top-level constructors +--------------------------------------------------------------- + +cgTopRhsCon :: Id -- Name of thing bound to this RHS + -> DataCon -- Id + -> [StgArg] -- Args + -> FCode (Id, CgIdInfo) +cgTopRhsCon id con args + = do { +#if mingw32_TARGET_OS + -- Windows DLLs have a problem with static cross-DLL refs. + ; this_pkg <- getThisPackage + ; ASSERT( not (isDllConApp this_pkg con args) ) return () +#endif + ; ASSERT( args `lengthIs` dataConRepArity con ) return () + + -- LAY IT OUT + ; let + name = idName id + lf_info = mkConLFInfo con + closure_label = mkClosureLabel name $ idCafInfo id + caffy = any stgArgHasCafRefs args + (closure_info, nv_args_w_offsets) + = layOutStaticConstr con (addArgReps args) + + get_lit (arg, _offset) = do { CmmLit lit <- getArgAmode arg + ; return lit } + + ; payload <- mapM get_lit nv_args_w_offsets + -- NB1: nv_args_w_offsets is sorted into ptrs then non-ptrs + -- NB2: all the amodes should be Lits! + + ; let closure_rep = mkStaticClosureFields + closure_info + dontCareCCS -- Because it's static data + caffy -- Has CAF refs + payload + + -- BUILD THE OBJECT + ; emitDataLits closure_label closure_rep + + -- RETURN + ; return (id, litIdInfo id lf_info (CmmLabel closure_label)) } + + +--------------------------------------------------------------- +-- Lay out and allocate non-top-level constructors +--------------------------------------------------------------- + +buildDynCon :: Id -- Name of the thing to which this constr will + -- be bound + -> CostCentreStack -- Where to grab cost centre from; + -- current CCS if currentOrSubsumedCCS + -> DataCon -- The data constructor + -> [StgArg] -- Its args + -> FCode CgIdInfo -- Return details about how to find it + +{- We used to pass a boolean indicating whether all the +args were of size zero, so we could use a static +construtor; but I concluded that it just isn't worth it. +Now I/O uses unboxed tuples there just aren't any constructors +with all size-zero args. + +The reason for having a separate argument, rather than looking at +the addr modes of the args is that we may be in a "knot", and +premature looking at the args will cause the compiler to black-hole! +-} + + +-------- buildDynCon: Nullary constructors -------------- +-- First we deal with the case of zero-arity constructors. They +-- will probably be unfolded, so we don't expect to see this case much, +-- if at all, but it does no harm, and sets the scene for characters. +-- +-- In the case of zero-arity constructors, or, more accurately, those +-- which have exclusively size-zero (VoidRep) args, we generate no code +-- at all. + +buildDynCon binder _cc con [] + = return (litIdInfo binder (mkConLFInfo con) + (CmmLabel (mkClosureLabel (dataConName con) (idCafInfo binder)))) + +-------- buildDynCon: Charlike and Intlike constructors ----------- +{- The following three paragraphs about @Char@-like and @Int@-like +closures are obsolete, but I don't understand the details well enough +to properly word them, sorry. I've changed the treatment of @Char@s to +be analogous to @Int@s: only a subset is preallocated, because @Char@ +has now 31 bits. Only literals are handled here. -- Qrczak + +Now for @Char@-like closures. We generate an assignment of the +address of the closure to a temporary. It would be possible simply to +generate no code, and record the addressing mode in the environment, +but we'd have to be careful if the argument wasn't a constant --- so +for simplicity we just always asssign to a temporary. + +Last special case: @Int@-like closures. We only special-case the +situation in which the argument is a literal in the range +@mIN_INTLIKE@..@mAX_INTLILKE@. NB: for @Char@-like closures we can +work with any old argument, but for @Int@-like ones the argument has +to be a literal. Reason: @Char@ like closures have an argument type +which is guaranteed in range. + +Because of this, we use can safely return an addressing mode. -} + +buildDynCon binder _cc con [arg] + | maybeIntLikeCon con + , StgLitArg (MachInt val) <- arg + , val <= fromIntegral mAX_INTLIKE -- Comparisons at type Integer! + , val >= fromIntegral mIN_INTLIKE -- ...ditto... + = do { let intlike_lbl = mkRtsGcPtrLabel (sLit "stg_INTLIKE_closure") + val_int = fromIntegral val :: Int + offsetW = (val_int - mIN_INTLIKE) * (fixedHdrSize + 1) + -- INTLIKE closures consist of a header and one word payload + intlike_amode = cmmLabelOffW intlike_lbl offsetW + ; return (litIdInfo binder (mkConLFInfo con) intlike_amode) } + +buildDynCon binder _cc con [arg] + | maybeCharLikeCon con + , StgLitArg (MachChar val) <- arg + , let val_int = ord val :: Int + , val_int <= mAX_CHARLIKE + , val_int >= mIN_CHARLIKE + = do { let charlike_lbl = mkRtsGcPtrLabel (sLit "stg_CHARLIKE_closure") + offsetW = (val_int - mIN_CHARLIKE) * (fixedHdrSize + 1) + -- CHARLIKE closures consist of a header and one word payload + charlike_amode = cmmLabelOffW charlike_lbl offsetW + ; return (litIdInfo binder (mkConLFInfo con) charlike_amode) } + +-------- buildDynCon: the general case ----------- +buildDynCon binder ccs con args + = do { let (cl_info, args_w_offsets) = layOutDynConstr con (addArgReps args) + -- No void args in args_w_offsets + ; tmp <- allocDynClosure cl_info use_cc blame_cc args_w_offsets + ; return (regIdInfo binder lf_info tmp) } + where + lf_info = mkConLFInfo con + + use_cc -- cost-centre to stick in the object + | currentOrSubsumedCCS ccs = curCCS + | otherwise = CmmLit (mkCCostCentreStack ccs) + + blame_cc = use_cc -- cost-centre on which to blame the alloc (same) + + +--------------------------------------------------------------- +-- Binding constructor arguments +--------------------------------------------------------------- + +bindConArgs :: AltCon -> LocalReg -> [Id] -> FCode [LocalReg] +-- bindConArgs is called from cgAlt of a case +-- (bindConArgs con args) augments the environment with bindings for the +-- binders args, assuming that we have just returned from a 'case' which +-- found a con +bindConArgs (DataAlt con) base args + = ASSERT(not (isUnboxedTupleCon con)) + mapM bind_arg args_w_offsets + where + (_, args_w_offsets) = layOutDynConstr con (addIdReps args) + + tag = tagForCon con + + -- The binding below forces the masking out of the tag bits + -- when accessing the constructor field. + bind_arg :: (Id, VirtualHpOffset) -> FCode LocalReg + bind_arg (arg, offset) + = do { emit $ mkTaggedObjectLoad (idToReg arg) base offset tag + ; bindArgToReg arg } + +bindConArgs _other_con _base args + = ASSERT( null args ) return [] + + + diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs new file mode 100644 index 0000000000..c43bf80174 --- /dev/null +++ b/compiler/codeGen/StgCmmEnv.hs @@ -0,0 +1,209 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C-- code generation: the binding environment +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmEnv ( + CgIdInfo, + + cgIdInfoId, cgIdInfoLF, + + litIdInfo, lneIdInfo, regIdInfo, + idInfoToAmode, + + addBindC, addBindsC, + + bindArgsToRegs, bindToReg, rebindToReg, + bindArgToReg, idToReg, + getArgAmode, getNonVoidArgAmodes, + getCgIdInfo, + maybeLetNoEscape, + ) where + +#include "HsVersions.h" + +import StgCmmMonad +import StgCmmUtils +import StgCmmClosure + +import CLabel + +import BlockId +import Cmm +import CmmUtils +import FastString +import PprCmm ( {- instance Outputable -} ) +import Id +import VarEnv +import Maybes +import Name +import StgSyn +import Outputable + + + +------------------------------------- +-- Manipulating CgIdInfo +------------------------------------- + +mkCgIdInfo :: Id -> LambdaFormInfo -> CmmExpr -> CgIdInfo +mkCgIdInfo id lf expr + = CgIdInfo { cg_id = id, cg_loc = CmmLoc expr, + cg_lf = lf, cg_rep = idPrimRep id, + cg_tag = lfDynTag lf } + +lneIdInfo :: Id -> [LocalReg] -> CgIdInfo +lneIdInfo id regs + = CgIdInfo { cg_id = id, cg_loc = LneLoc blk_id regs, + cg_lf = lf, cg_rep = idPrimRep id, + cg_tag = lfDynTag lf } + where + lf = mkLFLetNoEscape + blk_id = mkBlockId (idUnique id) + +litIdInfo :: Id -> LambdaFormInfo -> CmmLit -> CgIdInfo +litIdInfo id lf_info lit = mkCgIdInfo id lf_info (CmmLit lit) + +regIdInfo :: Id -> LambdaFormInfo -> LocalReg -> CgIdInfo +regIdInfo id lf_info reg = mkCgIdInfo id lf_info (CmmReg (CmmLocal reg)) + +idInfoToAmode :: CgIdInfo -> CmmExpr +-- Returns a CmmExpr for the *tagged* pointer +idInfoToAmode (CgIdInfo { cg_loc = CmmLoc e, cg_tag = tag }) + = addDynTag e tag +idInfoToAmode cg_info + = pprPanic "idInfoToAmode" (ppr (cg_id cg_info)) -- LneLoc + +addDynTag :: CmmExpr -> DynTag -> CmmExpr +-- A tag adds a byte offset to the pointer +addDynTag expr tag = cmmOffsetB expr tag + +cgIdInfoId :: CgIdInfo -> Id +cgIdInfoId = cg_id + +cgIdInfoLF :: CgIdInfo -> LambdaFormInfo +cgIdInfoLF = cg_lf + +maybeLetNoEscape :: CgIdInfo -> Maybe (BlockId, [LocalReg]) +maybeLetNoEscape (CgIdInfo { cg_loc = LneLoc blk_id args}) = Just (blk_id, args) +maybeLetNoEscape _other = Nothing + + + +--------------------------------------------------------- +-- The binding environment +-- +-- There are three basic routines, for adding (addBindC), +-- modifying(modifyBindC) and looking up (getCgIdInfo) bindings. +--------------------------------------------------------- + +addBindC :: Id -> CgIdInfo -> FCode () +addBindC name stuff_to_bind = do + binds <- getBinds + setBinds $ extendVarEnv binds name stuff_to_bind + +addBindsC :: [(Id, CgIdInfo)] -> FCode () +addBindsC new_bindings = do + binds <- getBinds + let new_binds = foldl (\ binds (name,info) -> extendVarEnv binds name info) + binds + new_bindings + setBinds new_binds + +getCgIdInfo :: Id -> FCode CgIdInfo +getCgIdInfo id + = do { -- Try local bindings first + ; local_binds <- getBinds + ; case lookupVarEnv local_binds id of { + Just info -> return info ; + Nothing -> do + + { -- Try top-level bindings + static_binds <- getStaticBinds + ; case lookupVarEnv static_binds id of { + Just info -> return info ; + Nothing -> + + -- Should be imported; make up a CgIdInfo for it + let + name = idName id + in + if isExternalName name then do + let ext_lbl = CmmLabel (mkClosureLabel name $ idCafInfo id) + return (litIdInfo id (mkLFImported id) ext_lbl) + else + -- Bug + cgLookupPanic id + }}}} + +cgLookupPanic :: Id -> FCode a +cgLookupPanic id + = do static_binds <- getStaticBinds + local_binds <- getBinds + srt <- getSRTLabel + pprPanic "StgCmmEnv: variable not found" + (vcat [ppr id, + ptext (sLit "static binds for:"), + vcat [ ppr (cg_id info) | info <- varEnvElts static_binds ], + ptext (sLit "local binds for:"), + vcat [ ppr (cg_id info) | info <- varEnvElts local_binds ], + ptext (sLit "SRT label") <+> pprCLabel srt + ]) + + +-------------------- +getArgAmode :: StgArg -> FCode CmmExpr +getArgAmode (StgVarArg var) = do { info <- getCgIdInfo var; return (idInfoToAmode info) } +getArgAmode (StgLitArg lit) = return (CmmLit (mkSimpleLit lit)) +getArgAmode (StgTypeArg _) = panic "getArgAmode: type arg" + +getNonVoidArgAmodes :: [StgArg] -> FCode [CmmExpr] +-- NB: Filters out void args, +-- so the result list may be shorter than the argument list +getNonVoidArgAmodes [] = return [] +getNonVoidArgAmodes (arg:args) + | isVoidRep (argPrimRep arg) = getNonVoidArgAmodes args + | otherwise = do { amode <- getArgAmode arg + ; amodes <- getNonVoidArgAmodes args + ; return ( amode : amodes ) } + + +------------------------------------------------------------------------ +-- Interface functions for binding and re-binding names +------------------------------------------------------------------------ + +bindToReg :: Id -> LambdaFormInfo -> FCode LocalReg +-- Bind an Id to a fresh LocalReg +bindToReg id lf_info + = do { let reg = idToReg id + ; addBindC id (regIdInfo id lf_info reg) + ; return reg } + +rebindToReg :: Id -> FCode LocalReg +-- Like bindToReg, but the Id is already in scope, so +-- get its LF info from the envt +rebindToReg id + = do { info <- getCgIdInfo id + ; bindToReg id (cgIdInfoLF info) } + +bindArgToReg :: Id -> FCode LocalReg +bindArgToReg id = bindToReg id (mkLFArgument id) + +bindArgsToRegs :: [Id] -> FCode [LocalReg] +bindArgsToRegs args = mapM bindArgToReg args + +idToReg :: Id -> LocalReg +-- Make a register from an Id, typically a function argument, +-- free variable, or case binder +-- +-- We re-use the Unique from the Id to make it easier to see what is going on +-- +-- By now the Ids should be uniquely named; else one would worry +-- about accidental collision +idToReg id = LocalReg (idUnique id) + (primRepCmmType (idPrimRep id)) + + diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs new file mode 100644 index 0000000000..74c69b7216 --- /dev/null +++ b/compiler/codeGen/StgCmmExpr.hs @@ -0,0 +1,451 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C-- code generation: expressions +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmExpr ( cgExpr ) where + +#define FAST_STRING_NOT_NEEDED +#include "HsVersions.h" + +import {-# SOURCE #-} StgCmmBind ( cgBind ) + +import StgCmmMonad +import StgCmmHeap +import StgCmmEnv +import StgCmmCon +import StgCmmProf +import StgCmmLayout +import StgCmmPrim +import StgCmmHpc +import StgCmmTicky +import StgCmmUtils +import StgCmmClosure + +import StgSyn + +import MkZipCfgCmm +import BlockId +import Cmm() +import CmmExpr +import CoreSyn +import DataCon +import Id +import TyCon +import CostCentre ( CostCentreStack, currentCCS ) +import Maybes +import Util +import FastString +import Outputable + +------------------------------------------------------------------------ +-- cgExpr: the main function +------------------------------------------------------------------------ + +cgExpr :: StgExpr -> FCode () + +cgExpr (StgApp fun args) = cgIdApp fun args +cgExpr (StgOpApp op args ty) = cgOpApp op args ty +cgExpr (StgConApp con args) = cgConApp con args + +cgExpr (StgSCC cc expr) = do { emitSetCCC cc; cgExpr expr } +cgExpr (StgTick m n expr) = do { emit (mkTickBox m n); cgExpr expr } +cgExpr (StgLit lit) = emitReturn [CmmLit (mkSimpleLit lit)] + +cgExpr (StgLet binds expr) = do { emit (mkComment $ mkFastString "calling cgBind"); cgBind binds; emit (mkComment $ mkFastString "calling cgExpr"); cgExpr expr } +cgExpr (StgLetNoEscape _ _ binds expr) = do { cgLneBinds binds; cgExpr expr } + +cgExpr (StgCase expr _live_vars _save_vars bndr srt alt_type alts) + = cgCase expr bndr srt alt_type alts + +cgExpr (StgLam {}) = panic "cgExpr: StgLam" + +------------------------------------------------------------------------ +-- Let no escape +------------------------------------------------------------------------ + +{- Generating code for a let-no-escape binding, aka join point is very +very similar to whatwe do for a case expression. The duality is +between + let-no-escape x = b + in e +and + case e of ... -> b + +That is, the RHS of 'x' (ie 'b') will execute *later*, just like +the alternative of the case; it needs to be compiled in an environment +in which all volatile bindings are forgotten, and the free vars are +bound only to stable things like stack locations.. The 'e' part will +execute *next*, just like the scrutinee of a case. -} + +------------------------- +cgLneBinds :: StgBinding -> FCode () +cgLneBinds (StgNonRec bndr rhs) + = do { local_cc <- saveCurrentCostCentre + -- See Note [Saving the current cost centre] + ; (bndr,info) <- cgLetNoEscapeRhs local_cc bndr rhs + ; addBindC bndr info } + +cgLneBinds (StgRec pairs) + = do { local_cc <- saveCurrentCostCentre + ; new_bindings <- fixC (\ new_bindings -> do + { addBindsC new_bindings + ; listFCs [ cgLetNoEscapeRhs local_cc b e + | (b,e) <- pairs ] }) + + ; addBindsC new_bindings } + +------------------------- +cgLetNoEscapeRhs + :: Maybe LocalReg -- Saved cost centre + -> Id + -> StgRhs + -> FCode (Id, CgIdInfo) + +cgLetNoEscapeRhs local_cc bndr (StgRhsClosure cc _bi _ _upd srt args body) + = cgLetNoEscapeClosure bndr local_cc cc srt args body +cgLetNoEscapeRhs local_cc bndr (StgRhsCon cc con args) + = cgLetNoEscapeClosure bndr local_cc cc NoSRT [] (StgConApp con args) + -- For a constructor RHS we want to generate a single chunk of + -- code which can be jumped to from many places, which will + -- return the constructor. It's easy; just behave as if it + -- was an StgRhsClosure with a ConApp inside! + +------------------------- +cgLetNoEscapeClosure + :: Id -- binder + -> Maybe LocalReg -- Slot for saved current cost centre + -> CostCentreStack -- XXX: *** NOT USED *** why not? + -> SRT + -> [Id] -- Args (as in \ args -> body) + -> StgExpr -- Body (as in above) + -> FCode (Id, CgIdInfo) + +cgLetNoEscapeClosure bndr cc_slot _unused_cc srt args body + = do { arg_regs <- forkProc $ do + { restoreCurrentCostCentre cc_slot + ; arg_regs <- bindArgsToRegs args + ; c_srt <- getSRTInfo srt + ; altHeapCheck arg_regs c_srt (cgExpr body) + -- Using altHeapCheck just reduces + -- instructions to save on stack + ; return arg_regs } + ; return (bndr, lneIdInfo bndr arg_regs) } + + +------------------------------------------------------------------------ +-- Case expressions +------------------------------------------------------------------------ + +{- Note [Compiling case expressions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It is quite interesting to decide whether to put a heap-check at the +start of each alternative. Of course we certainly have to do so if +the case forces an evaluation, or if there is a primitive op which can +trigger GC. + +A more interesting situation is this (a Plan-B situation) + + !P!; + ...P... + case x# of + 0# -> !Q!; ...Q... + default -> !R!; ...R... + +where !x! indicates a possible heap-check point. The heap checks +in the alternatives *can* be omitted, in which case the topmost +heapcheck will take their worst case into account. + +In favour of omitting !Q!, !R!: + + - *May* save a heap overflow test, + if ...P... allocates anything. + + - We can use relative addressing from a single Hp to + get at all the closures so allocated. + + - No need to save volatile vars etc across heap checks + in !Q!, !R! + +Against omitting !Q!, !R! + + - May put a heap-check into the inner loop. Suppose + the main loop is P -> R -> P -> R... + Q is the loop exit, and only it does allocation. + This only hurts us if P does no allocation. If P allocates, + then there is a heap check in the inner loop anyway. + + - May do more allocation than reqd. This sometimes bites us + badly. For example, nfib (ha!) allocates about 30\% more space if the + worst-casing is done, because many many calls to nfib are leaf calls + which don't need to allocate anything. + + We can un-allocate, but that costs an instruction + +Neither problem hurts us if there is only one alternative. + +Suppose the inner loop is P->R->P->R etc. Then here is +how many heap checks we get in the *inner loop* under various +conditions + + Alooc Heap check in branches (!Q!, !R!)? + P Q R yes no (absorb to !P!) +-------------------------------------- + n n n 0 0 + n y n 0 1 + n . y 1 1 + y . y 2 1 + y . n 1 1 + +Best choices: absorb heap checks from Q and R into !P! iff + a) P itself does some allocation +or + b) P does allocation, or there is exactly one alternative + +We adopt (b) because that is more likely to put the heap check at the +entry to a function, when not many things are live. After a bunch of +single-branch cases, we may have lots of things live + +Hence: two basic plans for + + case e of r { alts } + +------ Plan A: the general case --------- + + ...save current cost centre... + + ...code for e, + with sequel (SetLocals r) + + ...restore current cost centre... + ...code for alts... + ...alts do their own heap checks + +------ Plan B: special case when --------- + (i) e does not allocate or call GC + (ii) either upstream code performs allocation + or there is just one alternative + + Then heap allocation in the (single) case branch + is absorbed by the upstream check. + Very common example: primops on unboxed values + + ...code for e, + with sequel (SetLocals r)... + + ...code for alts... + ...no heap check... +-} + + + +------------------------------------- +data GcPlan + = GcInAlts -- Put a GC check at the start the case alternatives, + [LocalReg] -- which binds these registers + SRT -- using this SRT + | NoGcInAlts -- The scrutinee is a primitive value, or a call to a + -- primitive op which does no GC. Absorb the allocation + -- of the case alternative(s) into the upstream check + +------------------------------------- +cgCase :: StgExpr -> Id -> SRT -> AltType -> [StgAlt] -> FCode () +cgCase scrut bndr srt alt_type alts + = do { up_hp_usg <- getVirtHp -- Upstream heap usage + ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts + alt_regs = map idToReg ret_bndrs + simple_scrut = isSimpleScrut scrut alt_type + gc_plan | not simple_scrut = GcInAlts alt_regs srt + | isSingleton alts = NoGcInAlts + | up_hp_usg > 0 = NoGcInAlts + | otherwise = GcInAlts alt_regs srt + + ; mb_cc <- maybeSaveCostCentre simple_scrut + ; c_srt <- getSRTInfo srt + ; withSequel (AssignTo alt_regs c_srt) + (cgExpr scrut) + ; restoreCurrentCostCentre mb_cc + + ; bindArgsToRegs ret_bndrs + ; cgAlts gc_plan bndr alt_type alts } + +----------------- +maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg) +maybeSaveCostCentre simple_scrut + | simple_scrut = saveCurrentCostCentre + | otherwise = return Nothing + + + +----------------- +isSimpleScrut :: StgExpr -> AltType -> Bool +-- Simple scrutinee, does not allocate +isSimpleScrut (StgOpApp _ _ _) _ = True +isSimpleScrut (StgLit _) _ = True +isSimpleScrut (StgApp _ []) (PrimAlt _) = True +isSimpleScrut _ _ = False + +----------------- +chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [Id] +-- These are the binders of a case that are assigned +-- by the evaluation of the scrutinee +-- Only non-void ones come back +chooseReturnBndrs bndr (PrimAlt _) _alts + = nonVoidIds [bndr] + +chooseReturnBndrs _bndr (UbxTupAlt _) [(_, ids, _, _)] + = nonVoidIds ids -- 'bndr' is not assigned! + +chooseReturnBndrs bndr (AlgAlt _) _alts + = [bndr] -- Only 'bndr' is assigned + +chooseReturnBndrs bndr PolyAlt _alts + = [bndr] -- Only 'bndr' is assigned + +chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs" + -- UbxTupALt has only one alternative + +nonVoidIds :: [Id] -> [Id] +nonVoidIds ids = [id | id <- ids, not (isVoidRep (idPrimRep id))] + +------------------------------------- +cgAlts :: GcPlan -> Id -> AltType -> [StgAlt] -> FCode () +-- At this point the result of the case are in the binders +cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)] + = maybeAltHeapCheck gc_plan (cgExpr rhs) + +cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)] + = maybeAltHeapCheck gc_plan (cgExpr rhs) + -- Here bndrs are *already* in scope, so don't rebind them + +cgAlts gc_plan bndr (PrimAlt _) alts + = do { tagged_cmms <- cgAltRhss gc_plan bndr alts + + ; let bndr_reg = CmmLocal (idToReg bndr) + (DEFAULT,deflt) = head tagged_cmms + -- PrimAlts always have a DEFAULT case + -- and it always comes first + + tagged_cmms' = [(lit,code) + | (LitAlt lit, code) <- tagged_cmms] + ; emit (mkCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt) } + +cgAlts gc_plan bndr (AlgAlt tycon) alts + = do { tagged_cmms <- cgAltRhss gc_plan bndr alts + + ; let fam_sz = tyConFamilySize tycon + bndr_reg = CmmLocal (idToReg bndr) + mb_deflt = case tagged_cmms of + ((DEFAULT,rhs) : _) -> Just rhs + _other -> Nothing + -- DEFAULT is always first, if present + + branches = [ (dataConTagZ con, cmm) + | (DataAlt con, cmm) <- tagged_cmms ] + + -- Is the constructor tag in the node reg? + ; if isSmallFamily fam_sz + then let -- Yes, bndr_reg has constr. tag in ls bits + tag_expr = cmmConstrTag1 (CmmReg bndr_reg) + branches' = [(tag+1,branch) | (tag,branch) <- branches] + in + emitSwitch tag_expr branches' mb_deflt 1 fam_sz + + else -- No, get tag from info table + let -- Note that ptr _always_ has tag 1 + -- when the family size is big enough + untagged_ptr = cmmRegOffB bndr_reg (-1) + tag_expr = getConstrTag (untagged_ptr) + in + emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) } + +cgAlts _ _ _ _ = panic "cgAlts" + -- UbxTupAlt and PolyAlt have only one alternative + +------------------- +cgAltRhss :: GcPlan -> Id -> [StgAlt] -> FCode [(AltCon, CmmAGraph)] +cgAltRhss gc_plan bndr alts + = forkAlts (map cg_alt alts) + where + base_reg = idToReg bndr + cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph) + cg_alt (con, bndrs, _uses, rhs) + = getCodeR $ + maybeAltHeapCheck gc_plan $ + do { bindConArgs con base_reg bndrs + ; cgExpr rhs + ; return con } + +maybeAltHeapCheck :: GcPlan -> FCode a -> FCode a +maybeAltHeapCheck NoGcInAlts code + = code +maybeAltHeapCheck (GcInAlts regs srt) code + = do { c_srt <- getSRTInfo srt + ; altHeapCheck regs c_srt code } + +----------------------------------------------------------------------------- +-- Tail calls +----------------------------------------------------------------------------- + +cgConApp :: DataCon -> [StgArg] -> FCode () +cgConApp con stg_args + = ASSERT( stg_args `lengthIs` dataConRepArity con ) + do { idinfo <- buildDynCon (dataConWorkId con) currentCCS con stg_args + -- The first "con" says that the name bound to this closure is + -- is "con", which is a bit of a fudge, but it only affects profiling + + ; emitReturn [idInfoToAmode idinfo] } + +cgIdApp :: Id -> [StgArg] -> FCode () +cgIdApp fun_id args + = do { fun_info <- getCgIdInfo fun_id + ; case maybeLetNoEscape fun_info of + Just (blk_id, lne_regs) -> cgLneJump blk_id lne_regs args + Nothing -> cgTailCall fun_id fun_info args } + +cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode () +cgLneJump blk_id lne_regs args -- Join point; discard sequel + = do { cmm_args <- getNonVoidArgAmodes args + ; emit (mkMultiAssign lne_regs cmm_args + <*> mkBranch blk_id) } + +cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode () +cgTailCall fun_id fun_info args + = case (getCallMethod fun_name (idCafInfo fun_id) lf_info (length args)) of + + -- A value in WHNF, so we can just return it. + ReturnIt -> emitReturn [fun] -- ToDo: does ReturnIt guarantee tagged? + + EnterIt -> ASSERT( null args ) -- Discarding arguments + do { [ret,call] <- forkAlts [ + getCode $ emitReturn [fun], -- Is tagged; no need to untag + getCode $ emitCall (entryCode fun) [fun]] -- Not tagged + ; emit (mkCmmIfThenElse (cmmIsTagged fun) ret call) } + + SlowCall -> do -- A slow function call via the RTS apply routines + { tickySlowCall lf_info args + ; slowCall fun args } + + -- A direct function call (possibly with some left-over arguments) + DirectEntry lbl arity -> do + { tickyDirectCall arity args + ; if node_points then + do call <- getCode $ directCall lbl arity args + emit (mkAssign nodeReg fun <*> call) + -- directCall lbl (arity+1) (StgVarArg fun_id : args)) + -- >>= (emit . (mkComment (mkFastString "DirectEntry") <*>)) + else directCall lbl arity args } + + JumpToIt {} -> panic "cgTailCall" -- ??? + + where + fun_name = idName fun_id + fun = idInfoToAmode fun_info + lf_info = cgIdInfoLF fun_info + node_points = nodeMustPointToIt lf_info + + + diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs new file mode 100644 index 0000000000..2d5d79e6ff --- /dev/null +++ b/compiler/codeGen/StgCmmForeign.hs @@ -0,0 +1,316 @@ +{-# OPTIONS -w #-} +-- Lots of missing type sigs etc + +----------------------------------------------------------------------------- +-- +-- Code generation for foreign calls. +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmForeign ( + cgForeignCall, + emitPrimCall, emitCCall, + emitSaveThreadState, -- will be needed by the Cmm parser + emitLoadThreadState, -- ditto + emitCloseNursery, + emitOpenNursery, + ) where + +#include "HsVersions.h" + +import StgSyn +import StgCmmProf +import StgCmmEnv +import StgCmmMonad +import StgCmmUtils +import StgCmmClosure + +import MkZipCfgCmm +import Cmm +import CmmUtils +import Type +import TysPrim +import CLabel +import SMRep +import ForeignCall +import Constants +import StaticFlags +import Maybes +import Outputable + +import Control.Monad + +----------------------------------------------------------------------------- +-- Code generation for Foreign Calls +----------------------------------------------------------------------------- + +cgForeignCall :: [LocalReg] -- r1,r2 where to put the results + -> [ForeignHint] + -> ForeignCall -- the op + -> [StgArg] -- x,y arguments + -> FCode () +-- Emits code for an unsafe foreign call: r1, r2 = foo( x, y, z ) + +cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_args + = do { cmm_args <- getFCallArgs stg_args + ; let (args, arg_hints) = unzip cmm_args + fc = ForeignConvention cconv arg_hints result_hints + (call_args, cmm_target) + = case target of + StaticTarget lbl -> (args, CmmLit (CmmLabel + (mkForeignLabel lbl (call_size args) False))) + DynamicTarget -> case args of fn:rest -> (rest, fn) + call_target = ForeignTarget cmm_target fc + + ; srt <- getSRTInfo (panic "emitForeignCall") -- SLPJ: Not sure what SRT + -- is right here + ; emitForeignCall safety results call_target call_args srt CmmMayReturn } + where + -- in the stdcall calling convention, the symbol needs @size appended + -- to it, where size is the total number of bytes of arguments. We + -- attach this info to the CLabel here, and the CLabel pretty printer + -- will generate the suffix when the label is printed. + call_size args + | StdCallConv <- cconv = Just (sum (map arg_size args)) + | otherwise = Nothing + + -- ToDo: this might not be correct for 64-bit API + arg_size arg = max (widthInBytes $ typeWidth $ cmmExprType arg) wORD_SIZE + +cgForeignCall _ _ (DNCall _) _ + = panic "cgForeignCall: DNCall" + +emitCCall :: [(CmmFormal,ForeignHint)] + -> CmmExpr + -> [(CmmActual,ForeignHint)] + -> FCode () +emitCCall hinted_results fn hinted_args + = emitForeignCall PlayRisky results (ForeignTarget fn fc) args + NoC_SRT -- No SRT b/c we PlayRisky + CmmMayReturn + where + (args, arg_hints) = unzip hinted_args + (results, result_hints) = unzip hinted_results + target = ForeignTarget fn fc + fc = ForeignConvention CCallConv arg_hints result_hints + + +emitPrimCall :: CmmFormal -> CallishMachOp -> CmmActuals -> FCode () +emitPrimCall res op args + = emitForeignCall PlayRisky [res] (PrimTarget op) args NoC_SRT CmmMayReturn + +-- alternative entry point, used by CmmParse +emitForeignCall + :: Safety + -> CmmFormals -- where to put the results + -> MidCallTarget -- the op + -> CmmActuals -- arguments + -> C_SRT -- the SRT of the calls continuation + -> CmmReturnInfo -- This can say "never returns" + -- only RTS procedures do this + -> FCode () +emitForeignCall safety results target args _srt _ret + | not (playSafe safety) = trace "emitForeignCall; ret is undone" $ do + let (caller_save, caller_load) = callerSaveVolatileRegs + emit caller_save + emit (mkUnsafeCall target results args) + emit caller_load + + | otherwise = panic "ToDo: emitForeignCall'" + +{- + | otherwise = do + -- Both 'id' and 'new_base' are KindNonPtr because they're + -- RTS only objects and are not subject to garbage collection + id <- newTemp bWord + new_base <- newTemp (cmmRegType (CmmGlobal BaseReg)) + temp_target <- load_target_into_temp target + let (caller_save, caller_load) = callerSaveVolatileRegs + emitSaveThreadState + emit caller_save + -- The CmmUnsafe arguments are only correct because this part + -- of the code hasn't been moved into the CPS pass yet. + -- Once that happens, this function will just emit a (CmmSafe srt) call, + -- and the CPS will will be the one to convert that + -- to this sequence of three CmmUnsafe calls. + emit (mkCmmCall (CmmCallee suspendThread CCallConv) + [ (id,AddrHint) ] + [ (CmmReg (CmmGlobal BaseReg), AddrHint) ] + CmmUnsafe + ret) + emit (mkCmmCall temp_target results args CmmUnsafe ret) + emit (mkCmmCall (CmmCallee resumeThread CCallConv) + [ (new_base, AddrHint) ] + [ (CmmReg (CmmLocal id), AddrHint) ] + CmmUnsafe + ret ) + -- Assign the result to BaseReg: we + -- might now have a different Capability! + emit (mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base))) + emit caller_load + emitLoadThreadState + +suspendThread = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("suspendThread"))) +resumeThread = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("resumeThread"))) +-} + + +{- +-- THINK ABOUT THIS (used to happen) +-- we might need to load arguments into temporaries before +-- making the call, because certain global registers might +-- overlap with registers that the C calling convention uses +-- for passing arguments. +-- +-- This is a HACK; really it should be done in the back end, but +-- it's easier to generate the temporaries here. +load_args_into_temps = mapM arg_assign_temp + where arg_assign_temp (e,hint) = do + tmp <- maybe_assign_temp e + return (tmp,hint) + +load_target_into_temp (CmmCallee expr conv) = do + tmp <- maybe_assign_temp expr + return (CmmCallee tmp conv) +load_target_into_temp other_target = + return other_target + +maybe_assign_temp e + | hasNoGlobalRegs e = return e + | otherwise = do + -- don't use assignTemp, it uses its own notion of "trivial" + -- expressions, which are wrong here. + -- this is a NonPtr because it only duplicates an existing + reg <- newTemp (cmmExprType e) --TODO FIXME NOW + emit (mkAssign (CmmLocal reg) e) + return (CmmReg (CmmLocal reg)) +-} + +-- ----------------------------------------------------------------------------- +-- Save/restore the thread state in the TSO + +-- This stuff can't be done in suspendThread/resumeThread, because it +-- refers to global registers which aren't available in the C world. + +emitSaveThreadState :: FCode () +emitSaveThreadState = do + -- CurrentTSO->sp = Sp; + emit $ mkStore (cmmOffset stgCurrentTSO tso_SP) stgSp + emitCloseNursery + -- and save the current cost centre stack in the TSO when profiling: + when opt_SccProfilingOn $ + emit (mkStore (cmmOffset stgCurrentTSO tso_CCCS) curCCS) + + -- CurrentNursery->free = Hp+1; +emitCloseNursery :: FCode () +emitCloseNursery = emit $ mkStore nursery_bdescr_free (cmmOffsetW stgHp 1) + +emitLoadThreadState :: FCode () +emitLoadThreadState = do + tso <- newTemp gcWord -- TODO FIXME NOW + emit $ catAGraphs [ + -- tso = CurrentTSO; + mkAssign (CmmLocal tso) stgCurrentTSO, + -- Sp = tso->sp; + mkAssign sp (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_SP) + bWord), + -- SpLim = tso->stack + RESERVED_STACK_WORDS; + mkAssign spLim (cmmOffsetW (cmmOffset (CmmReg (CmmLocal tso)) tso_STACK) + rESERVED_STACK_WORDS) + ] + emitOpenNursery + -- and load the current cost centre stack from the TSO when profiling: + when opt_SccProfilingOn $ + emit (mkStore curCCSAddr + (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) ccsType)) + +emitOpenNursery :: FCode () +emitOpenNursery = emit $ catAGraphs [ + -- Hp = CurrentNursery->free - 1; + mkAssign hp (cmmOffsetW (CmmLoad nursery_bdescr_free bWord) (-1)), + + -- HpLim = CurrentNursery->start + + -- CurrentNursery->blocks*BLOCK_SIZE_W - 1; + mkAssign hpLim + (cmmOffsetExpr + (CmmLoad nursery_bdescr_start bWord) + (cmmOffset + (CmmMachOp mo_wordMul [ + CmmMachOp (MO_SS_Conv W32 wordWidth) + [CmmLoad nursery_bdescr_blocks b32], + CmmLit (mkIntCLit bLOCK_SIZE) + ]) + (-1) + ) + ) + ] + + +nursery_bdescr_free = cmmOffset stgCurrentNursery oFFSET_bdescr_free +nursery_bdescr_start = cmmOffset stgCurrentNursery oFFSET_bdescr_start +nursery_bdescr_blocks = cmmOffset stgCurrentNursery oFFSET_bdescr_blocks + +tso_SP = tsoFieldB oFFSET_StgTSO_sp +tso_STACK = tsoFieldB oFFSET_StgTSO_stack +tso_CCCS = tsoProfFieldB oFFSET_StgTSO_CCCS + +-- The TSO struct has a variable header, and an optional StgTSOProfInfo in +-- the middle. The fields we're interested in are after the StgTSOProfInfo. +tsoFieldB :: ByteOff -> ByteOff +tsoFieldB off + | opt_SccProfilingOn = off + sIZEOF_StgTSOProfInfo + fixedHdrSize * wORD_SIZE + | otherwise = off + fixedHdrSize * wORD_SIZE + +tsoProfFieldB :: ByteOff -> ByteOff +tsoProfFieldB off = off + fixedHdrSize * wORD_SIZE + +stgSp = CmmReg sp +stgHp = CmmReg hp +stgCurrentTSO = CmmReg currentTSO +stgCurrentNursery = CmmReg currentNursery + +sp = CmmGlobal Sp +spLim = CmmGlobal SpLim +hp = CmmGlobal Hp +hpLim = CmmGlobal HpLim +currentTSO = CmmGlobal CurrentTSO +currentNursery = CmmGlobal CurrentNursery + +-- ----------------------------------------------------------------------------- +-- For certain types passed to foreign calls, we adjust the actual +-- value passed to the call. For ByteArray#/Array# we pass the +-- address of the actual array, not the address of the heap object. + +getFCallArgs :: [StgArg] -> FCode [(CmmExpr, ForeignHint)] +-- (a) Drop void args +-- (b) Add foriegn-call shim code +-- It's (b) that makes this differ from getNonVoidArgAmodes + +getFCallArgs args + = do { mb_cmms <- mapM get args + ; return (catMaybes mb_cmms) } + where + get arg | isVoidRep arg_rep + = return Nothing + | otherwise + = do { cmm <- getArgAmode arg + ; return (Just (add_shim arg_ty cmm, hint)) } + where + arg_ty = stgArgType arg + arg_rep = typePrimRep arg_ty + hint = typeForeignHint arg_ty + +add_shim :: Type -> CmmExpr -> CmmExpr +add_shim arg_ty expr + | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon + = cmmOffsetB expr arrPtrsHdrSize + + | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon + = cmmOffsetB expr arrWordsHdrSize + + | otherwise = expr + where + tycon = tyConAppTyCon (repType arg_ty) + -- should be a tycon app, since this is a foreign call diff --git a/compiler/codeGen/StgCmmGran.hs b/compiler/codeGen/StgCmmGran.hs new file mode 100644 index 0000000000..5fad2bfc09 --- /dev/null +++ b/compiler/codeGen/StgCmmGran.hs @@ -0,0 +1,131 @@ +----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow -2006 +-- +-- Code generation relaed to GpH +-- (a) parallel +-- (b) GranSim +-- +----------------------------------------------------------------------------- + +{-# OPTIONS -w #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + +module StgCmmGran ( + staticGranHdr,staticParHdr, + granThunk, granYield, + doGranAllocate + ) where + +-- This entire module consists of no-op stubs at the moment +-- GranSim worked once, but it certainly doesn't any more +-- I've left the calls, though, in case anyone wants to resurrect it + +import StgCmmMonad +import Id +import Cmm + +staticGranHdr :: [CmmLit] +staticGranHdr = [] + +staticParHdr :: [CmmLit] +staticParHdr = [] + +doGranAllocate :: VirtualHpOffset -> FCode () +-- Must be lazy in the amount of allocation +doGranAllocate n = return () + +granFetchAndReschedule :: [(Id,GlobalReg)] -> Bool -> FCode () +granFetchAndReschedule regs node_reqd = return () + +granYield :: [LocalReg] -> Bool -> FCode () +granYield regs node_reqd = return () + +granThunk :: Bool -> FCode () +granThunk node_points = return () + +----------------------------------------------------------------- +{- ------- Everything below here is commented out ------------- +----------------------------------------------------------------- + +-- Parallel header words in a static closure +staticParHdr :: [CmmLit] +-- Parallel header words in a static closure +staticParHdr = [] + +staticGranHdr :: [CmmLit] +-- Gransim header words in a static closure +staticGranHdr = [] + +doGranAllocate :: CmmExpr -> Code +-- macro DO_GRAN_ALLOCATE +doGranAllocate hp + | not opt_GranMacros = nopC + | otherwise = panic "doGranAllocate" + + + +------------------------- +granThunk :: Bool -> FCode () +-- HWL: insert macros for GrAnSim; 2 versions depending on liveness of node +-- (we prefer fetchAndReschedule-style context switches to yield ones) +granThunk node_points + | node_points = granFetchAndReschedule [] node_points + | otherwise = granYield [] node_points + +granFetchAndReschedule :: [(Id,GlobalReg)] -- Live registers + -> Bool -- Node reqd? + -> Code +-- Emit code for simulating a fetch and then reschedule. +granFetchAndReschedule regs node_reqd + | opt_GranMacros && (node `elem` map snd regs || node_reqd) + = do { fetch + ; reschedule liveness node_reqd } + | otherwise + = nopC + where + liveness = mkRegLiveness regs 0 0 + +fetch = panic "granFetch" + -- Was: absC (CMacroStmt GRAN_FETCH []) + --HWL: generate GRAN_FETCH macro for GrAnSim + -- currently GRAN_FETCH and GRAN_FETCH_AND_RESCHEDULE are miai + +reschedule liveness node_reqd = panic "granReschedule" + -- Was: absC (CMacroStmt GRAN_RESCHEDULE [ + -- mkIntCLit (I# (word2Int# liveness_mask)), + -- mkIntCLit (if node_reqd then 1 else 0)]) + + +------------------------- +-- The @GRAN_YIELD@ macro is taken from JSM's code for Concurrent Haskell. It +-- allows to context-switch at places where @node@ is not alive (it uses the +-- @Continue@ rather than the @EnterNodeCode@ function in the RTS). We emit +-- this kind of macro at the beginning of the following kinds of basic bocks: +-- \begin{itemize} +-- \item Slow entry code where node is not alive (see @CgClosure.lhs@). Normally +-- we use @fetchAndReschedule@ at a slow entry code. +-- \item Fast entry code (see @CgClosure.lhs@). +-- \item Alternatives in case expressions (@CLabelledCode@ structures), provided +-- that they are not inlined (see @CgCases.lhs@). These alternatives will +-- be turned into separate functions. + +granYield :: [(Id,GlobalReg)] -- Live registers + -> Bool -- Node reqd? + -> Code + +granYield regs node_reqd + | opt_GranMacros && node_reqd = yield liveness + | otherwise = nopC + where + liveness = mkRegLiveness regs 0 0 + +yield liveness = panic "granYield" + -- Was : absC (CMacroStmt GRAN_YIELD + -- [mkIntCLit (I# (word2Int# liveness_mask))]) + +-} diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs new file mode 100644 index 0000000000..6a8a4354e1 --- /dev/null +++ b/compiler/codeGen/StgCmmHeap.hs @@ -0,0 +1,519 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C--: heap management functions +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmHeap ( + getVirtHp, setVirtHp, setRealHp, + getHpRelOffset, hpRel, + + entryHeapCheck, altHeapCheck, + + layOutDynConstr, layOutStaticConstr, + mkVirtHeapOffsets, mkStaticClosureFields, mkStaticClosure, + + allocDynClosure, emitSetDynHdr + ) where + +#include "HsVersions.h" + +import StgSyn +import CLabel +import StgCmmLayout +import StgCmmUtils +import StgCmmMonad +import StgCmmProf +import StgCmmTicky +import StgCmmGran +import StgCmmClosure +import StgCmmEnv + +import MkZipCfgCmm + +import SMRep +import CmmExpr +import CmmUtils +import DataCon +import TyCon +import CostCentre +import Outputable +import FastString( LitString, mkFastString, sLit ) +import Constants +import Data.List + + +----------------------------------------------------------- +-- Layout of heap objects +----------------------------------------------------------- + +layOutDynConstr, layOutStaticConstr + :: DataCon -> [(PrimRep, a)] + -> (ClosureInfo, [(a, VirtualHpOffset)]) +-- No Void arguments in result + +layOutDynConstr = layOutConstr False +layOutStaticConstr = layOutConstr True + +layOutConstr :: Bool -> DataCon -> [(PrimRep, a)] + -> (ClosureInfo, [(a, VirtualHpOffset)]) +layOutConstr is_static data_con args + = (mkConInfo is_static data_con tot_wds ptr_wds, + things_w_offsets) + where + (tot_wds, -- #ptr_wds + #nonptr_wds + ptr_wds, -- #ptr_wds + things_w_offsets) = mkVirtHeapOffsets False{-not a thunk-} args + + +----------------------------------------------------------- +-- Initialise dynamic heap objects +----------------------------------------------------------- + +allocDynClosure + :: ClosureInfo + -> CmmExpr -- Cost Centre to stick in the object + -> CmmExpr -- Cost Centre to blame for this alloc + -- (usually the same; sometimes "OVERHEAD") + + -> [(StgArg, VirtualHpOffset)] -- Offsets from start of the object + -- ie Info ptr has offset zero. + -- No void args in here + -> FCode LocalReg + +-- allocDynClosure allocates the thing in the heap, +-- and modifies the virtual Hp to account for this. + +-- Note [Return a LocalReg] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- allocDynClosure returns a LocalReg, not a (Hp+8) CmmExpr. +-- Reason: +-- ...allocate object... +-- obj = Hp + 8 +-- y = f(z) +-- ...here obj is still valid, +-- but Hp+8 means something quite different... + + +allocDynClosure cl_info use_cc _blame_cc args_w_offsets + = do { virt_hp <- getVirtHp + + -- SAY WHAT WE ARE ABOUT TO DO + ; tickyDynAlloc cl_info + ; profDynAlloc cl_info use_cc + -- ToDo: This is almost certainly wrong + -- We're ignoring blame_cc. But until we've + -- fixed the boxing hack in chooseDynCostCentres etc, + -- we're worried about making things worse by "fixing" + -- this part to use blame_cc! + + -- FIND THE OFFSET OF THE INFO-PTR WORD + ; let info_offset = virt_hp + 1 + -- info_offset is the VirtualHpOffset of the first + -- word of the new object + -- Remember, virtHp points to last allocated word, + -- ie 1 *before* the info-ptr word of new object. + + info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info)) + + -- ALLOCATE THE OBJECT + ; base <- getHpRelOffset info_offset + ; emit (mkComment $ mkFastString "allocDynClosure") + ; emitSetDynHdr base info_ptr use_cc + ; let (args, offsets) = unzip args_w_offsets + ; cmm_args <- mapM getArgAmode args -- No void args + ; hpStore base cmm_args offsets + + -- BUMP THE VIRTUAL HEAP POINTER + ; setVirtHp (virt_hp + closureSize cl_info) + + -- Assign to a temporary and return + -- Note [Return a LocalReg] + ; hp_rel <- getHpRelOffset info_offset + ; assignTemp hp_rel } + +emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode () +emitSetDynHdr base info_ptr ccs + = hpStore base header [0..] + where + header :: [CmmExpr] + header = [info_ptr] ++ dynProfHdr ccs + -- ToDo: Gransim stuff + -- ToDo: Parallel stuff + -- No ticky header + +hpStore :: CmmExpr -> [CmmExpr] -> [VirtualHpOffset] -> FCode () +-- Store the item (expr,off) in base[off] +hpStore base vals offs + = emit (catAGraphs (zipWith mk_store vals offs)) + where + mk_store val off = mkStore (cmmOffsetW base off) val + + +----------------------------------------------------------- +-- Layout of static closures +----------------------------------------------------------- + +-- Make a static closure, adding on any extra padding needed for CAFs, +-- and adding a static link field if necessary. + +mkStaticClosureFields + :: ClosureInfo + -> CostCentreStack + -> Bool -- Has CAF refs + -> [CmmLit] -- Payload + -> [CmmLit] -- The full closure +mkStaticClosureFields cl_info ccs caf_refs payload + = mkStaticClosure info_lbl ccs payload padding_wds + static_link_field saved_info_field + where + info_lbl = infoTableLabelFromCI cl_info + + -- CAFs must have consistent layout, regardless of whether they + -- are actually updatable or not. The layout of a CAF is: + -- + -- 3 saved_info + -- 2 static_link + -- 1 indirectee + -- 0 info ptr + -- + -- the static_link and saved_info fields must always be in the same + -- place. So we use closureNeedsUpdSpace rather than + -- closureUpdReqd here: + + is_caf = closureNeedsUpdSpace cl_info + + padding_wds + | not is_caf = [] + | otherwise = ASSERT(null payload) [mkIntCLit 0] + + static_link_field + | is_caf || staticClosureNeedsLink cl_info = [static_link_value] + | otherwise = [] + + saved_info_field + | is_caf = [mkIntCLit 0] + | otherwise = [] + + -- for a static constructor which has NoCafRefs, we set the + -- static link field to a non-zero value so the garbage + -- collector will ignore it. + static_link_value + | caf_refs = mkIntCLit 0 + | otherwise = mkIntCLit 1 + + +mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit] + -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit] +mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_field + = [CmmLabel info_lbl] + ++ variable_header_words + ++ payload + ++ padding_wds + ++ static_link_field + ++ saved_info_field + where + variable_header_words + = staticGranHdr + ++ staticParHdr + ++ staticProfHdr ccs + ++ staticTickyHdr + +----------------------------------------------------------- +-- Heap overflow checking +----------------------------------------------------------- + +{- Note [Heap checks] + ~~~~~~~~~~~~~~~~~~ +Heap checks come in various forms. We provide the following entry +points to the runtime system, all of which use the native C-- entry +convention. + + * gc() performs garbage collection and returns + nothing to its caller + + * A series of canned entry points like + r = gc_1p( r ) + where r is a pointer. This performs gc, and + then returns its argument r to its caller. + + * A series of canned entry points like + gcfun_2p( f, x, y ) + where f is a function closure of arity 2 + This performs garbage collection, keeping alive the + three argument ptrs, and then tail-calls f(x,y) + +These are used in the following circumstances + +* entryHeapCheck: Function entry + (a) With a canned GC entry sequence + f( f_clo, x:ptr, y:ptr ) { + Hp = Hp+8 + if Hp > HpLim goto L + ... + L: HpAlloc = 8 + jump gcfun_2p( f_clo, x, y ) } + Note the tail call to the garbage collector; + it should do no register shuffling + + (b) No canned sequence + f( f_clo, x:ptr, y:ptr, ...etc... ) { + T: Hp = Hp+8 + if Hp > HpLim goto L + ... + L: HpAlloc = 8 + call gc() -- Needs an info table + goto T } + +* altHeapCheck: Immediately following an eval + Started as + case f x y of r { (p,q) -> rhs } + (a) With a canned sequence for the results of f + (which is the very common case since + all boxed cases return just one pointer + ... + r = f( x, y ) + K: -- K needs an info table + Hp = Hp+8 + if Hp > HpLim goto L + ...code for rhs... + + L: r = gc_1p( r ) + goto K } + + Here, the info table needed by the call + to gc_1p should be the *same* as the + one for the call to f; the C-- optimiser + spots this sharing opportunity + + (b) No canned sequence for results of f + Note second info table + ... + (r1,r2,r3) = call f( x, y ) + K: + Hp = Hp+8 + if Hp > HpLim goto L + ...code for rhs... + + L: call gc() -- Extra info table here + goto K + +* generalHeapCheck: Anywhere else + e.g. entry to thunk + case branch *not* following eval, + or let-no-escape + Exactly the same as the previous case: + + K: -- K needs an info table + Hp = Hp+8 + if Hp > HpLim goto L + ... + + L: call gc() + goto K +-} + +-------------------------------------------------------------- +-- A heap/stack check at a function or thunk entry point. + +entryHeapCheck :: LocalReg -- Function + -> [LocalReg] -- Args (empty for thunk) + -> C_SRT + -> FCode () + -> FCode () + +entryHeapCheck fun args srt code + = heapCheck gc_call code -- The 'fun' keeps relevant CAFs alive + where + gc_call + | null args = mkJump (CmmReg (CmmGlobal GCEnter1)) [CmmReg (CmmLocal fun)] + | otherwise = case gc_lbl args of + Just lbl -> mkJump (CmmLit (CmmLabel (mkRtsCodeLabel lbl))) + (map (CmmReg . CmmLocal) (fun:args)) + Nothing -> mkCmmCall generic_gc [] [] srt + + gc_lbl :: [LocalReg] -> Maybe LitString + gc_lbl [reg] + | isGcPtrType ty = Just (sLit "stg_gc_unpt_r1") -- "stg_gc_fun_1p" + | isFloatType ty = case width of + W32 -> Just (sLit "stg_gc_f1") -- "stg_gc_fun_f1" + W64 -> Just (sLit "stg_gc_d1") -- "stg_gc_fun_d1" + _other -> Nothing + | otherwise = case width of + W32 -> Just (sLit "stg_gc_unbx_r1") -- "stg_gc_fun_unbx_r1" + W64 -> Just (sLit "stg_gc_l1") -- "stg_gc_fun_unbx_l1" + _other -> Nothing -- Narrow cases + where + ty = localRegType reg + width = typeWidth ty + + gc_lbl regs = gc_lbl_ptrs (map (isGcPtrType . localRegType) regs) + + gc_lbl_ptrs :: [Bool] -> Maybe LitString + -- JD: TEMPORARY -- UNTIL THOSE FUNCTIONS EXIST... + --gc_lbl_ptrs [True,True] = Just (sLit "stg_gc_fun_2p") + --gc_lbl_ptrs [True,True,True] = Just (sLit "stg_gc_fun_3p") + gc_lbl_ptrs _ = Nothing + + +altHeapCheck :: [LocalReg] -> C_SRT -> FCode a -> FCode a +altHeapCheck regs srt code + = heapCheck gc_call code + where + gc_call + | null regs = mkCmmCall generic_gc [] [] srt + + | Just gc_lbl <- rts_label regs -- Canned call + = mkCmmCall (CmmLit (CmmLabel (mkRtsCodeLabel gc_lbl))) + regs + (map (CmmReg . CmmLocal) regs) + srt + | otherwise -- No canned call, and non-empty live vars + = mkCmmCall generic_gc [] [] srt + + rts_label [reg] + | isGcPtrType ty = Just (sLit "stg_gc_unpt_r1") + | isFloatType ty = case width of + W32 -> Just (sLit "stg_gc_f1") + W64 -> Just (sLit "stg_gc_d1") + _other -> Nothing + | otherwise = case width of + W32 -> Just (sLit "stg_gc_unbx_r1") + W64 -> Just (sLit "stg_gc_unbx_l1") + _other -> Nothing -- Narrow cases + where + ty = localRegType reg + width = typeWidth ty + + rts_label _ = Nothing + + +generic_gc :: CmmExpr -- The generic GC procedure; no params, no resuls +generic_gc = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_fun"))) + +------------------------------- +heapCheck :: CmmAGraph -> FCode a -> FCode a +heapCheck do_gc code + = getHeapUsage $ \ hpHw -> + do { emit (do_checks hpHw do_gc) + -- Emit heap checks, but be sure to do it lazily so + -- that the conditionals on hpHw don't cause a black hole + ; tickyAllocHeap hpHw + ; doGranAllocate hpHw + ; setRealHp hpHw + ; code } + +do_checks :: WordOff -- Heap headroom + -> CmmAGraph -- What to do on failure + -> CmmAGraph +do_checks 0 _ + = mkNop +do_checks alloc do_gc + = withFreshLabel "gc" $ \ blk_id -> + mkLabel blk_id Nothing + <*> mkAssign hpReg bump_hp + <*> mkCmmIfThen hp_oflo + (save_alloc + <*> do_gc + <*> mkBranch blk_id) + -- Bump heap pointer, and test for heap exhaustion + -- Note that we don't move the heap pointer unless the + -- stack check succeeds. Otherwise we might end up + -- with slop at the end of the current block, which can + -- confuse the LDV profiler. + where + alloc_lit = CmmLit (mkIntCLit (alloc*wORD_SIZE)) -- Bytes + bump_hp = cmmOffsetExprB (CmmReg hpReg) alloc_lit + + -- Hp overflow if (Hp > HpLim) + -- (Hp has been incremented by now) + -- HpLim points to the LAST WORD of valid allocation space. + hp_oflo = CmmMachOp mo_wordUGt + [CmmReg hpReg, CmmReg (CmmGlobal HpLim)] + + save_alloc = mkAssign (CmmGlobal HpAlloc) alloc_lit + +{- + +{- Unboxed tuple alternatives and let-no-escapes (the two most annoying +constructs to generate code for!) For unboxed tuple returns, there +are an arbitrary number of possibly unboxed return values, some of +which will be in registers, and the others will be on the stack. We +always organise the stack-resident fields into pointers & +non-pointers, and pass the number of each to the heap check code. -} + +unbxTupleHeapCheck + :: [(Id, GlobalReg)] -- Live registers + -> WordOff -- no. of stack slots containing ptrs + -> WordOff -- no. of stack slots containing nonptrs + -> CmmAGraph -- code to insert in the failure path + -> FCode () + -> FCode () + +unbxTupleHeapCheck regs ptrs nptrs fail_code code + -- We can't manage more than 255 pointers/non-pointers + -- in a generic heap check. + | ptrs > 255 || nptrs > 255 = panic "altHeapCheck" + | otherwise + = initHeapUsage $ \ hpHw -> do + { codeOnly $ do { do_checks 0 {- no stack check -} hpHw + full_fail_code rts_label + ; tickyAllocHeap hpHw } + ; setRealHp hpHw + ; code } + where + full_fail_code = fail_code `plusStmts` oneStmt assign_liveness + assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9)) -- Ho ho ho! + (CmmLit (mkWordCLit liveness)) + liveness = mkRegLiveness regs ptrs nptrs + rts_label = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_ut"))) + + +{- Old Gransim comment -- I have no idea whether it still makes sense (SLPJ Sep07) +For GrAnSim the code for doing a heap check and doing a context switch +has been separated. Especially, the HEAP_CHK macro only performs a +heap check. THREAD_CONTEXT_SWITCH should be used for doing a context +switch. GRAN_FETCH_AND_RESCHEDULE must be put at the beginning of +every slow entry code in order to simulate the fetching of +closures. If fetching is necessary (i.e. current closure is not local) +then an automatic context switch is done. -} + + +When failing a check, we save a return address on the stack and +jump to a pre-compiled code fragment that saves the live registers +and returns to the scheduler. + +The return address in most cases will be the beginning of the basic +block in which the check resides, since we need to perform the check +again on re-entry because someone else might have stolen the resource +in the meantime. + +%************************************************************************ +%* * + Generic Heap/Stack Checks - used in the RTS +%* * +%************************************************************************ + +\begin{code} +hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> FCode () +hpChkGen bytes liveness reentry + = do_checks' bytes True assigns stg_gc_gen + where + assigns = mkStmts [ + CmmAssign (CmmGlobal (VanillaReg 9)) liveness, + CmmAssign (CmmGlobal (VanillaReg 10)) reentry + ] + +-- a heap check where R1 points to the closure to enter on return, and +-- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP). +hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> FCode () +hpChkNodePointsAssignSp0 bytes sp0 + = do_checks' bytes True assign stg_gc_enter1 + where assign = oneStmt (CmmStore (CmmReg spReg) sp0) + +stg_gc_gen = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_gen"))) +\end{code} + +-} diff --git a/compiler/codeGen/StgCmmHpc.hs b/compiler/codeGen/StgCmmHpc.hs new file mode 100644 index 0000000000..0205bd0911 --- /dev/null +++ b/compiler/codeGen/StgCmmHpc.hs @@ -0,0 +1,83 @@ +----------------------------------------------------------------------------- +-- +-- Code generation for coverage +-- +-- (c) Galois Connections, Inc. 2006 +-- +----------------------------------------------------------------------------- + +{-# OPTIONS -w #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + +module StgCmmHpc ( initHpc, mkTickBox ) where + +import StgCmmUtils +import StgCmmMonad +import StgCmmForeign +import StgCmmClosure + +import MkZipCfgCmm +import Cmm +import CLabel +import Module +import CmmUtils +import ForeignCall +import FastString +import HscTypes +import Char +import StaticFlags +import PackageConfig + +mkTickBox :: Module -> Int -> CmmAGraph +mkTickBox mod n + = mkStore tick_box (CmmMachOp (MO_Add W64) + [ CmmLoad tick_box b64 + , CmmLit (CmmInt 1 W64) + ]) + where + tick_box = cmmIndex W64 + (CmmLit $ CmmLabel $ mkHpcTicksLabel $ mod) + (fromIntegral n) + +initHpc :: Module -> HpcInfo -> FCode CmmAGraph +-- Emit top-level tables for HPC and return code to initialise +initHpc this_mod (NoHpcInfo {}) + = return mkNop +initHpc this_mod (HpcInfo tickCount hashNo) + = getCode $ whenC opt_Hpc $ + do { emitData ReadOnlyData + [ CmmDataLabel mkHpcModuleNameLabel + , CmmString $ map (fromIntegral . ord) + (full_name_str) + ++ [0] + ] + ; emitData Data $ [ CmmDataLabel (mkHpcTicksLabel this_mod) + ] ++ + [ CmmStaticLit (CmmInt 0 W64) + | _ <- take tickCount [0::Int ..] + ] + + ; id <- newTemp bWord -- TODO FIXME NOW + ; emitCCall + [(id,NoHint)] + (CmmLit $ CmmLabel $ mkForeignLabel mod_alloc Nothing False) + [ (mkLblExpr mkHpcModuleNameLabel,AddrHint) + , (CmmLit $ mkIntCLit tickCount,NoHint) + , (CmmLit $ mkIntCLit hashNo,NoHint) + , (CmmLit $ CmmLabel $ mkHpcTicksLabel $ this_mod,AddrHint) + ] + } + where + mod_alloc = mkFastString "hs_hpc_module" + module_name_str = moduleNameString (Module.moduleName this_mod) + full_name_str = if modulePackageId this_mod == mainPackageId + then module_name_str + else packageIdString (modulePackageId this_mod) ++ "/" ++ + module_name_str + + + diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs new file mode 100644 index 0000000000..f8d39646d6 --- /dev/null +++ b/compiler/codeGen/StgCmmLayout.hs @@ -0,0 +1,618 @@ +----------------------------------------------------------------------------- +-- +-- Building info tables. +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +{-# OPTIONS #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + +module StgCmmLayout ( + mkArgDescr, + emitCall, emitReturn, + + emitClosureCodeAndInfoTable, + + slowCall, directCall, + + mkVirtHeapOffsets, getHpRelOffset, hpRel, + + stdInfoTableSizeB, + entryCode, closureInfoPtr, + getConstrTag, + cmmGetClosureType, + infoTable, infoTableClosureType, + infoTablePtrs, infoTableNonPtrs, + funInfoTable, makeRelativeRefTo + ) where + + +#include "HsVersions.h" + +import StgCmmClosure +import StgCmmEnv +import StgCmmTicky +import StgCmmUtils +import StgCmmMonad + +import MkZipCfgCmm +import SMRep +import CmmUtils +import Cmm +import CLabel +import StgSyn +import Id +import Name +import TyCon ( PrimRep(..) ) +import Unique +import BasicTypes ( Arity ) +import StaticFlags + +import Bitmap +import Data.Bits + +import Maybes +import Constants +import Util +import Data.List +import Outputable +import FastString ( LitString, sLit ) + +------------------------------------------------------------------------ +-- Call and return sequences +------------------------------------------------------------------------ + +emitReturn :: [CmmExpr] -> FCode () +-- Return multiple values to the sequel +-- +-- If the sequel is Return +-- return (x,y) +-- If the sequel is AssignTo [p,q] +-- p=x; q=y; +emitReturn results + = do { adjustHpBackwards + ; sequel <- getSequel; + ; case sequel of + Return _ -> emit (mkReturn results) + AssignTo regs _ -> emit (mkMultiAssign regs results) + } + +emitCall :: CmmExpr -> [CmmExpr] -> FCode () +-- (cgCall fun args) makes a call to the entry-code of 'fun', +-- passing 'args', and returning the results to the current sequel +emitCall fun args + = do { adjustHpBackwards + ; sequel <- getSequel; + ; case sequel of + Return _ -> emit (mkJump fun args) + AssignTo res_regs srt -> emit (mkCmmCall fun res_regs args srt) + } + +adjustHpBackwards :: FCode () +-- This function adjusts and heap pointers just before a tail call or +-- return. At a call or return, the virtual heap pointer may be less +-- than the real Hp, because the latter was advanced to deal with +-- the worst-case branch of the code, and we may be in a better-case +-- branch. In that case, move the real Hp *back* and retract some +-- ticky allocation count. +-- +-- It *does not* deal with high-water-mark adjustment. +-- That's done by functions which allocate heap. +adjustHpBackwards + = do { hp_usg <- getHpUsage + ; let rHp = realHp hp_usg + vHp = virtHp hp_usg + adjust_words = vHp -rHp + ; new_hp <- getHpRelOffset vHp + + ; emit (if adjust_words == 0 + then mkNop + else mkAssign hpReg new_hp) -- Generates nothing when vHp==rHp + + ; tickyAllocHeap adjust_words -- ...ditto + + ; setRealHp vHp + } + + +------------------------------------------------------------------------- +-- Making calls: directCall and slowCall +------------------------------------------------------------------------- + +directCall :: CLabel -> Arity -> [StgArg] -> FCode () +-- (directCall f n args) +-- calls f(arg1, ..., argn), and applies the result to the remaining args +-- The function f has arity n, and there are guaranteed at least n args +-- Both arity and args include void args +directCall lbl arity stg_args + = do { cmm_args <- getNonVoidArgAmodes stg_args + ; direct_call lbl arity cmm_args (argsLReps stg_args) } + +slowCall :: CmmExpr -> [StgArg] -> FCode () +-- (slowCall fun args) applies fun to args, returning the results to Sequel +slowCall fun stg_args + = do { cmm_args <- getNonVoidArgAmodes stg_args + ; slow_call fun cmm_args (argsLReps stg_args) } + +-------------- +direct_call :: CLabel -> Arity -> [CmmExpr] -> [LRep] -> FCode () +-- NB1: (length args) maybe less than (length reps), because +-- the args exclude the void ones +-- NB2: 'arity' refers to the *reps* +direct_call lbl arity args reps + | null rest_args + = ASSERT( arity == length args) + emitCall target args + + | otherwise + = ASSERT( arity == length initial_reps ) + do { pap_id <- newTemp gcWord + ; let srt = pprTrace "Urk! SRT for over-sat call" + (ppr lbl) NoC_SRT + -- XXX: what if rest_args contains static refs? + ; withSequel (AssignTo [pap_id] srt) + (emitCall target args) + ; slow_call (CmmReg (CmmLocal pap_id)) + rest_args rest_reps } + where + target = CmmLit (CmmLabel lbl) + (initial_reps, rest_reps) = splitAt arity reps + arg_arity = count isNonV initial_reps + (_, rest_args) = splitAt arg_arity args + +-------------- +slow_call :: CmmExpr -> [CmmExpr] -> [LRep] -> FCode () +slow_call fun args reps + = direct_call (mkRtsApFastLabel rts_fun) (arity+1) + (fun : args) (P : reps) + where + (rts_fun, arity) = slowCallPattern reps + +-- These cases were found to cover about 99% of all slow calls: +slowCallPattern :: [LRep] -> (LitString, Arity) +-- Returns the generic apply function and arity +slowCallPattern (P: P: P: P: P: P: _) = (sLit "stg_ap_pppppp", 6) +slowCallPattern (P: P: P: P: P: _) = (sLit "stg_ap_ppppp", 5) +slowCallPattern (P: P: P: P: _) = (sLit "stg_ap_pppp", 4) +slowCallPattern (P: P: P: V: _) = (sLit "stg_ap_pppv", 4) +slowCallPattern (P: P: P: _) = (sLit "stg_ap_ppp", 3) +slowCallPattern (P: P: V: _) = (sLit "stg_ap_ppv", 3) +slowCallPattern (P: P: _) = (sLit "stg_ap_pp", 2) +slowCallPattern (P: V: _) = (sLit "stg_ap_pv", 2) +slowCallPattern (P: _) = (sLit "stg_ap_p", 1) +slowCallPattern (V: _) = (sLit "stg_ap_v", 1) +slowCallPattern (N: _) = (sLit "stg_ap_n", 1) +slowCallPattern (F: _) = (sLit "stg_ap_f", 1) +slowCallPattern (D: _) = (sLit "stg_ap_d", 1) +slowCallPattern (L: _) = (sLit "stg_ap_l", 1) +slowCallPattern [] = (sLit "stg_ap_0", 0) + + +------------------------------------------------------------------------- +-- Classifying arguments: LRep +------------------------------------------------------------------------- + +-- LRep is not exported (even abstractly) +-- It's a local helper type for classification + +data LRep = P -- GC Ptr + | N -- One-word non-ptr + | L -- Two-word non-ptr (long) + | V -- Void + | F -- Float + | D -- Double + +toLRep :: PrimRep -> LRep +toLRep VoidRep = V +toLRep PtrRep = P +toLRep IntRep = N +toLRep WordRep = N +toLRep AddrRep = N +toLRep Int64Rep = L +toLRep Word64Rep = L +toLRep FloatRep = F +toLRep DoubleRep = D + +isNonV :: LRep -> Bool +isNonV V = False +isNonV _ = True + +argsLReps :: [StgArg] -> [LRep] +argsLReps = map (toLRep . argPrimRep) + +lRepSizeW :: LRep -> WordOff -- Size in words +lRepSizeW N = 1 +lRepSizeW P = 1 +lRepSizeW F = 1 +lRepSizeW L = wORD64_SIZE `quot` wORD_SIZE +lRepSizeW D = dOUBLE_SIZE `quot` wORD_SIZE +lRepSizeW V = 0 + +------------------------------------------------------------------------- +---- Laying out objects on the heap and stack +------------------------------------------------------------------------- + +-- The heap always grows upwards, so hpRel is easy +hpRel :: VirtualHpOffset -- virtual offset of Hp + -> VirtualHpOffset -- virtual offset of The Thing + -> WordOff -- integer word offset +hpRel hp off = off - hp + +getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr +getHpRelOffset virtual_offset + = do { hp_usg <- getHpUsage + ; return (cmmRegOffW hpReg (hpRel (realHp hp_usg) virtual_offset)) } + +mkVirtHeapOffsets + :: Bool -- True <=> is a thunk + -> [(PrimRep,a)] -- Things to make offsets for + -> (WordOff, -- _Total_ number of words allocated + WordOff, -- Number of words allocated for *pointers* + [(a, VirtualHpOffset)]) + +-- Things with their offsets from start of object in order of +-- increasing offset; BUT THIS MAY BE DIFFERENT TO INPUT ORDER +-- First in list gets lowest offset, which is initial offset + 1. +-- +-- Void arguments are removed, so output list may be shorter than +-- input list +-- +-- mkVirtHeapOffsets always returns boxed things with smaller offsets +-- than the unboxed things + +mkVirtHeapOffsets is_thunk things + = let non_void_things = filterOut (isVoidRep . fst) things + (ptrs, non_ptrs) = partition (isGcPtrRep . fst) non_void_things + (wds_of_ptrs, ptrs_w_offsets) = mapAccumL computeOffset 0 ptrs + (tot_wds, non_ptrs_w_offsets) = mapAccumL computeOffset wds_of_ptrs non_ptrs + in + (tot_wds, wds_of_ptrs, ptrs_w_offsets ++ non_ptrs_w_offsets) + where + hdr_size | is_thunk = thunkHdrSize + | otherwise = fixedHdrSize + + computeOffset wds_so_far (rep, thing) + = (wds_so_far + lRepSizeW (toLRep rep), + (thing, hdr_size + wds_so_far)) + + +------------------------------------------------------------------------- +-- +-- Making argument descriptors +-- +-- An argument descriptor describes the layout of args on the stack, +-- both for * GC (stack-layout) purposes, and +-- * saving/restoring registers when a heap-check fails +-- +-- Void arguments aren't important, therefore (contrast constructSlowCall) +-- +------------------------------------------------------------------------- + +-- bring in ARG_P, ARG_N, etc. +#include "../includes/StgFun.h" + +------------------------- +-- argDescrType :: ArgDescr -> StgHalfWord +-- -- The "argument type" RTS field type +-- argDescrType (ArgSpec n) = n +-- argDescrType (ArgGen liveness) +-- | isBigLiveness liveness = ARG_GEN_BIG +-- | otherwise = ARG_GEN + + +mkArgDescr :: Name -> [Id] -> FCode ArgDescr +mkArgDescr nm args + = case stdPattern arg_reps of + Just spec_id -> return (ArgSpec spec_id) + Nothing -> do { liveness <- mkLiveness nm size bitmap + ; return (ArgGen liveness) } + where + arg_reps = filter isNonV (map (toLRep . idPrimRep) args) + -- Getting rid of voids eases matching of standard patterns + + bitmap = mkBitmap arg_bits + arg_bits = argBits arg_reps + size = length arg_bits + +argBits :: [LRep] -> [Bool] -- True for non-ptr, False for ptr +argBits [] = [] +argBits (P : args) = False : argBits args +argBits (arg : args) = take (lRepSizeW arg) (repeat True) ++ argBits args + +---------------------- +stdPattern :: [LRep] -> Maybe StgHalfWord +stdPattern reps + = case reps of + [] -> Just ARG_NONE -- just void args, probably + [N] -> Just ARG_N + [P] -> Just ARG_N + [F] -> Just ARG_F + [D] -> Just ARG_D + [L] -> Just ARG_L + + [N,N] -> Just ARG_NN + [N,P] -> Just ARG_NP + [P,N] -> Just ARG_PN + [P,P] -> Just ARG_PP + + [N,N,N] -> Just ARG_NNN + [N,N,P] -> Just ARG_NNP + [N,P,N] -> Just ARG_NPN + [N,P,P] -> Just ARG_NPP + [P,N,N] -> Just ARG_PNN + [P,N,P] -> Just ARG_PNP + [P,P,N] -> Just ARG_PPN + [P,P,P] -> Just ARG_PPP + + [P,P,P,P] -> Just ARG_PPPP + [P,P,P,P,P] -> Just ARG_PPPPP + [P,P,P,P,P,P] -> Just ARG_PPPPPP + + _ -> Nothing + +------------------------------------------------------------------------- +-- +-- Liveness info +-- +------------------------------------------------------------------------- + +-- TODO: This along with 'mkArgDescr' should be unified +-- with 'CmmInfo.mkLiveness'. However that would require +-- potentially invasive changes to the 'ClosureInfo' type. +-- For now, 'CmmInfo.mkLiveness' handles only continuations and +-- this one handles liveness everything else. Another distinction +-- between these two is that 'CmmInfo.mkLiveness' information +-- about the stack layout, and this one is information about +-- the heap layout of PAPs. +mkLiveness :: Name -> Int -> Bitmap -> FCode Liveness +mkLiveness name size bits + | size > mAX_SMALL_BITMAP_SIZE -- Bitmap does not fit in one word + = do { let lbl = mkBitmapLabel (getUnique name) + ; emitRODataLits lbl ( mkWordCLit (fromIntegral size) + : map mkWordCLit bits) + ; return (BigLiveness lbl) } + + | otherwise -- Bitmap fits in one word + = let + small_bits = case bits of + [] -> 0 + [b] -> fromIntegral b + _ -> panic "livenessToAddrMode" + in + return (smallLiveness size small_bits) + +smallLiveness :: Int -> StgWord -> Liveness +smallLiveness size small_bits = SmallLiveness bits + where bits = fromIntegral size .|. (small_bits `shiftL` bITMAP_BITS_SHIFT) + +------------------- +-- isBigLiveness :: Liveness -> Bool +-- isBigLiveness (BigLiveness _) = True +-- isBigLiveness (SmallLiveness _) = False + +------------------- +-- mkLivenessCLit :: Liveness -> CmmLit +-- mkLivenessCLit (BigLiveness lbl) = CmmLabel lbl +-- mkLivenessCLit (SmallLiveness bits) = mkWordCLit bits + + +------------------------------------------------------------------------- +-- +-- Bitmap describing register liveness +-- across GC when doing a "generic" heap check +-- (a RET_DYN stack frame). +-- +-- NB. Must agree with these macros (currently in StgMacros.h): +-- GET_NON_PTRS(), GET_PTRS(), GET_LIVENESS(). +------------------------------------------------------------------------- + +{- Not used in new code gen +mkRegLiveness :: [(Id, GlobalReg)] -> Int -> Int -> StgWord +mkRegLiveness regs ptrs nptrs + = (fromIntegral nptrs `shiftL` 16) .|. + (fromIntegral ptrs `shiftL` 24) .|. + all_non_ptrs `xor` reg_bits regs + where + all_non_ptrs = 0xff + + reg_bits [] = 0 + reg_bits ((id, VanillaReg i) : regs) | isGcPtrRep (idPrimRep id) + = (1 `shiftL` (i - 1)) .|. reg_bits regs + reg_bits (_ : regs) + = reg_bits regs +-} + +------------------------------------------------------------------------- +-- +-- Generating the info table and code for a closure +-- +------------------------------------------------------------------------- + +-- Here we make an info table of type 'CmmInfo'. The concrete +-- representation as a list of 'CmmAddr' is handled later +-- in the pipeline by 'cmmToRawCmm'. + +emitClosureCodeAndInfoTable :: ClosureInfo -> CmmFormals + -> CmmAGraph -> FCode () +emitClosureCodeAndInfoTable cl_info args body + = do { info <- mkCmmInfo cl_info + ; emitProc info (infoLblToEntryLbl info_lbl) args body } + where + info_lbl = infoTableLabelFromCI cl_info + +-- Convert from 'ClosureInfo' to 'CmmInfo'. +-- Not used for return points. (The 'smRepClosureTypeInt' call would panic.) +mkCmmInfo :: ClosureInfo -> FCode CmmInfo +mkCmmInfo cl_info + = do { prof <- if opt_SccProfilingOn then + do fd_lit <- mkStringCLit (closureTypeDescr cl_info) + ad_lit <- mkStringCLit (closureValDescr cl_info) + return $ ProfilingInfo fd_lit ad_lit + else return $ ProfilingInfo (mkIntCLit 0) (mkIntCLit 0) + ; return (CmmInfo gc_target Nothing (CmmInfoTable prof cl_type info)) } + where + info = closureTypeInfo cl_info + cl_type = smRepClosureTypeInt (closureSMRep cl_info) + + -- The gc_target is to inform the CPS pass when it inserts a stack check. + -- Since that pass isn't used yet we'll punt for now. + -- When the CPS pass is fully integrated, this should + -- be replaced by the label that any heap check jumped to, + -- so that branch can be shared by both the heap (from codeGen) + -- and stack checks (from the CPS pass). + -- JD: Actually, we've decided to go a different route here: + -- the code generator is now responsible for producing the + -- stack limit check explicitly, so this field is now obsolete. + gc_target = Nothing + +----------------------------------------------------------------------------- +-- +-- Info table offsets +-- +----------------------------------------------------------------------------- + +stdInfoTableSizeW :: WordOff +-- The size of a standard info table varies with profiling/ticky etc, +-- so we can't get it from Constants +-- It must vary in sync with mkStdInfoTable +stdInfoTableSizeW + = size_fixed + size_prof + where + size_fixed = 2 -- layout, type + size_prof | opt_SccProfilingOn = 2 + | otherwise = 0 + +stdInfoTableSizeB :: ByteOff +stdInfoTableSizeB = stdInfoTableSizeW * wORD_SIZE :: ByteOff + +stdSrtBitmapOffset :: ByteOff +-- Byte offset of the SRT bitmap half-word which is +-- in the *higher-addressed* part of the type_lit +stdSrtBitmapOffset = stdInfoTableSizeB - hALF_WORD_SIZE + +stdClosureTypeOffset :: ByteOff +-- Byte offset of the closure type half-word +stdClosureTypeOffset = stdInfoTableSizeB - wORD_SIZE + +stdPtrsOffset, stdNonPtrsOffset :: ByteOff +stdPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE +stdNonPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE + hALF_WORD_SIZE + +------------------------------------------------------------------------- +-- +-- Accessing fields of an info table +-- +------------------------------------------------------------------------- + +closureInfoPtr :: CmmExpr -> CmmExpr +-- Takes a closure pointer and returns the info table pointer +closureInfoPtr e = CmmLoad e bWord + +entryCode :: CmmExpr -> CmmExpr +-- Takes an info pointer (the first word of a closure) +-- and returns its entry code +entryCode e | tablesNextToCode = e + | otherwise = CmmLoad e bWord + +getConstrTag :: CmmExpr -> CmmExpr +-- Takes a closure pointer, and return the *zero-indexed* +-- constructor tag obtained from the info table +-- This lives in the SRT field of the info table +-- (constructors don't need SRTs). +getConstrTag closure_ptr + = CmmMachOp (MO_UU_Conv halfWordWidth wordWidth) [infoTableConstrTag info_table] + where + info_table = infoTable (closureInfoPtr closure_ptr) + +cmmGetClosureType :: CmmExpr -> CmmExpr +-- Takes a closure pointer, and return the closure type +-- obtained from the info table +cmmGetClosureType closure_ptr + = CmmMachOp (MO_UU_Conv halfWordWidth wordWidth) [infoTableClosureType info_table] + where + info_table = infoTable (closureInfoPtr closure_ptr) + +infoTable :: CmmExpr -> CmmExpr +-- Takes an info pointer (the first word of a closure) +-- and returns a pointer to the first word of the standard-form +-- info table, excluding the entry-code word (if present) +infoTable info_ptr + | tablesNextToCode = cmmOffsetB info_ptr (- stdInfoTableSizeB) + | otherwise = cmmOffsetW info_ptr 1 -- Past the entry code pointer + +infoTableConstrTag :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the constr tag +-- field of the info table (same as the srt_bitmap field) +infoTableConstrTag = infoTableSrtBitmap + +infoTableSrtBitmap :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the srt_bitmap +-- field of the info table +infoTableSrtBitmap info_tbl + = CmmLoad (cmmOffsetB info_tbl stdSrtBitmapOffset) bHalfWord + +infoTableClosureType :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the closure type +-- field of the info table. +infoTableClosureType info_tbl + = CmmLoad (cmmOffsetB info_tbl stdClosureTypeOffset) bHalfWord + +infoTablePtrs :: CmmExpr -> CmmExpr +infoTablePtrs info_tbl + = CmmLoad (cmmOffsetB info_tbl stdPtrsOffset) bHalfWord + +infoTableNonPtrs :: CmmExpr -> CmmExpr +infoTableNonPtrs info_tbl + = CmmLoad (cmmOffsetB info_tbl stdNonPtrsOffset) bHalfWord + +funInfoTable :: CmmExpr -> CmmExpr +-- Takes the info pointer of a function, +-- and returns a pointer to the first word of the StgFunInfoExtra struct +-- in the info table. +funInfoTable info_ptr + | tablesNextToCode + = cmmOffsetB info_ptr (- stdInfoTableSizeB - sIZEOF_StgFunInfoExtraRev) + | otherwise + = cmmOffsetW info_ptr (1 + stdInfoTableSizeW) + -- Past the entry code pointer + +------------------------------------------------------------------------- +-- +-- Static reference tables +-- +------------------------------------------------------------------------- + +-- srtLabelAndLength :: C_SRT -> CLabel -> (CmmLit, StgHalfWord) +-- srtLabelAndLength NoC_SRT _ +-- = (zeroCLit, 0) +-- srtLabelAndLength (C_SRT lbl off bitmap) info_lbl +-- = (makeRelativeRefTo info_lbl $ cmmLabelOffW lbl off, bitmap) + +------------------------------------------------------------------------- +-- +-- Position independent code +-- +------------------------------------------------------------------------- +-- In order to support position independent code, we mustn't put absolute +-- references into read-only space. Info tables in the tablesNextToCode +-- case must be in .text, which is read-only, so we doctor the CmmLits +-- to use relative offsets instead. + +-- Note that this is done even when the -fPIC flag is not specified, +-- as we want to keep binary compatibility between PIC and non-PIC. + +makeRelativeRefTo :: CLabel -> CmmLit -> CmmLit + +makeRelativeRefTo info_lbl (CmmLabel lbl) + | tablesNextToCode + = CmmLabelDiffOff lbl info_lbl 0 +makeRelativeRefTo info_lbl (CmmLabelOff lbl off) + | tablesNextToCode + = CmmLabelDiffOff lbl info_lbl off +makeRelativeRefTo _ lit = lit diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs new file mode 100644 index 0000000000..365263941e --- /dev/null +++ b/compiler/codeGen/StgCmmMonad.hs @@ -0,0 +1,601 @@ +----------------------------------------------------------------------------- +-- +-- Monad for Stg to C-- code generation +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmMonad ( + FCode, -- type + + initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs, + returnFC, fixC, nopC, whenC, + newUnique, newUniqSupply, + + emit, emitData, emitProc, emitSimpleProc, + + getCmm, cgStmtsToBlocks, + getCodeR, getCode, getHeapUsage, + + forkClosureBody, forkStatics, forkAlts, forkProc, codeOnly, + + ConTagZ, + + Sequel(..), + withSequel, getSequel, + + setSRTLabel, getSRTLabel, + setTickyCtrLabel, getTickyCtrLabel, + + HeapUsage(..), VirtualHpOffset, initHpUsage, + getHpUsage, setHpUsage, heapHWM, + setVirtHp, getVirtHp, setRealHp, + + getModuleName, + + -- ideally we wouldn't export these, but some other modules access internal state + getState, setState, getInfoDown, getDynFlags, getThisPackage, + + -- more localised access to monad state + CgIdInfo(..), CgLoc(..), + getBinds, setBinds, getStaticBinds, + + -- out of general friendliness, we also export ... + CgInfoDownwards(..), CgState(..) -- non-abstract + ) where + +#include "HsVersions.h" + +import StgCmmClosure +import DynFlags +import MkZipCfgCmm +import BlockId +import Cmm +import CLabel +import TyCon ( PrimRep ) +import SMRep +import Module +import Id +import VarEnv +import OrdList +import Unique +import Util() +import UniqSupply +import FastString(sLit) +import Outputable + +import Control.Monad +import Data.List +import Prelude hiding( sequence ) +import qualified Prelude( sequence ) + +infixr 9 `thenC` -- Right-associative! +infixr 9 `thenFC` + + +-------------------------------------------------------- +-- The FCode monad and its types +-------------------------------------------------------- + +newtype FCode a = FCode (CgInfoDownwards -> CgState -> (a, CgState)) + +instance Monad FCode where + (>>=) = thenFC + return = returnFC + +{-# INLINE thenC #-} +{-# INLINE thenFC #-} +{-# INLINE returnFC #-} + +initC :: DynFlags -> Module -> FCode a -> IO a +initC dflags mod (FCode code) + = do { uniqs <- mkSplitUniqSupply 'c' + ; case code (initCgInfoDown dflags mod) (initCgState uniqs) of + (res, _) -> return res + } + +returnFC :: a -> FCode a +returnFC val = FCode (\_info_down state -> (val, state)) + +thenC :: FCode () -> FCode a -> FCode a +thenC (FCode m) (FCode k) = + FCode (\info_down state -> let (_,new_state) = m info_down state in + k info_down new_state) + +nopC :: FCode () +nopC = return () + +whenC :: Bool -> FCode () -> FCode () +whenC True code = code +whenC False _code = nopC + +listCs :: [FCode ()] -> FCode () +listCs [] = return () +listCs (fc:fcs) = do + fc + listCs fcs + +mapCs :: (a -> FCode ()) -> [a] -> FCode () +mapCs = mapM_ + +thenFC :: FCode a -> (a -> FCode c) -> FCode c +thenFC (FCode m) k = FCode ( + \info_down state -> + let + (m_result, new_state) = m info_down state + (FCode kcode) = k m_result + in + kcode info_down new_state + ) + +listFCs :: [FCode a] -> FCode [a] +listFCs = Prelude.sequence + +mapFCs :: (a -> FCode b) -> [a] -> FCode [b] +mapFCs = mapM + +fixC :: (a -> FCode a) -> FCode a +fixC fcode = FCode ( + \info_down state -> + let + FCode fc = fcode v + result@(v,_) = fc info_down state + -- ^--------^ + in + result + ) + + +-------------------------------------------------------- +-- The code generator environment +-------------------------------------------------------- + +-- This monadery has some information that it only passes +-- *downwards*, as well as some ``state'' which is modified +-- as we go along. + +data CgInfoDownwards -- information only passed *downwards* by the monad + = MkCgInfoDown { + cgd_dflags :: DynFlags, + cgd_mod :: Module, -- Module being compiled + cgd_statics :: CgBindings, -- [Id -> info] : static environment + cgd_srt_lbl :: CLabel, -- Label of the current top-level SRT + cgd_ticky :: CLabel, -- Current destination for ticky counts + cgd_sequel :: Sequel -- What to do at end of basic block + } + +type CgBindings = IdEnv CgIdInfo + +data CgIdInfo + = CgIdInfo + { cg_id :: Id -- Id that this is the info for + -- Can differ from the Id at occurrence sites by + -- virtue of being externalised, for splittable C + , cg_lf :: LambdaFormInfo + , cg_loc :: CgLoc + , cg_rep :: PrimRep -- Cache for (idPrimRep id) + , cg_tag :: {-# UNPACK #-} !DynTag -- Cache for (lfDynTag cg_lf) + } + +data CgLoc + = CmmLoc CmmExpr -- A stable CmmExpr; that is, one not mentioning + -- Hp, so that it remains valid across calls + + | LneLoc BlockId [LocalReg] -- A join point + -- A join point (= let-no-escape) should only + -- be tail-called, and in a saturated way. + -- To tail-call it, assign to these locals, + -- and branch to the block id + +instance Outputable CgIdInfo where + ppr (CgIdInfo { cg_id = id, cg_loc = loc }) + = ppr id <+> ptext (sLit "-->") <+> ppr loc + +instance Outputable CgLoc where + ppr (CmmLoc e) = ptext (sLit "cmm") <+> ppr e + ppr (LneLoc b rs) = ptext (sLit "lne") <+> ppr b <+> ppr rs + + +-- Sequel tells what to do with the result of this expression +data Sequel + = Return Bool -- Return result(s) to continuation found on the stack + -- True <=> the continuation is update code (???) + + | AssignTo + [LocalReg] -- Put result(s) in these regs and fall through + -- NB: no void arguments here + C_SRT -- Here are the statics live in the continuation + + + +initCgInfoDown :: DynFlags -> Module -> CgInfoDownwards +initCgInfoDown dflags mod + = MkCgInfoDown { cgd_dflags = dflags, + cgd_mod = mod, + cgd_statics = emptyVarEnv, + cgd_srt_lbl = error "initC: srt_lbl", + cgd_ticky = mkTopTickyCtrLabel, + cgd_sequel = initSequel } + +initSequel :: Sequel +initSequel = Return False + + +-------------------------------------------------------- +-- The code generator state +-------------------------------------------------------- + +data CgState + = MkCgState { + cgs_stmts :: CmmAGraph, -- Current procedure + + cgs_tops :: OrdList CmmTopZ, + -- Other procedures and data blocks in this compilation unit + -- Both are ordered only so that we can + -- reduce forward references, when it's easy to do so + + cgs_binds :: CgBindings, -- [Id -> info] : *local* bindings environment + -- Bindings for top-level things are given in + -- the info-down part + + cgs_hp_usg :: HeapUsage, + + cgs_uniqs :: UniqSupply } + +data HeapUsage = + HeapUsage { + virtHp :: VirtualHpOffset, -- Virtual offset of highest-allocated word + realHp :: VirtualHpOffset -- realHp: Virtual offset of real heap ptr + } + +type VirtualHpOffset = WordOff + +initCgState :: UniqSupply -> CgState +initCgState uniqs + = MkCgState { cgs_stmts = mkNop, cgs_tops = nilOL, + cgs_binds = emptyVarEnv, + cgs_hp_usg = initHpUsage, + cgs_uniqs = uniqs } + +stateIncUsage :: CgState -> CgState -> CgState +-- stateIncUsage@ e1 e2 incorporates in e1 +-- the heap high water mark found in e2. +stateIncUsage s1 s2@(MkCgState { cgs_hp_usg = hp_usg }) + = s1 { cgs_hp_usg = cgs_hp_usg s1 `maxHpHw` virtHp hp_usg } + `addCodeBlocksFrom` s2 + +addCodeBlocksFrom :: CgState -> CgState -> CgState +-- Add code blocks from the latter to the former +-- (The cgs_stmts will often be empty, but not always; see codeOnly) +s1 `addCodeBlocksFrom` s2 + = s1 { cgs_stmts = cgs_stmts s1 <*> cgs_stmts s2, + cgs_tops = cgs_tops s1 `appOL` cgs_tops s2 } + + +-- The heap high water mark is the larger of virtHp and hwHp. The latter is +-- only records the high water marks of forked-off branches, so to find the +-- heap high water mark you have to take the max of virtHp and hwHp. Remember, +-- virtHp never retreats! +-- +-- Note Jan 04: ok, so why do we only look at the virtual Hp?? + +heapHWM :: HeapUsage -> VirtualHpOffset +heapHWM = virtHp + +initHpUsage :: HeapUsage +initHpUsage = HeapUsage { virtHp = 0, realHp = 0 } + +maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage +hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw } + + +-------------------------------------------------------- +-- Operators for getting and setting the state and "info_down". +-------------------------------------------------------- + +getState :: FCode CgState +getState = FCode $ \_info_down state -> (state,state) + +setState :: CgState -> FCode () +setState state = FCode $ \_info_down _ -> ((),state) + +getHpUsage :: FCode HeapUsage +getHpUsage = do + state <- getState + return $ cgs_hp_usg state + +setHpUsage :: HeapUsage -> FCode () +setHpUsage new_hp_usg = do + state <- getState + setState $ state {cgs_hp_usg = new_hp_usg} + +setVirtHp :: VirtualHpOffset -> FCode () +setVirtHp new_virtHp + = do { hp_usage <- getHpUsage + ; setHpUsage (hp_usage {virtHp = new_virtHp}) } + +getVirtHp :: FCode VirtualHpOffset +getVirtHp + = do { hp_usage <- getHpUsage + ; return (virtHp hp_usage) } + +setRealHp :: VirtualHpOffset -> FCode () +setRealHp new_realHp + = do { hp_usage <- getHpUsage + ; setHpUsage (hp_usage {realHp = new_realHp}) } + +getBinds :: FCode CgBindings +getBinds = do + state <- getState + return $ cgs_binds state + +setBinds :: CgBindings -> FCode () +setBinds new_binds = do + state <- getState + setState $ state {cgs_binds = new_binds} + +getStaticBinds :: FCode CgBindings +getStaticBinds = do + info <- getInfoDown + return (cgd_statics info) + +withState :: FCode a -> CgState -> FCode (a,CgState) +withState (FCode fcode) newstate = FCode $ \info_down state -> + let (retval, state2) = fcode info_down newstate in ((retval,state2), state) + +newUniqSupply :: FCode UniqSupply +newUniqSupply = do + state <- getState + let (us1, us2) = splitUniqSupply (cgs_uniqs state) + setState $ state { cgs_uniqs = us1 } + return us2 + +newUnique :: FCode Unique +newUnique = do + us <- newUniqSupply + return (uniqFromSupply us) + +------------------ +getInfoDown :: FCode CgInfoDownwards +getInfoDown = FCode $ \info_down state -> (info_down,state) + +getDynFlags :: FCode DynFlags +getDynFlags = liftM cgd_dflags getInfoDown + +getThisPackage :: FCode PackageId +getThisPackage = liftM thisPackage getDynFlags + +withInfoDown :: FCode a -> CgInfoDownwards -> FCode a +withInfoDown (FCode fcode) info_down = FCode $ \_ state -> fcode info_down state + +doFCode :: FCode a -> CgInfoDownwards -> CgState -> (a,CgState) +doFCode (FCode fcode) info_down state = fcode info_down state + + +-- ---------------------------------------------------------------------------- +-- Get the current module name + +getModuleName :: FCode Module +getModuleName = do { info <- getInfoDown; return (cgd_mod info) } + +-- ---------------------------------------------------------------------------- +-- Get/set the end-of-block info + +withSequel :: Sequel -> FCode () -> FCode () +withSequel sequel code + = do { info <- getInfoDown + ; withInfoDown code (info {cgd_sequel = sequel }) } + +getSequel :: FCode Sequel +getSequel = do { info <- getInfoDown + ; return (cgd_sequel info) } + +-- ---------------------------------------------------------------------------- +-- Get/set the current SRT label + +-- There is just one SRT for each top level binding; all the nested +-- bindings use sub-sections of this SRT. The label is passed down to +-- the nested bindings via the monad. + +getSRTLabel :: FCode CLabel -- Used only by cgPanic +getSRTLabel = do info <- getInfoDown + return (cgd_srt_lbl info) + +setSRTLabel :: CLabel -> FCode a -> FCode a +setSRTLabel srt_lbl code + = do info <- getInfoDown + withInfoDown code (info { cgd_srt_lbl = srt_lbl}) + +-- ---------------------------------------------------------------------------- +-- Get/set the current ticky counter label + +getTickyCtrLabel :: FCode CLabel +getTickyCtrLabel = do + info <- getInfoDown + return (cgd_ticky info) + +setTickyCtrLabel :: CLabel -> FCode () -> FCode () +setTickyCtrLabel ticky code = do + info <- getInfoDown + withInfoDown code (info {cgd_ticky = ticky}) + + +-------------------------------------------------------- +-- Forking +-------------------------------------------------------- + +forkClosureBody :: FCode () -> FCode () +-- forkClosureBody takes a code, $c$, and compiles it in a +-- fresh environment, except that: +-- - compilation info and statics are passed in unchanged. +-- - local bindings are passed in unchanged +-- (it's up to the enclosed code to re-bind the +-- free variables to a field of the closure) +-- +-- The current state is passed on completely unaltered, except that +-- C-- from the fork is incorporated. + +forkClosureBody body_code + = do { info <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let body_info_down = info { cgd_sequel = initSequel } + fork_state_in = (initCgState us) { cgs_binds = cgs_binds state } + ((),fork_state_out) + = doFCode body_code body_info_down fork_state_in + ; setState $ state `addCodeBlocksFrom` fork_state_out } + +forkStatics :: FCode a -> FCode a +-- @forkStatics@ $fc$ compiles $fc$ in an environment whose *statics* come +-- from the current *local bindings*, but which is otherwise freshly initialised. +-- The Abstract~C returned is attached to the current state, but the +-- bindings and usage information is otherwise unchanged. +forkStatics body_code + = do { info <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let rhs_info_down = info { cgd_statics = cgs_binds state, + cgd_sequel = initSequel } + (result, fork_state_out) = doFCode body_code rhs_info_down + (initCgState us) + ; setState (state `addCodeBlocksFrom` fork_state_out) + ; return result } + +forkProc :: FCode a -> FCode a +-- 'forkProc' takes a code and compiles it in the *current* environment, +-- returning the graph thus constructed. +-- +-- The current environment is passed on completely unchanged to +-- the successor. In particular, any heap usage from the enclosed +-- code is discarded; it should deal with its own heap consumption +forkProc body_code + = do { info_down <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let fork_state_in = (initCgState us) + { cgs_binds = cgs_binds state } + (result, fork_state_out) = doFCode body_code info_down fork_state_in + ; setState $ state `addCodeBlocksFrom` fork_state_out + ; return result } + +codeOnly :: FCode () -> FCode () +-- Emit any code from the inner thing into the outer thing +-- Do not affect anything else in the outer state +-- Used in almost-circular code to prevent false loop dependencies +codeOnly body_code + = do { info_down <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state, + cgs_hp_usg = cgs_hp_usg state } + ((), fork_state_out) = doFCode body_code info_down fork_state_in + ; setState $ state `addCodeBlocksFrom` fork_state_out } + +forkAlts :: [FCode a] -> FCode [a] +-- (forkAlts' bs d) takes fcodes 'bs' for the branches of a 'case', and +-- an fcode for the default case 'd', and compiles each in the current +-- environment. The current environment is passed on unmodified, except +-- that the virtual Hp is moved on to the worst virtual Hp for the branches + +forkAlts branch_fcodes + = do { info_down <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let compile us branch + = (us2, doFCode branch info_down branch_state) + where + (us1,us2) = splitUniqSupply us + branch_state = (initCgState us1) { + cgs_binds = cgs_binds state, + cgs_hp_usg = cgs_hp_usg state } + + (_us, results) = mapAccumL compile us branch_fcodes + (branch_results, branch_out_states) = unzip results + ; setState $ foldl stateIncUsage state branch_out_states + -- NB foldl. state is the *left* argument to stateIncUsage + ; return branch_results } + +-- collect the code emitted by an FCode computation +getCodeR :: FCode a -> FCode (a, CmmAGraph) +getCodeR fcode + = do { state1 <- getState + ; (a, state2) <- withState fcode (state1 { cgs_stmts = mkNop }) + ; setState $ state2 { cgs_stmts = cgs_stmts state1 } + ; return (a, cgs_stmts state2) } + +getCode :: FCode a -> FCode CmmAGraph +getCode fcode = do { (_,stmts) <- getCodeR fcode; return stmts } + +-- 'getHeapUsage' applies a function to the amount of heap that it uses. +-- It initialises the heap usage to zeros, and passes on an unchanged +-- heap usage. +-- +-- It is usually a prelude to performing a GC check, so everything must +-- be in a tidy and consistent state. +-- +-- Note the slightly subtle fixed point behaviour needed here + +getHeapUsage :: (VirtualHpOffset -> FCode a) -> FCode a +getHeapUsage fcode + = do { info_down <- getInfoDown + ; state <- getState + ; let fstate_in = state { cgs_hp_usg = initHpUsage } + (r, fstate_out) = doFCode (fcode hp_hw) info_down fstate_in + hp_hw = heapHWM (cgs_hp_usg fstate_out) -- Loop here! + + ; setState $ fstate_out { cgs_hp_usg = cgs_hp_usg state } + ; return r } + +-- ---------------------------------------------------------------------------- +-- Combinators for emitting code + +emit :: CmmAGraph -> FCode () +emit ag + = do { state <- getState + ; setState $ state { cgs_stmts = cgs_stmts state <*> ag } } + +emitData :: Section -> [CmmStatic] -> FCode () +emitData sect lits + = do { state <- getState + ; setState $ state { cgs_tops = cgs_tops state `snocOL` data_block } } + where + data_block = CmmData sect lits + +emitProc :: CmmInfo -> CLabel -> CmmFormals -> CmmAGraph -> FCode () +emitProc info lbl args blocks + = do { us <- newUniqSupply + ; let (offset, entry) = mkEntry (mkBlockId $ uniqFromSupply us) Native args + blks = initUs_ us $ lgraphOfAGraph offset $ entry <*> blocks + -- ; blks <- cgStmtsToBlocks blocks + ; let proc_block = CmmProc info lbl args blks + ; state <- getState + ; setState $ state { cgs_tops = cgs_tops state `snocOL` proc_block } } + +emitSimpleProc :: CLabel -> CmmAGraph -> FCode () +-- Emit a procedure whose body is the specified code; no info table +emitSimpleProc lbl code + = emitProc (CmmInfo Nothing Nothing CmmNonInfoTable) lbl [] code + +getCmm :: FCode () -> FCode CmmZ +-- Get all the CmmTops (there should be no stmts) +-- Return a single Cmm which may be split from other Cmms by +-- object splitting (at a later stage) +getCmm code + = do { state1 <- getState + ; ((), state2) <- withState code (state1 { cgs_tops = nilOL }) + ; setState $ state2 { cgs_tops = cgs_tops state1 } + ; return (Cmm (fromOL (cgs_tops state2))) } + +-- ---------------------------------------------------------------------------- +-- CgStmts + +-- These functions deal in terms of CgStmts, which is an abstract type +-- representing the code in the current proc. + +-- turn CgStmts into [CmmBasicBlock], for making a new proc. +cgStmtsToBlocks :: CmmAGraph -> FCode CmmGraph +cgStmtsToBlocks stmts + = do { us <- newUniqSupply + ; return (initUs_ us (lgraphOfAGraph 0 stmts)) } + diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs new file mode 100644 index 0000000000..96467fe781 --- /dev/null +++ b/compiler/codeGen/StgCmmPrim.hs @@ -0,0 +1,662 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C--: primitive operations +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmPrim ( + cgOpApp + ) where + +#include "HsVersions.h" + +import StgCmmLayout +import StgCmmForeign +import StgCmmEnv +import StgCmmMonad +import StgCmmUtils + +import MkZipCfgCmm +import StgSyn +import Cmm +import Type ( Type, tyConAppTyCon ) +import TyCon +import CLabel +import CmmUtils +import PrimOp +import SMRep +import Constants +import FastString +import Outputable + +------------------------------------------------------------------------ +-- Primitive operations and foreign calls +------------------------------------------------------------------------ + +{- Note [Foreign call results] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A foreign call always returns an unboxed tuple of results, one +of which is the state token. This seems to happen even for pure +calls. + +Even if we returned a single result for pure calls, it'd still be +right to wrap it in a singleton unboxed tuple, because the result +might be a Haskell closure pointer, we don't want to evaluate it. -} + +---------------------------------- +cgOpApp :: StgOp -- The op + -> [StgArg] -- Arguments + -> Type -- Result type (always an unboxed tuple) + -> FCode () + +-- Foreign calls +cgOpApp (StgFCallOp fcall _) stg_args res_ty + = do { (res_regs, res_hints) <- newUnboxedTupleRegs res_ty + -- Choose result regs r1, r2 + -- Note [Foreign call results] + ; cgForeignCall res_regs res_hints fcall stg_args + -- r1, r2 = foo( x, y ) + ; emitReturn (map (CmmReg . CmmLocal) res_regs) } + -- return (r1, r2) + +-- tagToEnum# is special: we need to pull the constructor +-- out of the table, and perform an appropriate return. + +cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty + = ASSERT(isEnumerationTyCon tycon) + do { amode <- getArgAmode arg + ; emitReturn [tagToClosure tycon amode] } + where + -- If you're reading this code in the attempt to figure + -- out why the compiler panic'ed here, it is probably because + -- you used tagToEnum# in a non-monomorphic setting, e.g., + -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x# + -- That won't work. + tycon = tyConAppTyCon res_ty + +cgOpApp (StgPrimOp primop) args res_ty + | primOpOutOfLine primop + = do { cmm_args <- getNonVoidArgAmodes args + ; let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop)) + ; emitCall fun cmm_args } + + | ReturnsPrim VoidRep <- result_info + = do cgPrimOp [] primop args + emitReturn [] + + | ReturnsPrim rep <- result_info + = do res <- newTemp (primRepCmmType rep) + cgPrimOp [res] primop args + emitReturn [CmmReg (CmmLocal res)] + + | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon + = do (regs, _hints) <- newUnboxedTupleRegs res_ty + cgPrimOp regs primop args + emitReturn (map (CmmReg . CmmLocal) regs) + + | ReturnsAlg tycon <- result_info + , isEnumerationTyCon tycon + -- c.f. cgExpr (...TagToEnumOp...) + = do tag_reg <- newTemp bWord + cgPrimOp [tag_reg] primop args + emitReturn [tagToClosure tycon + (CmmReg (CmmLocal tag_reg))] + + | otherwise = panic "cgPrimop" + where + result_info = getPrimOpResultInfo primop + +--------------------------------------------------- +cgPrimOp :: [LocalReg] -- where to put the results + -> PrimOp -- the op + -> [StgArg] -- arguments + -> FCode () + +cgPrimOp results op args + = do arg_exprs <- getNonVoidArgAmodes args + emitPrimOp results op arg_exprs + + +------------------------------------------------------------------------ +-- Emitting code for a primop +------------------------------------------------------------------------ + +emitPrimOp :: [LocalReg] -- where to put the results + -> PrimOp -- the op + -> [CmmExpr] -- arguments + -> FCode () + +-- First we handle various awkward cases specially. The remaining +-- easy cases are then handled by translateOp, defined below. + +emitPrimOp [res_r,res_c] IntAddCOp [aa,bb] +{- + With some bit-twiddling, we can define int{Add,Sub}Czh portably in + C, and without needing any comparisons. This may not be the + fastest way to do it - if you have better code, please send it! --SDM + + Return : r = a + b, c = 0 if no overflow, 1 on overflow. + + We currently don't make use of the r value if c is != 0 (i.e. + overflow), we just convert to big integers and try again. This + could be improved by making r and c the correct values for + plugging into a new J#. + + { r = ((I_)(a)) + ((I_)(b)); \ + c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \ + >> (BITS_IN (I_) - 1); \ + } + Wading through the mass of bracketry, it seems to reduce to: + c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1) + +-} + = emit $ catAGraphs [ + mkAssign (CmmLocal res_r) (CmmMachOp mo_wordAdd [aa,bb]), + mkAssign (CmmLocal res_c) $ + CmmMachOp mo_wordUShr [ + CmmMachOp mo_wordAnd [ + CmmMachOp mo_wordNot [CmmMachOp mo_wordXor [aa,bb]], + CmmMachOp mo_wordXor [aa, CmmReg (CmmLocal res_r)] + ], + CmmLit (mkIntCLit (wORD_SIZE_IN_BITS - 1)) + ] + ] + + +emitPrimOp [res_r,res_c] IntSubCOp [aa,bb] +{- Similarly: + #define subIntCzh(r,c,a,b) \ + { r = ((I_)(a)) - ((I_)(b)); \ + c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \ + >> (BITS_IN (I_) - 1); \ + } + + c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1) +-} + = emit $ catAGraphs [ + mkAssign (CmmLocal res_r) (CmmMachOp mo_wordSub [aa,bb]), + mkAssign (CmmLocal res_c) $ + CmmMachOp mo_wordUShr [ + CmmMachOp mo_wordAnd [ + CmmMachOp mo_wordXor [aa,bb], + CmmMachOp mo_wordXor [aa, CmmReg (CmmLocal res_r)] + ], + CmmLit (mkIntCLit (wORD_SIZE_IN_BITS - 1)) + ] + ] + + +emitPrimOp [res] ParOp [arg] + = + -- for now, just implement this in a C function + -- later, we might want to inline it. + emitCCall + [(res,NoHint)] + (CmmLit (CmmLabel (mkRtsCodeLabel (sLit "newSpark")))) + [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)] + +emitPrimOp [res] ReadMutVarOp [mutv] + = emit (mkAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord)) + +emitPrimOp [] WriteMutVarOp [mutv,var] + = do + emit (mkStore (cmmOffsetW mutv fixedHdrSize) var) + emitCCall + [{-no results-}] + (CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) + [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)] + +-- #define sizzeofByteArrayzh(r,a) \ +-- r = (((StgArrWords *)(a))->words * sizeof(W_)) +emitPrimOp [res] SizeofByteArrayOp [arg] + = emit $ + mkAssign (CmmLocal res) (CmmMachOp mo_wordMul [ + cmmLoadIndexW arg fixedHdrSize bWord, + CmmLit (mkIntCLit wORD_SIZE) + ]) + +-- #define sizzeofMutableByteArrayzh(r,a) \ +-- r = (((StgArrWords *)(a))->words * sizeof(W_)) +emitPrimOp [res] SizeofMutableByteArrayOp [arg] + = emitPrimOp [res] SizeofByteArrayOp [arg] + + +-- #define touchzh(o) /* nothing */ +emitPrimOp [] TouchOp [_arg] + = nopC + +-- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a) +emitPrimOp [res] ByteArrayContents_Char [arg] + = emit (mkAssign (CmmLocal res) (cmmOffsetB arg arrWordsHdrSize)) + +-- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn) +emitPrimOp [res] StableNameToIntOp [arg] + = emit (mkAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize bWord)) + +-- #define eqStableNamezh(r,sn1,sn2) \ +-- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn)) +emitPrimOp [res] EqStableNameOp [arg1,arg2] + = emit (mkAssign (CmmLocal res) (CmmMachOp mo_wordEq [ + cmmLoadIndexW arg1 fixedHdrSize bWord, + cmmLoadIndexW arg2 fixedHdrSize bWord + ])) + + +emitPrimOp [res] ReallyUnsafePtrEqualityOp [arg1,arg2] + = emit (mkAssign (CmmLocal res) (CmmMachOp mo_wordEq [arg1,arg2])) + +-- #define addrToHValuezh(r,a) r=(P_)a +emitPrimOp [res] AddrToHValueOp [arg] + = emit (mkAssign (CmmLocal res) arg) + +-- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info)) +-- Note: argument may be tagged! +emitPrimOp [res] DataToTagOp [arg] + = emit (mkAssign (CmmLocal res) (getConstrTag (cmmUntag arg))) + +{- Freezing arrays-of-ptrs requires changing an info table, for the + benefit of the generational collector. It needs to scavenge mutable + objects, even if they are in old space. When they become immutable, + they can be removed from this scavenge list. -} + +-- #define unsafeFreezzeArrayzh(r,a) +-- { +-- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info); +-- r = a; +-- } +emitPrimOp [res] UnsafeFreezeArrayOp [arg] + = emit $ catAGraphs + [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)), + mkAssign (CmmLocal res) arg ] + +-- #define unsafeFreezzeByteArrayzh(r,a) r=(a) +emitPrimOp [res] UnsafeFreezeByteArrayOp [arg] + = emit (mkAssign (CmmLocal res) arg) + +-- Reading/writing pointer arrays + +emitPrimOp [r] ReadArrayOp [obj,ix] = doReadPtrArrayOp r obj ix +emitPrimOp [r] IndexArrayOp [obj,ix] = doReadPtrArrayOp r obj ix +emitPrimOp [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v + +-- IndexXXXoffAddr + +emitPrimOp res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args +emitPrimOp res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args +emitPrimOp res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args +emitPrimOp res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args + +-- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr. + +emitPrimOp res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args +emitPrimOp res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args +emitPrimOp res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args +emitPrimOp res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args + +-- IndexXXXArray + +emitPrimOp res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args +emitPrimOp res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args +emitPrimOp res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args +emitPrimOp res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args + +-- ReadXXXArray, identical to IndexXXXArray. + +emitPrimOp res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args +emitPrimOp res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args +emitPrimOp res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args +emitPrimOp res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args + +-- WriteXXXoffAddr + +emitPrimOp res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just mo_WordTo8) res args +emitPrimOp res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just mo_WordTo32) res args +emitPrimOp res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just mo_WordTo8) res args +emitPrimOp res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just mo_WordTo16) res args +emitPrimOp res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just mo_WordTo32) res args +emitPrimOp res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just mo_WordTo8) res args +emitPrimOp res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just mo_WordTo16) res args +emitPrimOp res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just mo_WordTo32) res args +emitPrimOp res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing res args + +-- WriteXXXArray + +emitPrimOp res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just mo_WordTo8) res args +emitPrimOp res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just mo_WordTo32) res args +emitPrimOp res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just mo_WordTo8) res args +emitPrimOp res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just mo_WordTo16) res args +emitPrimOp res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just mo_WordTo32) res args +emitPrimOp res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just mo_WordTo8) res args +emitPrimOp res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just mo_WordTo16) res args +emitPrimOp res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just mo_WordTo32) res args +emitPrimOp res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing res args + + +-- The rest just translate straightforwardly +emitPrimOp [res] op [arg] + | nopOp op + = emit (mkAssign (CmmLocal res) arg) + + | Just (mop,rep) <- narrowOp op + = emit (mkAssign (CmmLocal res) $ + CmmMachOp (mop rep wordWidth) [CmmMachOp (mop wordWidth rep) [arg]]) + +emitPrimOp [res] op args + | Just prim <- callishOp op + = do emitPrimCall res prim args + + | Just mop <- translateOp op + = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in + emit stmt + +emitPrimOp _ op _ + = pprPanic "emitPrimOp: can't translate PrimOp" (ppr op) + + +-- These PrimOps are NOPs in Cmm + +nopOp :: PrimOp -> Bool +nopOp Int2WordOp = True +nopOp Word2IntOp = True +nopOp Int2AddrOp = True +nopOp Addr2IntOp = True +nopOp ChrOp = True -- Int# and Char# are rep'd the same +nopOp OrdOp = True +nopOp _ = False + +-- These PrimOps turn into double casts + +narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width) +narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8) +narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16) +narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32) +narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8) +narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16) +narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32) +narrowOp _ = Nothing + +-- Native word signless ops + +translateOp :: PrimOp -> Maybe MachOp +translateOp IntAddOp = Just mo_wordAdd +translateOp IntSubOp = Just mo_wordSub +translateOp WordAddOp = Just mo_wordAdd +translateOp WordSubOp = Just mo_wordSub +translateOp AddrAddOp = Just mo_wordAdd +translateOp AddrSubOp = Just mo_wordSub + +translateOp IntEqOp = Just mo_wordEq +translateOp IntNeOp = Just mo_wordNe +translateOp WordEqOp = Just mo_wordEq +translateOp WordNeOp = Just mo_wordNe +translateOp AddrEqOp = Just mo_wordEq +translateOp AddrNeOp = Just mo_wordNe + +translateOp AndOp = Just mo_wordAnd +translateOp OrOp = Just mo_wordOr +translateOp XorOp = Just mo_wordXor +translateOp NotOp = Just mo_wordNot +translateOp SllOp = Just mo_wordShl +translateOp SrlOp = Just mo_wordUShr + +translateOp AddrRemOp = Just mo_wordURem + +-- Native word signed ops + +translateOp IntMulOp = Just mo_wordMul +translateOp IntMulMayOfloOp = Just (MO_S_MulMayOflo wordWidth) +translateOp IntQuotOp = Just mo_wordSQuot +translateOp IntRemOp = Just mo_wordSRem +translateOp IntNegOp = Just mo_wordSNeg + + +translateOp IntGeOp = Just mo_wordSGe +translateOp IntLeOp = Just mo_wordSLe +translateOp IntGtOp = Just mo_wordSGt +translateOp IntLtOp = Just mo_wordSLt + +translateOp ISllOp = Just mo_wordShl +translateOp ISraOp = Just mo_wordSShr +translateOp ISrlOp = Just mo_wordUShr + +-- Native word unsigned ops + +translateOp WordGeOp = Just mo_wordUGe +translateOp WordLeOp = Just mo_wordULe +translateOp WordGtOp = Just mo_wordUGt +translateOp WordLtOp = Just mo_wordULt + +translateOp WordMulOp = Just mo_wordMul +translateOp WordQuotOp = Just mo_wordUQuot +translateOp WordRemOp = Just mo_wordURem + +translateOp AddrGeOp = Just mo_wordUGe +translateOp AddrLeOp = Just mo_wordULe +translateOp AddrGtOp = Just mo_wordUGt +translateOp AddrLtOp = Just mo_wordULt + +-- Char# ops + +translateOp CharEqOp = Just (MO_Eq wordWidth) +translateOp CharNeOp = Just (MO_Ne wordWidth) +translateOp CharGeOp = Just (MO_U_Ge wordWidth) +translateOp CharLeOp = Just (MO_U_Le wordWidth) +translateOp CharGtOp = Just (MO_U_Gt wordWidth) +translateOp CharLtOp = Just (MO_U_Lt wordWidth) + +-- Double ops + +translateOp DoubleEqOp = Just (MO_F_Eq W64) +translateOp DoubleNeOp = Just (MO_F_Ne W64) +translateOp DoubleGeOp = Just (MO_F_Ge W64) +translateOp DoubleLeOp = Just (MO_F_Le W64) +translateOp DoubleGtOp = Just (MO_F_Gt W64) +translateOp DoubleLtOp = Just (MO_F_Lt W64) + +translateOp DoubleAddOp = Just (MO_F_Add W64) +translateOp DoubleSubOp = Just (MO_F_Sub W64) +translateOp DoubleMulOp = Just (MO_F_Mul W64) +translateOp DoubleDivOp = Just (MO_F_Quot W64) +translateOp DoubleNegOp = Just (MO_F_Neg W64) + +-- Float ops + +translateOp FloatEqOp = Just (MO_F_Eq W32) +translateOp FloatNeOp = Just (MO_F_Ne W32) +translateOp FloatGeOp = Just (MO_F_Ge W32) +translateOp FloatLeOp = Just (MO_F_Le W32) +translateOp FloatGtOp = Just (MO_F_Gt W32) +translateOp FloatLtOp = Just (MO_F_Lt W32) + +translateOp FloatAddOp = Just (MO_F_Add W32) +translateOp FloatSubOp = Just (MO_F_Sub W32) +translateOp FloatMulOp = Just (MO_F_Mul W32) +translateOp FloatDivOp = Just (MO_F_Quot W32) +translateOp FloatNegOp = Just (MO_F_Neg W32) + +-- Conversions + +translateOp Int2DoubleOp = Just (MO_SF_Conv wordWidth W64) +translateOp Double2IntOp = Just (MO_FS_Conv W64 wordWidth) + +translateOp Int2FloatOp = Just (MO_SF_Conv wordWidth W32) +translateOp Float2IntOp = Just (MO_FS_Conv W32 wordWidth) + +translateOp Float2DoubleOp = Just (MO_FF_Conv W32 W64) +translateOp Double2FloatOp = Just (MO_FF_Conv W64 W32) + +-- Word comparisons masquerading as more exotic things. + +translateOp SameMutVarOp = Just mo_wordEq +translateOp SameMVarOp = Just mo_wordEq +translateOp SameMutableArrayOp = Just mo_wordEq +translateOp SameMutableByteArrayOp = Just mo_wordEq +translateOp SameTVarOp = Just mo_wordEq +translateOp EqStablePtrOp = Just mo_wordEq + +translateOp _ = Nothing + +-- These primops are implemented by CallishMachOps, because they sometimes +-- turn into foreign calls depending on the backend. + +callishOp :: PrimOp -> Maybe CallishMachOp +callishOp DoublePowerOp = Just MO_F64_Pwr +callishOp DoubleSinOp = Just MO_F64_Sin +callishOp DoubleCosOp = Just MO_F64_Cos +callishOp DoubleTanOp = Just MO_F64_Tan +callishOp DoubleSinhOp = Just MO_F64_Sinh +callishOp DoubleCoshOp = Just MO_F64_Cosh +callishOp DoubleTanhOp = Just MO_F64_Tanh +callishOp DoubleAsinOp = Just MO_F64_Asin +callishOp DoubleAcosOp = Just MO_F64_Acos +callishOp DoubleAtanOp = Just MO_F64_Atan +callishOp DoubleLogOp = Just MO_F64_Log +callishOp DoubleExpOp = Just MO_F64_Exp +callishOp DoubleSqrtOp = Just MO_F64_Sqrt + +callishOp FloatPowerOp = Just MO_F32_Pwr +callishOp FloatSinOp = Just MO_F32_Sin +callishOp FloatCosOp = Just MO_F32_Cos +callishOp FloatTanOp = Just MO_F32_Tan +callishOp FloatSinhOp = Just MO_F32_Sinh +callishOp FloatCoshOp = Just MO_F32_Cosh +callishOp FloatTanhOp = Just MO_F32_Tanh +callishOp FloatAsinOp = Just MO_F32_Asin +callishOp FloatAcosOp = Just MO_F32_Acos +callishOp FloatAtanOp = Just MO_F32_Atan +callishOp FloatLogOp = Just MO_F32_Log +callishOp FloatExpOp = Just MO_F32_Exp +callishOp FloatSqrtOp = Just MO_F32_Sqrt + +callishOp _ = Nothing + +------------------------------------------------------------------------------ +-- Helpers for translating various minor variants of array indexing. + +doIndexOffAddrOp :: Maybe MachOp -> CmmType -> [LocalReg] -> [CmmExpr] -> FCode () +doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx] + = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx +doIndexOffAddrOp _ _ _ _ + = panic "CgPrimOp: doIndexOffAddrOp" + +doIndexByteArrayOp :: Maybe MachOp -> CmmType -> [LocalReg] -> [CmmExpr] -> FCode () +doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx] + = mkBasicIndexedRead arrWordsHdrSize maybe_post_read_cast rep res addr idx +doIndexByteArrayOp _ _ _ _ + = panic "CgPrimOp: doIndexByteArrayOp" + +doReadPtrArrayOp :: LocalReg -> CmmExpr -> CmmExpr -> FCode () +doReadPtrArrayOp res addr idx + = mkBasicIndexedRead arrPtrsHdrSize Nothing gcWord res addr idx + + +doWriteOffAddrOp :: Maybe MachOp -> [LocalReg] -> [CmmExpr] -> FCode () +doWriteOffAddrOp maybe_pre_write_cast [] [addr,idx,val] + = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx val +doWriteOffAddrOp _ _ _ + = panic "CgPrimOp: doWriteOffAddrOp" + +doWriteByteArrayOp :: Maybe MachOp -> [LocalReg] -> [CmmExpr] -> FCode () +doWriteByteArrayOp maybe_pre_write_cast [] [addr,idx,val] + = mkBasicIndexedWrite arrWordsHdrSize maybe_pre_write_cast addr idx val +doWriteByteArrayOp _ _ _ + = panic "CgPrimOp: doWriteByteArrayOp" + +doWritePtrArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> FCode () +doWritePtrArrayOp addr idx val + = do emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel))) + mkBasicIndexedWrite arrPtrsHdrSize Nothing addr idx val + +mkBasicIndexedRead :: ByteOff -> Maybe MachOp -> CmmType + -> LocalReg -> CmmExpr -> CmmExpr -> FCode () +mkBasicIndexedRead off Nothing read_rep res base idx + = emit (mkAssign (CmmLocal res) (cmmLoadIndexOffExpr off read_rep base idx)) +mkBasicIndexedRead off (Just cast) read_rep res base idx + = emit (mkAssign (CmmLocal res) (CmmMachOp cast [ + cmmLoadIndexOffExpr off read_rep base idx])) + +mkBasicIndexedWrite :: ByteOff -> Maybe MachOp + -> CmmExpr -> CmmExpr -> CmmExpr -> FCode () +mkBasicIndexedWrite off Nothing base idx val + = emit (mkStore (cmmIndexOffExpr off (typeWidth (cmmExprType val)) base idx) val) +mkBasicIndexedWrite off (Just cast) base idx val + = mkBasicIndexedWrite off Nothing base idx (CmmMachOp cast [val]) + +-- ---------------------------------------------------------------------------- +-- Misc utils + +cmmIndexOffExpr :: ByteOff -> Width -> CmmExpr -> CmmExpr -> CmmExpr +cmmIndexOffExpr off width base idx + = cmmIndexExpr width (cmmOffsetB base off) idx + +cmmLoadIndexOffExpr :: ByteOff -> CmmType -> CmmExpr -> CmmExpr -> CmmExpr +cmmLoadIndexOffExpr off ty base idx + = CmmLoad (cmmIndexOffExpr off (typeWidth ty) base idx) ty + +setInfo :: CmmExpr -> CmmExpr -> CmmAGraph +setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr + diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs new file mode 100644 index 0000000000..f442295d25 --- /dev/null +++ b/compiler/codeGen/StgCmmProf.hs @@ -0,0 +1,553 @@ +{-# OPTIONS -w #-} +-- Lots of missing type sigs etc + +----------------------------------------------------------------------------- +-- +-- Code generation for profiling +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmProf ( + initCostCentres, ccType, ccsType, + mkCCostCentre, mkCCostCentreStack, + + -- Cost-centre Profiling + dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, + enterCostCentre, enterCostCentrePAP, enterCostCentreThunk, + chooseDynCostCentres, + costCentreFrom, + curCCS, curCCSAddr, + emitSetCCC, emitCCS, + + saveCurrentCostCentre, restoreCurrentCostCentre, + + -- Lag/drag/void stuff + ldvEnter, ldvEnterClosure, ldvRecordCreate + ) where + +#include "HsVersions.h" +#include "MachDeps.h" + -- For WORD_SIZE_IN_BITS only. +#include "../includes/Constants.h" + -- For LDV_CREATE_MASK, LDV_STATE_USE + -- which are StgWords +#include "../includes/DerivedConstants.h" + -- For REP_xxx constants, which are MachReps + +import StgCmmClosure +import StgCmmUtils +import StgCmmMonad +import SMRep + +import MkZipCfgCmm +import Cmm +import TyCon ( PrimRep(..) ) +import CmmUtils +import CLabel + +import Id +import qualified Module +import CostCentre +import StgSyn +import StaticFlags +import FastString +import Constants -- Lots of field offsets +import Outputable + +import Data.Maybe +import Data.Char +import Control.Monad + +----------------------------------------------------------------------------- +-- +-- Cost-centre-stack Profiling +-- +----------------------------------------------------------------------------- + +-- Expression representing the current cost centre stack +ccsType :: CmmType -- Type of a cost-centre stack +ccsType = bWord + +ccType :: CmmType -- Type of a cost centre +ccType = bWord + +curCCS :: CmmExpr +curCCS = CmmLoad curCCSAddr ccsType + +-- Address of current CCS variable, for storing into +curCCSAddr :: CmmExpr +curCCSAddr = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CCCS"))) + +mkCCostCentre :: CostCentre -> CmmLit +mkCCostCentre cc = CmmLabel (mkCCLabel cc) + +mkCCostCentreStack :: CostCentreStack -> CmmLit +mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs) + +costCentreFrom :: CmmExpr -- A closure pointer + -> CmmExpr -- The cost centre from that closure +costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) ccsType + +staticProfHdr :: CostCentreStack -> [CmmLit] +-- The profiling header words in a static closure +-- Was SET_STATIC_PROF_HDR +staticProfHdr ccs = ifProfilingL [mkCCostCentreStack ccs, + staticLdvInit] + +dynProfHdr :: CmmExpr -> [CmmExpr] +-- Profiling header words in a dynamic closure +dynProfHdr ccs = ifProfilingL [ccs, dynLdvInit] + +initUpdFrameProf :: CmmExpr -> FCode () +-- Initialise the profiling field of an update frame +initUpdFrameProf frame_amode + = ifProfiling $ -- frame->header.prof.ccs = CCCS + emit (mkStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS) + -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0) + -- is unnecessary because it is not used anyhow. + +--------------------------------------------------------------------------- +-- Saving and restoring the current cost centre +--------------------------------------------------------------------------- + +{- Note [Saving the current cost centre] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The current cost centre is like a global register. Like other +global registers, it's a caller-saves one. But consider + case (f x) of (p,q) -> rhs +Since 'f' may set the cost centre, we must restore it +before resuming rhs. So we want code like this: + local_cc = CCC -- save + r = f( x ) + CCC = local_cc -- restore +That is, we explicitly "save" the current cost centre in +a LocalReg, local_cc; and restore it after the call. The +C-- infrastructure will arrange to save local_cc across the +call. + +The same goes for join points; + let j x = join-stuff + in blah-blah +We want this kind of code: + local_cc = CCC -- save + blah-blah + J: + CCC = local_cc -- restore +-} + +saveCurrentCostCentre :: FCode (Maybe LocalReg) + -- Returns Nothing if profiling is off +saveCurrentCostCentre + | not opt_SccProfilingOn + = return Nothing + | otherwise + = do { local_cc <- newTemp ccType + ; emit (mkAssign (CmmLocal local_cc) curCCS) + ; return (Just local_cc) } + +restoreCurrentCostCentre :: Maybe LocalReg -> FCode () +restoreCurrentCostCentre Nothing + = return () +restoreCurrentCostCentre (Just local_cc) + = emit (mkStore curCCSAddr (CmmReg (CmmLocal local_cc))) + + +------------------------------------------------------------------------------- +-- Recording allocation in a cost centre +------------------------------------------------------------------------------- + +-- | Record the allocation of a closure. The CmmExpr is the cost +-- centre stack to which to attribute the allocation. +profDynAlloc :: ClosureInfo -> CmmExpr -> FCode () +profDynAlloc cl_info ccs + = ifProfiling $ + profAlloc (CmmLit (mkIntCLit (closureSize cl_info))) ccs + +-- | Record the allocation of a closure (size is given by a CmmExpr) +-- The size must be in words, because the allocation counter in a CCS counts +-- in words. +profAlloc :: CmmExpr -> CmmExpr -> FCode () +profAlloc words ccs + = ifProfiling $ + emit (addToMemE alloc_rep + (cmmOffsetB ccs oFFSET_CostCentreStack_mem_alloc) + (CmmMachOp (MO_UU_Conv wordWidth (typeWidth alloc_rep)) $ + [CmmMachOp mo_wordSub [words, + CmmLit (mkIntCLit profHdrSize)]])) + -- subtract the "profiling overhead", which is the + -- profiling header in a closure. + where + alloc_rep = REP_CostCentreStack_mem_alloc + +-- ---------------------------------------------------------------------- +-- Setting the cost centre in a new closure + +chooseDynCostCentres :: CostCentreStack + -> [Id] -- Args + -> StgExpr -- Body + -> FCode (CmmExpr, CmmExpr) +-- Called when alllcating a closure +-- Tells which cost centre to put in the object, and which +-- to blame the cost of allocation on +chooseDynCostCentres ccs args body = do + -- Cost-centre we record in the object + use_ccs <- emitCCS ccs + + -- Cost-centre on whom we blame the allocation + let blame_ccs + | null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS) + | otherwise = use_ccs + + return (use_ccs, blame_ccs) + + +-- Some CostCentreStacks are a sequence of pushes on top of CCCS. +-- These pushes must be performed before we can refer to the stack in +-- an expression. +emitCCS :: CostCentreStack -> FCode CmmExpr +emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's) + where + (cc's, ccs') = decomposeCCS ccs + + push_em ccs [] = return ccs + push_em ccs (cc:rest) = do + tmp <- newTemp ccsType + pushCostCentre tmp ccs cc + push_em (CmmReg (CmmLocal tmp)) rest + +ccsExpr :: CostCentreStack -> CmmExpr +ccsExpr ccs + | isCurrentCCS ccs = curCCS + | otherwise = CmmLit (mkCCostCentreStack ccs) + + +isBox :: StgExpr -> Bool +-- If it's an utterly trivial RHS, then it must be +-- one introduced by boxHigherOrderArgs for profiling, +-- so we charge it to "OVERHEAD". +-- This looks like a GROSS HACK to me --SDM +isBox (StgApp fun []) = True +isBox other = False + + +-- ----------------------------------------------------------------------- +-- Setting the current cost centre on entry to a closure + +-- For lexically scoped profiling we have to load the cost centre from +-- the closure entered, if the costs are not supposed to be inherited. +-- This is done immediately on entering the fast entry point. + +-- Load current cost centre from closure, if not inherited. +-- Node is guaranteed to point to it, if profiling and not inherited. + +enterCostCentre + :: ClosureInfo + -> CostCentreStack + -> StgExpr -- The RHS of the closure + -> FCode () + +-- We used to have a special case for bindings of form +-- f = g True +-- where g has arity 2. The RHS is a thunk, but we don't +-- need to update it; and we want to subsume costs. +-- We don't have these sort of PAPs any more, so the special +-- case has gone away. + +enterCostCentre closure_info ccs body + = ifProfiling $ + ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs) + enter_cost_centre closure_info ccs body + +enter_cost_centre closure_info ccs body + | isSubsumedCCS ccs + = ASSERT(isToplevClosure closure_info) + ASSERT(re_entrant) + enter_ccs_fsub + + | isDerivedFromCurrentCCS ccs + = do { + if re_entrant && not is_box + then + enter_ccs_fun node_ccs + else + emit (mkStore curCCSAddr node_ccs) + + -- don't forget to bump the scc count. This closure might have been + -- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal + -- pass has turned into simply let x = e in ...x... and attached + -- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that + -- we don't lose the scc counter, bump it in the entry code for x. + -- ToDo: for a multi-push we should really bump the counter for + -- each of the intervening CCSs, not just the top one. + ; when (not (isCurrentCCS ccs)) $ + emit (bumpSccCount curCCS) + } + + | isCafCCS ccs + = ASSERT(isToplevClosure closure_info) + ASSERT(not re_entrant) + do { -- This is just a special case of the isDerivedFromCurrentCCS + -- case above. We could delete this, but it's a micro + -- optimisation and saves a bit of code. + emit (mkStore curCCSAddr enc_ccs) + ; emit (bumpSccCount node_ccs) + } + + | otherwise + = panic "enterCostCentre" + where + enc_ccs = CmmLit (mkCCostCentreStack ccs) + re_entrant = closureReEntrant closure_info + node_ccs = costCentreFrom (cmmOffsetB (CmmReg nodeReg) (-node_tag)) + is_box = isBox body + + -- if this is a function, then node will be tagged; we must subract the tag + node_tag = funTag closure_info + +-- set the current CCS when entering a PAP +enterCostCentrePAP :: CmmExpr -> FCode () +enterCostCentrePAP closure = + ifProfiling $ do + enter_ccs_fun (costCentreFrom closure) + enteringPAP 1 + +enterCostCentreThunk :: CmmExpr -> FCode () +enterCostCentreThunk closure = + ifProfiling $ do + emit $ mkStore curCCSAddr (costCentreFrom closure) + +enter_ccs_fun stack = emitRtsCall (sLit "EnterFunCCS") [(stack,AddrHint)] False + -- ToDo: vols + +enter_ccs_fsub = enteringPAP 0 + +-- When entering a PAP, EnterFunCCS is called by both the PAP entry +-- code and the function entry code; we don't want the function's +-- entry code to also update CCCS in the event that it was called via +-- a PAP, so we set the flag entering_PAP to indicate that we are +-- entering via a PAP. +enteringPAP :: Integer -> FCode () +enteringPAP n + = emit (mkStore (CmmLit (CmmLabel (mkRtsDataLabel (sLit "entering_PAP")))) + (CmmLit (CmmInt n cIntWidth))) + +ifProfiling :: FCode () -> FCode () +ifProfiling code + | opt_SccProfilingOn = code + | otherwise = nopC + +ifProfilingL :: [a] -> [a] +ifProfilingL xs + | opt_SccProfilingOn = xs + | otherwise = [] + + +--------------------------------------------------------------- +-- Initialising Cost Centres & CCSs +--------------------------------------------------------------- + +initCostCentres :: CollectedCCs -> FCode CmmAGraph +-- Emit the declarations, and return code to register them +initCostCentres (local_CCs, ___extern_CCs, singleton_CCSs) + = getCode $ whenC opt_SccProfilingOn $ + do { mapM_ emitCostCentreDecl local_CCs + ; mapM_ emitCostCentreStackDecl singleton_CCSs + ; emit $ catAGraphs $ map mkRegisterCC local_CCs + ; emit $ catAGraphs $ map mkRegisterCCS singleton_CCSs } + + +emitCostCentreDecl :: CostCentre -> FCode () +emitCostCentreDecl cc = do + { label <- mkStringCLit (costCentreUserName cc) + ; modl <- mkStringCLit (Module.moduleNameString + (Module.moduleName (cc_mod cc))) + -- All cost centres will be in the main package, since we + -- don't normally use -auto-all or add SCCs to other packages. + -- Hence don't emit the package name in the module here. + ; let lits = [ zero, -- StgInt ccID, + label, -- char *label, + modl, -- char *module, + zero, -- StgWord time_ticks + zero64, -- StgWord64 mem_alloc + subsumed, -- StgInt is_caf + zero -- struct _CostCentre *link + ] + ; emitDataLits (mkCCLabel cc) lits + } + where + subsumed | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF + | otherwise = mkIntCLit (ord 'B') -- 'B' == is boring + +emitCostCentreStackDecl :: CostCentreStack -> FCode () +emitCostCentreStackDecl ccs + = case maybeSingletonCCS ccs of + Just cc -> emitDataLits (mkCCSLabel ccs) (mk_lits cc) + Nothing -> pprPanic "emitCostCentreStackDecl" (ppr ccs) + where + mk_lits cc = zero : + mkCCostCentre cc : + replicate (sizeof_ccs_words - 2) zero + -- Note: to avoid making any assumptions about how the + -- C compiler (that compiles the RTS, in particular) does + -- layouts of structs containing long-longs, simply + -- pad out the struct with zero words until we hit the + -- size of the overall struct (which we get via DerivedConstants.h) + +zero = mkIntCLit 0 +zero64 = CmmInt 0 W64 + +sizeof_ccs_words :: Int +sizeof_ccs_words + -- round up to the next word. + | ms == 0 = ws + | otherwise = ws + 1 + where + (ws,ms) = SIZEOF_CostCentreStack `divMod` wORD_SIZE + +-- --------------------------------------------------------------------------- +-- Registering CCs and CCSs + +-- (cc)->link = CC_LIST; +-- CC_LIST = (cc); +-- (cc)->ccID = CC_ID++; + +mkRegisterCC :: CostCentre -> CmmAGraph +mkRegisterCC cc + = withTemp cInt $ \tmp -> + catAGraphs [ + mkStore (cmmOffsetB cc_lit oFFSET_CostCentre_link) + (CmmLoad cC_LIST bWord), + mkStore cC_LIST cc_lit, + mkAssign (CmmLocal tmp) (CmmLoad cC_ID cInt), + mkStore (cmmOffsetB cc_lit oFFSET_CostCentre_ccID) (CmmReg (CmmLocal tmp)), + mkStore cC_ID (cmmRegOffB (CmmLocal tmp) 1) + ] + where + cc_lit = CmmLit (CmmLabel (mkCCLabel cc)) + +-- (ccs)->prevStack = CCS_LIST; +-- CCS_LIST = (ccs); +-- (ccs)->ccsID = CCS_ID++; + +mkRegisterCCS :: CostCentreStack -> CmmAGraph +mkRegisterCCS ccs + = withTemp cInt $ \ tmp -> + catAGraphs [ + mkStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_prevStack) + (CmmLoad cCS_LIST bWord), + mkStore cCS_LIST ccs_lit, + mkAssign (CmmLocal tmp) (CmmLoad cCS_ID cInt), + mkStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_ccsID) (CmmReg (CmmLocal tmp)), + mkStore cCS_ID (cmmRegOffB (CmmLocal tmp) 1) + ] + where + ccs_lit = CmmLit (CmmLabel (mkCCSLabel ccs)) + + +cC_LIST = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CC_LIST"))) +cC_ID = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CC_ID"))) + +cCS_LIST = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CCS_LIST"))) +cCS_ID = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CCS_ID"))) + +-- --------------------------------------------------------------------------- +-- Set the current cost centre stack + +emitSetCCC :: CostCentre -> FCode () +emitSetCCC cc + | not opt_SccProfilingOn = nopC + | otherwise = do + tmp <- newTemp ccsType -- TODO FIXME NOW + ASSERT( sccAbleCostCentre cc ) + pushCostCentre tmp curCCS cc + emit (mkStore curCCSAddr (CmmReg (CmmLocal tmp))) + when (isSccCountCostCentre cc) $ + emit (bumpSccCount curCCS) + +pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode () +pushCostCentre result ccs cc + = emitRtsCallWithResult result AddrHint + (sLit "PushCostCentre") [(ccs,AddrHint), + (CmmLit (mkCCostCentre cc), AddrHint)] + False + +bumpSccCount :: CmmExpr -> CmmAGraph +bumpSccCount ccs + = addToMem REP_CostCentreStack_scc_count + (cmmOffsetB ccs oFFSET_CostCentreStack_scc_count) 1 + +----------------------------------------------------------------------------- +-- +-- Lag/drag/void stuff +-- +----------------------------------------------------------------------------- + +-- +-- Initial value for the LDV field in a static closure +-- +staticLdvInit :: CmmLit +staticLdvInit = zeroCLit + +-- +-- Initial value of the LDV field in a dynamic closure +-- +dynLdvInit :: CmmExpr +dynLdvInit = -- (era << LDV_SHIFT) | LDV_STATE_CREATE + CmmMachOp mo_wordOr [ + CmmMachOp mo_wordShl [loadEra, CmmLit (mkIntCLit lDV_SHIFT) ], + CmmLit (mkWordCLit lDV_STATE_CREATE) + ] + +-- +-- Initialise the LDV word of a new closure +-- +ldvRecordCreate :: CmmExpr -> FCode () +ldvRecordCreate closure = emit $ mkStore (ldvWord closure) dynLdvInit + +-- +-- Called when a closure is entered, marks the closure as having been "used". +-- The closure is not an 'inherently used' one. +-- The closure is not IND or IND_OLDGEN because neither is considered for LDV +-- profiling. +-- +ldvEnterClosure :: ClosureInfo -> FCode () +ldvEnterClosure closure_info = ldvEnter (cmmOffsetB (CmmReg nodeReg) (-tag)) + where tag = funTag closure_info + -- don't forget to substract node's tag + +ldvEnter :: CmmExpr -> FCode () +-- Argument is a closure pointer +ldvEnter cl_ptr + = ifProfiling $ + -- if (era > 0) { + -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | + -- era | LDV_STATE_USE } + emit (mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit]) + (mkStore ldv_wd new_ldv_wd) + mkNop) + where + -- don't forget to substract node's tag + ldv_wd = ldvWord cl_ptr + new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd bWord) + (CmmLit (mkWordCLit lDV_CREATE_MASK))) + (cmmOrWord loadEra (CmmLit (mkWordCLit lDV_STATE_USE))) + +loadEra :: CmmExpr +loadEra = CmmMachOp (MO_UU_Conv cIntWidth wordWidth) + [CmmLoad (mkLblExpr (mkRtsDataLabel (sLit "era"))) cInt] + +ldvWord :: CmmExpr -> CmmExpr +-- Takes the address of a closure, and returns +-- the address of the LDV word in the closure +ldvWord closure_ptr = cmmOffsetB closure_ptr oFFSET_StgHeader_ldvw + +-- LDV constants, from ghc/includes/Constants.h +lDV_SHIFT = (LDV_SHIFT :: Int) +--lDV_STATE_MASK = (LDV_STATE_MASK :: StgWord) +lDV_CREATE_MASK = (LDV_CREATE_MASK :: StgWord) +--lDV_LAST_MASK = (LDV_LAST_MASK :: StgWord) +lDV_STATE_CREATE = (LDV_STATE_CREATE :: StgWord) +lDV_STATE_USE = (LDV_STATE_USE :: StgWord) + diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs new file mode 100644 index 0000000000..e4bebb447f --- /dev/null +++ b/compiler/codeGen/StgCmmTicky.hs @@ -0,0 +1,397 @@ +{-# OPTIONS -w #-} +-- Lots of missing type sigs etc + +----------------------------------------------------------------------------- +-- +-- Code generation for ticky-ticky profiling +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmTicky ( + emitTickyCounter, + + tickyDynAlloc, + tickyAllocHeap, + tickyAllocPrim, + tickyAllocThunk, + tickyAllocPAP, + + tickySlowCall, tickyDirectCall, + + tickyPushUpdateFrame, + tickyUpdateFrameOmitted, + + tickyEnterDynCon, + tickyEnterStaticCon, + tickyEnterViaNode, + + tickyEnterFun, + tickyEnterThunk, + + tickyUpdateBhCaf, + tickyBlackHole, + tickyUnboxedTupleReturn, tickyVectoredReturn, + tickyReturnOldCon, tickyReturnNewCon, + + tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs, + tickyUnknownCall, tickySlowCallPat, + + staticTickyHdr, + ) where + +#include "HsVersions.h" +#include "../includes/DerivedConstants.h" + -- For REP_xxx constants, which are MachReps + +import StgCmmClosure +import StgCmmUtils +import StgCmmMonad +import SMRep + +import StgSyn +import Cmm +import MkZipCfgCmm +import CmmUtils +import CLabel + +import Name +import Id +import StaticFlags +import BasicTypes +import FastString +import Constants +import Outputable + +-- Turgid imports for showTypeCategory +import PrelNames +import TcType +import TyCon + +import Data.Maybe + +----------------------------------------------------------------------------- +-- +-- Ticky-ticky profiling +-- +----------------------------------------------------------------------------- + +staticTickyHdr :: [CmmLit] +-- krc: not using this right now -- +-- in the new version of ticky-ticky, we +-- don't change the closure layout. +-- leave it defined, though, to avoid breaking +-- other things. +staticTickyHdr = [] + +emitTickyCounter :: ClosureInfo -> [Id] -> FCode () +emitTickyCounter cl_info args + = ifTicky $ + do { mod_name <- getModuleName + ; fun_descr_lit <- mkStringCLit (fun_descr mod_name) + ; arg_descr_lit <- mkStringCLit arg_descr + ; emitDataLits ticky_ctr_label -- Must match layout of StgEntCounter +-- krc: note that all the fields are I32 now; some were I16 before, +-- but the code generator wasn't handling that properly and it led to chaos, +-- panic and disorder. + [ mkIntCLit 0, + mkIntCLit (length args), -- Arity + mkIntCLit 0, -- XXX: we no longer know this! Words passed on stack + fun_descr_lit, + arg_descr_lit, + zeroCLit, -- Entry count + zeroCLit, -- Allocs + zeroCLit -- Link + ] } + where + name = closureName cl_info + ticky_ctr_label = mkRednCountsLabel name $ clHasCafRefs cl_info + arg_descr = map (showTypeCategory . idType) args + fun_descr mod_name = ppr_for_ticky_name mod_name name + +-- When printing the name of a thing in a ticky file, we want to +-- give the module name even for *local* things. We print +-- just "x (M)" rather that "M.x" to distinguish them from the global kind. +ppr_for_ticky_name mod_name name + | isInternalName name = showSDocDebug (ppr name <+> (parens (ppr mod_name))) + | otherwise = showSDocDebug (ppr name) + +-- ----------------------------------------------------------------------------- +-- Ticky stack frames + +tickyPushUpdateFrame = ifTicky $ bumpTickyCounter (sLit "UPDF_PUSHED_ctr") +tickyUpdateFrameOmitted = ifTicky $ bumpTickyCounter (sLit "UPDF_OMITTED_ctr") + +-- ----------------------------------------------------------------------------- +-- Ticky entries + +tickyEnterDynCon = ifTicky $ bumpTickyCounter (sLit "ENT_DYN_CON_ctr") +tickyEnterDynThunk = ifTicky $ bumpTickyCounter (sLit "ENT_DYN_THK_ctr") +tickyEnterStaticCon = ifTicky $ bumpTickyCounter (sLit "ENT_STATIC_CON_ctr") +tickyEnterStaticThunk = ifTicky $ bumpTickyCounter (sLit "ENT_STATIC_THK_ctr") +tickyEnterViaNode = ifTicky $ bumpTickyCounter (sLit "ENT_VIA_NODE_ctr") + +tickyEnterThunk :: ClosureInfo -> FCode () +tickyEnterThunk cl_info + | isStaticClosure cl_info = tickyEnterStaticThunk + | otherwise = tickyEnterDynThunk + +tickyBlackHole :: Bool{-updatable-} -> FCode () +tickyBlackHole updatable + = ifTicky (bumpTickyCounter ctr) + where + ctr | updatable = (sLit "UPD_BH_SINGLE_ENTRY_ctr") + | otherwise = (sLit "UPD_BH_UPDATABLE_ctr") + +tickyUpdateBhCaf cl_info + = ifTicky (bumpTickyCounter ctr) + where + ctr | closureUpdReqd cl_info = (sLit "UPD_CAF_BH_SINGLE_ENTRY_ctr") + | otherwise = (sLit "UPD_CAF_BH_UPDATABLE_ctr") + +tickyEnterFun :: ClosureInfo -> FCode () +tickyEnterFun cl_info + = ifTicky $ + do { bumpTickyCounter ctr + ; fun_ctr_lbl <- getTickyCtrLabel + ; registerTickyCtr fun_ctr_lbl + ; bumpTickyCounter' (cmmLabelOffB fun_ctr_lbl oFFSET_StgEntCounter_entry_count) + } + where + ctr | isStaticClosure cl_info = (sLit "ENT_STATIC_FUN_DIRECT_ctr") + | otherwise = (sLit "ENT_DYN_FUN_DIRECT_ctr") + +registerTickyCtr :: CLabel -> FCode () +-- Register a ticky counter +-- if ( ! f_ct.registeredp ) { +-- f_ct.link = ticky_entry_ctrs; /* hook this one onto the front of the list */ +-- ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */ +-- f_ct.registeredp = 1 } +registerTickyCtr ctr_lbl + = emit (mkCmmIfThen test (catAGraphs register_stmts)) + where + -- krc: code generator doesn't handle Not, so we test for Eq 0 instead + test = CmmMachOp (MO_Eq wordWidth) + [CmmLoad (CmmLit (cmmLabelOffB ctr_lbl + oFFSET_StgEntCounter_registeredp)) bWord, + CmmLit (mkIntCLit 0)] + register_stmts + = [ mkStore (CmmLit (cmmLabelOffB ctr_lbl oFFSET_StgEntCounter_link)) + (CmmLoad ticky_entry_ctrs bWord) + , mkStore ticky_entry_ctrs (mkLblExpr ctr_lbl) + , mkStore (CmmLit (cmmLabelOffB ctr_lbl + oFFSET_StgEntCounter_registeredp)) + (CmmLit (mkIntCLit 1)) ] + ticky_entry_ctrs = mkLblExpr (mkRtsDataLabel (sLit "ticky_entry_ctrs")) + +tickyReturnOldCon, tickyReturnNewCon :: Arity -> FCode () +tickyReturnOldCon arity + = ifTicky $ do { bumpTickyCounter (sLit "RET_OLD_ctr") + ; bumpHistogram (sLit "RET_OLD_hst") arity } +tickyReturnNewCon arity + | not opt_DoTickyProfiling = nopC + | otherwise + = ifTicky $ do { bumpTickyCounter (sLit "RET_NEW_ctr") + ; bumpHistogram (sLit "RET_NEW_hst") arity } + +tickyUnboxedTupleReturn :: Int -> FCode () +tickyUnboxedTupleReturn arity + = ifTicky $ do { bumpTickyCounter (sLit "RET_UNBOXED_TUP_ctr") + ; bumpHistogram (sLit "RET_UNBOXED_TUP_hst") arity } + +tickyVectoredReturn :: Int -> FCode () +tickyVectoredReturn family_size + = ifTicky $ do { bumpTickyCounter (sLit "VEC_RETURN_ctr") + ; bumpHistogram (sLit "RET_VEC_RETURN_hst") family_size } + +-- ----------------------------------------------------------------------------- +-- Ticky calls + +-- Ticks at a *call site*: +tickyDirectCall :: Arity -> [StgArg] -> FCode () +tickyDirectCall arity args + | arity == length args = tickyKnownCallExact + | otherwise = do tickyKnownCallExtraArgs + tickySlowCallPat (map argPrimRep (drop arity args)) + +tickyKnownCallTooFewArgs = ifTicky $ bumpTickyCounter (sLit "KNOWN_CALL_TOO_FEW_ARGS_ctr") +tickyKnownCallExact = ifTicky $ bumpTickyCounter (sLit "KNOWN_CALL_ctr") +tickyKnownCallExtraArgs = ifTicky $ bumpTickyCounter (sLit "KNOWN_CALL_EXTRA_ARGS_ctr") +tickyUnknownCall = ifTicky $ bumpTickyCounter (sLit "UNKNOWN_CALL_ctr") + +-- Tick for the call pattern at slow call site (i.e. in addition to +-- tickyUnknownCall, tickyKnownCallExtraArgs, etc.) +tickySlowCall :: LambdaFormInfo -> [StgArg] -> FCode () +tickySlowCall lf_info [] + = return () +tickySlowCall lf_info args + = do { if (isKnownFun lf_info) + then tickyKnownCallTooFewArgs + else tickyUnknownCall + ; tickySlowCallPat (map argPrimRep args) } + +tickySlowCallPat :: [PrimRep] -> FCode () +tickySlowCallPat args = return () +{- LATER: (introduces recursive module dependency now). + case callPattern args of + (str, True) -> bumpTickyCounter' (mkRtsSlowTickyCtrLabel pat) + (str, False) -> bumpTickyCounter (sLit "TICK_SLOW_CALL_OTHER") + +callPattern :: [CgRep] -> (String,Bool) +callPattern reps + | match == length reps = (chars, True) + | otherwise = (chars, False) + where (_,match) = findMatch reps + chars = map argChar reps + +argChar VoidArg = 'v' +argChar PtrArg = 'p' +argChar NonPtrArg = 'n' +argChar LongArg = 'l' +argChar FloatArg = 'f' +argChar DoubleArg = 'd' +-} + +-- ----------------------------------------------------------------------------- +-- Ticky allocation + +tickyDynAlloc :: ClosureInfo -> FCode () +-- Called when doing a dynamic heap allocation +tickyDynAlloc cl_info + = ifTicky $ + case smRepClosureType (closureSMRep cl_info) of + Just Constr -> tick_alloc_con + Just ConstrNoCaf -> tick_alloc_con + Just Fun -> tick_alloc_fun + Just Thunk -> tick_alloc_thk + Just ThunkSelector -> tick_alloc_thk + -- black hole + Nothing -> return () + where + -- will be needed when we fill in stubs + cl_size = closureSize cl_info + slop_size = slopSize cl_info + + tick_alloc_thk + | closureUpdReqd cl_info = tick_alloc_up_thk + | otherwise = tick_alloc_se_thk + + -- krc: changed from panic to return () + -- just to get something working + tick_alloc_con = return () + tick_alloc_fun = return () + tick_alloc_up_thk = return () + tick_alloc_se_thk = return () + + +tickyAllocPrim :: CmmExpr -> CmmExpr -> CmmExpr -> FCode () +tickyAllocPrim hdr goods slop = ifTicky $ pprTrace "ToDo: tickyAllocPrim" empty (return ()) + +tickyAllocThunk :: CmmExpr -> CmmExpr -> FCode () +tickyAllocThunk goods slop = ifTicky $ pprTrace "ToDo: tickyAllocThunk" empty (return ()) + +tickyAllocPAP :: CmmExpr -> CmmExpr -> FCode () +tickyAllocPAP goods slop = ifTicky $ pprTrace "ToDo: tickyAllocPAP" empty (return ()) + +tickyAllocHeap :: VirtualHpOffset -> FCode () +-- Called when doing a heap check [TICK_ALLOC_HEAP] +-- Must be lazy in the amount of allocation! +tickyAllocHeap hp + = ifTicky $ + do { ticky_ctr <- getTickyCtrLabel + ; emit $ catAGraphs $ + if hp == 0 then [] -- Inside the emitMiddle to avoid control + else [ -- dependency on the argument + -- Bump the allcoation count in the StgEntCounter + addToMem REP_StgEntCounter_allocs + (CmmLit (cmmLabelOffB ticky_ctr + oFFSET_StgEntCounter_allocs)) hp, + -- Bump ALLOC_HEAP_ctr + addToMemLbl cLong (mkRtsDataLabel (sLit "ALLOC_HEAP_ctr")) 1, + -- Bump ALLOC_HEAP_tot + addToMemLbl cLong (mkRtsDataLabel (sLit "ALLOC_HEAP_tot")) hp] } + +-- ----------------------------------------------------------------------------- +-- Ticky utils + +ifTicky :: FCode () -> FCode () +ifTicky code + | opt_DoTickyProfiling = code + | otherwise = nopC + +-- All the ticky-ticky counters are declared "unsigned long" in C +bumpTickyCounter :: LitString -> FCode () +bumpTickyCounter lbl = bumpTickyCounter' (cmmLabelOffB (mkRtsDataLabel lbl) 0) + +bumpTickyCounter' :: CmmLit -> FCode () +-- krc: note that we're incrementing the _entry_count_ field of the ticky counter +bumpTickyCounter' lhs = emit (addToMem cLong (CmmLit lhs) 1) + +bumpHistogram :: LitString -> Int -> FCode () +bumpHistogram lbl n +-- = bumpHistogramE lbl (CmmLit (CmmInt (fromIntegral n) cLongWidth)) + = return () -- TEMP SPJ Apr 07 + +bumpHistogramE :: LitString -> CmmExpr -> FCode () +bumpHistogramE lbl n + = do t <- newTemp cLong + emit (mkAssign (CmmLocal t) n) + emit (mkCmmIfThen (CmmMachOp (MO_U_Le cLongWidth) [CmmReg (CmmLocal t), eight]) + (mkAssign (CmmLocal t) eight)) + emit (addToMem cLong + (cmmIndexExpr cLongWidth + (CmmLit (CmmLabel (mkRtsDataLabel lbl))) + (CmmReg (CmmLocal t))) + 1) + where + eight = CmmLit (CmmInt 8 cLongWidth) + +------------------------------------------------------------------ +-- Showing the "type category" for ticky-ticky profiling + +showTypeCategory :: Type -> Char + {- {C,I,F,D} char, int, float, double + T tuple + S other single-constructor type + {c,i,f,d} unboxed ditto + t *unpacked* tuple + s *unpacked" single-cons... + + v void# + a primitive array + + E enumeration type + + dictionary, unless it's a ... + L List + > function + M other (multi-constructor) data-con type + . other type + - reserved for others to mark as "uninteresting" + -} +showTypeCategory ty + = if isDictTy ty + then '+' + else + case tcSplitTyConApp_maybe ty of + Nothing -> if isJust (tcSplitFunTy_maybe ty) + then '>' + else '.' + + Just (tycon, _) -> + let utc = getUnique tycon in + if utc == charDataConKey then 'C' + else if utc == intDataConKey then 'I' + else if utc == floatDataConKey then 'F' + else if utc == doubleDataConKey then 'D' + else if utc == charPrimTyConKey then 'c' + else if (utc == intPrimTyConKey || utc == wordPrimTyConKey + || utc == addrPrimTyConKey) then 'i' + else if utc == floatPrimTyConKey then 'f' + else if utc == doublePrimTyConKey then 'd' + else if isPrimTyCon tycon {- array, we hope -} then 'A' -- Bogus + else if isEnumerationTyCon tycon then 'E' + else if isTupleTyCon tycon then 'T' + else if isJust (tyConSingleDataCon_maybe tycon) then 'S' + else if utc == listTyConKey then 'L' + else 'M' -- oh, well... diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs new file mode 100644 index 0000000000..6cfca5f05f --- /dev/null +++ b/compiler/codeGen/StgCmmUtils.hs @@ -0,0 +1,902 @@ +----------------------------------------------------------------------------- +-- +-- Code generator utilities; mostly monadic +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmUtils ( + cgLit, mkSimpleLit, + emitDataLits, mkDataLits, + emitRODataLits, mkRODataLits, + emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult, + assignTemp, newTemp, withTemp, + + newUnboxedTupleRegs, + + mkMultiAssign, mkCmmSwitch, mkCmmLitSwitch, + emitSwitch, + + tagToClosure, mkTaggedObjectLoad, + + callerSaveVolatileRegs, get_GlobalReg_addr, + + cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord, + cmmUGtWord, + cmmOffsetExprW, cmmOffsetExprB, + cmmRegOffW, cmmRegOffB, + cmmLabelOffW, cmmLabelOffB, + cmmOffsetW, cmmOffsetB, + cmmOffsetLitW, cmmOffsetLitB, + cmmLoadIndexW, + cmmConstrTag, cmmConstrTag1, + + cmmUntag, cmmIsTagged, cmmGetTag, + + addToMem, addToMemE, addToMemLbl, + mkWordCLit, + mkStringCLit, mkByteStringCLit, + packHalfWordsCLit, + blankWord, + + getSRTInfo, clHasCafRefs, srt_escape + ) where + +#include "HsVersions.h" +#include "MachRegs.h" + +import StgCmmMonad +import StgCmmClosure +import BlockId +import Cmm +import CmmExpr +import MkZipCfgCmm +import CLabel +import CmmUtils +import PprCmm ( {- instances -} ) + +import ForeignCall +import IdInfo +import Type +import TyCon +import Constants +import SMRep +import StgSyn ( SRT(..) ) +import Literal +import Digraph +import ListSetOps +import Util +import Unique +import DynFlags +import FastString +import Outputable + +import Data.Char +import Data.Bits +import Data.Word +import Data.Maybe + + +------------------------------------------------------------------------- +-- +-- Literals +-- +------------------------------------------------------------------------- + +cgLit :: Literal -> FCode CmmLit +cgLit (MachStr s) = mkByteStringCLit (bytesFS s) + -- not unpackFS; we want the UTF-8 byte stream. +cgLit other_lit = return (mkSimpleLit other_lit) + +mkSimpleLit :: Literal -> CmmLit +mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordWidth +mkSimpleLit MachNullAddr = zeroCLit +mkSimpleLit (MachInt i) = CmmInt i wordWidth +mkSimpleLit (MachInt64 i) = CmmInt i W64 +mkSimpleLit (MachWord i) = CmmInt i wordWidth +mkSimpleLit (MachWord64 i) = CmmInt i W64 +mkSimpleLit (MachFloat r) = CmmFloat r W32 +mkSimpleLit (MachDouble r) = CmmFloat r W64 +mkSimpleLit (MachLabel fs ms) = CmmLabel (mkForeignLabel fs ms is_dyn) + where + is_dyn = False -- ToDo: fix me +mkSimpleLit other = pprPanic "mkSimpleLit" (ppr other) + +mkLtOp :: Literal -> MachOp +-- On signed literals we must do a signed comparison +mkLtOp (MachInt _) = MO_S_Lt wordWidth +mkLtOp (MachFloat _) = MO_F_Lt W32 +mkLtOp (MachDouble _) = MO_F_Lt W64 +mkLtOp lit = MO_U_Lt (typeWidth (cmmLitType (mkSimpleLit lit))) + -- ToDo: seems terribly indirect! + + +--------------------------------------------------- +-- +-- Cmm data type functions +-- +--------------------------------------------------- + +-- The "B" variants take byte offsets +cmmRegOffB :: CmmReg -> ByteOff -> CmmExpr +cmmRegOffB = cmmRegOff + +cmmOffsetB :: CmmExpr -> ByteOff -> CmmExpr +cmmOffsetB = cmmOffset + +cmmOffsetExprB :: CmmExpr -> CmmExpr -> CmmExpr +cmmOffsetExprB = cmmOffsetExpr + +cmmLabelOffB :: CLabel -> ByteOff -> CmmLit +cmmLabelOffB = cmmLabelOff + +cmmOffsetLitB :: CmmLit -> ByteOff -> CmmLit +cmmOffsetLitB = cmmOffsetLit + +----------------------- +-- The "W" variants take word offsets +cmmOffsetExprW :: CmmExpr -> CmmExpr -> CmmExpr +-- The second arg is a *word* offset; need to change it to bytes +cmmOffsetExprW e (CmmLit (CmmInt n _)) = cmmOffsetW e (fromInteger n) +cmmOffsetExprW e wd_off = cmmIndexExpr wordWidth e wd_off + +cmmOffsetW :: CmmExpr -> WordOff -> CmmExpr +cmmOffsetW e n = cmmOffsetB e (wORD_SIZE * n) + +cmmRegOffW :: CmmReg -> WordOff -> CmmExpr +cmmRegOffW reg wd_off = cmmRegOffB reg (wd_off * wORD_SIZE) + +cmmOffsetLitW :: CmmLit -> WordOff -> CmmLit +cmmOffsetLitW lit wd_off = cmmOffsetLitB lit (wORD_SIZE * wd_off) + +cmmLabelOffW :: CLabel -> WordOff -> CmmLit +cmmLabelOffW lbl wd_off = cmmLabelOffB lbl (wORD_SIZE * wd_off) + +cmmLoadIndexW :: CmmExpr -> Int -> CmmType -> CmmExpr +cmmLoadIndexW base off ty = CmmLoad (cmmOffsetW base off) ty + +----------------------- +cmmULtWord, cmmUGeWord, cmmUGtWord, cmmSubWord, + cmmNeWord, cmmEqWord, cmmOrWord, cmmAndWord + :: CmmExpr -> CmmExpr -> CmmExpr +cmmOrWord e1 e2 = CmmMachOp mo_wordOr [e1, e2] +cmmAndWord e1 e2 = CmmMachOp mo_wordAnd [e1, e2] +cmmNeWord e1 e2 = CmmMachOp mo_wordNe [e1, e2] +cmmEqWord e1 e2 = CmmMachOp mo_wordEq [e1, e2] +cmmULtWord e1 e2 = CmmMachOp mo_wordULt [e1, e2] +cmmUGeWord e1 e2 = CmmMachOp mo_wordUGe [e1, e2] +cmmUGtWord e1 e2 = CmmMachOp mo_wordUGt [e1, e2] +--cmmShlWord e1 e2 = CmmMachOp mo_wordShl [e1, e2] +--cmmUShrWord e1 e2 = CmmMachOp mo_wordUShr [e1, e2] +cmmSubWord e1 e2 = CmmMachOp mo_wordSub [e1, e2] + +cmmNegate :: CmmExpr -> CmmExpr +cmmNegate (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep) +cmmNegate e = CmmMachOp (MO_S_Neg (cmmExprWidth e)) [e] + +blankWord :: CmmStatic +blankWord = CmmUninitialised wORD_SIZE + +-- Tagging -- +-- Tag bits mask +--cmmTagBits = CmmLit (mkIntCLit tAG_BITS) +cmmTagMask, cmmPointerMask :: CmmExpr +cmmTagMask = CmmLit (mkIntCLit tAG_MASK) +cmmPointerMask = CmmLit (mkIntCLit (complement tAG_MASK)) + +-- Used to untag a possibly tagged pointer +-- A static label need not be untagged +cmmUntag, cmmGetTag :: CmmExpr -> CmmExpr +cmmUntag e@(CmmLit (CmmLabel _)) = e +-- Default case +cmmUntag e = (e `cmmAndWord` cmmPointerMask) + +cmmGetTag e = (e `cmmAndWord` cmmTagMask) + +-- Test if a closure pointer is untagged +cmmIsTagged :: CmmExpr -> CmmExpr +cmmIsTagged e = (e `cmmAndWord` cmmTagMask) + `cmmNeWord` CmmLit zeroCLit + +cmmConstrTag, cmmConstrTag1 :: CmmExpr -> CmmExpr +cmmConstrTag e = (e `cmmAndWord` cmmTagMask) `cmmSubWord` (CmmLit (mkIntCLit 1)) +-- Get constructor tag, but one based. +cmmConstrTag1 e = e `cmmAndWord` cmmTagMask + +----------------------- +-- Making literals + +mkWordCLit :: StgWord -> CmmLit +mkWordCLit wd = CmmInt (fromIntegral wd) wordWidth + +packHalfWordsCLit :: (Integral a, Integral b) => a -> b -> CmmLit +-- Make a single word literal in which the lower_half_word is +-- at the lower address, and the upper_half_word is at the +-- higher address +-- ToDo: consider using half-word lits instead +-- but be careful: that's vulnerable when reversed +packHalfWordsCLit lower_half_word upper_half_word +#ifdef WORDS_BIGENDIAN + = mkWordCLit ((fromIntegral lower_half_word `shiftL` hALF_WORD_SIZE_IN_BITS) + .|. fromIntegral upper_half_word) +#else + = mkWordCLit ((fromIntegral lower_half_word) + .|. (fromIntegral upper_half_word `shiftL` hALF_WORD_SIZE_IN_BITS)) +#endif + +-------------------------------------------------------------------------- +-- +-- Incrementing a memory location +-- +-------------------------------------------------------------------------- + +addToMemLbl :: CmmType -> CLabel -> Int -> CmmAGraph +addToMemLbl rep lbl n = addToMem rep (CmmLit (CmmLabel lbl)) n + +addToMem :: CmmType -- rep of the counter + -> CmmExpr -- Address + -> Int -- What to add (a word) + -> CmmAGraph +addToMem rep ptr n = addToMemE rep ptr (CmmLit (CmmInt (toInteger n) (typeWidth rep))) + +addToMemE :: CmmType -- rep of the counter + -> CmmExpr -- Address + -> CmmExpr -- What to add (a word-typed expression) + -> CmmAGraph +addToMemE rep ptr n + = mkStore ptr (CmmMachOp (MO_Add (typeWidth rep)) [CmmLoad ptr rep, n]) + + +------------------------------------------------------------------------- +-- +-- Loading a field from an object, +-- where the object pointer is itself tagged +-- +------------------------------------------------------------------------- + +mkTaggedObjectLoad :: LocalReg -> LocalReg -> WordOff -> DynTag -> CmmAGraph +-- (loadTaggedObjectField reg base off tag) generates assignment +-- reg = bitsK[ base + off - tag ] +-- where K is fixed by 'reg' +mkTaggedObjectLoad reg base offset tag + = mkAssign (CmmLocal reg) + (CmmLoad (cmmOffsetB (CmmReg (CmmLocal base)) + (wORD_SIZE*offset - tag)) + (localRegType reg)) + +------------------------------------------------------------------------- +-- +-- Converting a closure tag to a closure for enumeration types +-- (this is the implementation of tagToEnum#). +-- +------------------------------------------------------------------------- + +tagToClosure :: TyCon -> CmmExpr -> CmmExpr +tagToClosure tycon tag + = CmmLoad (cmmOffsetExprW closure_tbl tag) bWord + where closure_tbl = CmmLit (CmmLabel lbl) + lbl = mkClosureTableLabel (tyConName tycon) NoCafRefs + +------------------------------------------------------------------------- +-- +-- Conditionals and rts calls +-- +------------------------------------------------------------------------- + +emitRtsCall :: LitString -> [(CmmExpr,ForeignHint)] -> Bool -> FCode () +emitRtsCall fun args safe = emitRtsCall' [] fun args Nothing safe + -- The 'Nothing' says "save all global registers" + +emitRtsCallWithVols :: LitString -> [(CmmExpr,ForeignHint)] -> [GlobalReg] -> Bool -> FCode () +emitRtsCallWithVols fun args vols safe + = emitRtsCall' [] fun args (Just vols) safe + +emitRtsCallWithResult :: LocalReg -> ForeignHint -> LitString + -> [(CmmExpr,ForeignHint)] -> Bool -> FCode () +emitRtsCallWithResult res hint fun args safe + = emitRtsCall' [(res,hint)] fun args Nothing safe + +-- Make a call to an RTS C procedure +emitRtsCall' + :: [(LocalReg,ForeignHint)] + -> LitString + -> [(CmmExpr,ForeignHint)] + -> Maybe [GlobalReg] + -> Bool -- True <=> CmmSafe call + -> FCode () +emitRtsCall' res fun args _vols safe + = --error "emitRtsCall'" + do { emit caller_save + ; emit call + ; emit caller_load } + where + call = if safe then + mkCall fun_expr CCallConv res' args' undefined + else + mkUnsafeCall (ForeignTarget fun_expr + (ForeignConvention CCallConv arg_hints res_hints)) res' args' + (args', arg_hints) = unzip args + (res', res_hints) = unzip res + (caller_save, caller_load) = callerSaveVolatileRegs + fun_expr = mkLblExpr (mkRtsCodeLabel fun) + + +----------------------------------------------------------------------------- +-- +-- Caller-Save Registers +-- +----------------------------------------------------------------------------- + +-- Here we generate the sequence of saves/restores required around a +-- foreign call instruction. + +-- TODO: reconcile with includes/Regs.h +-- * Regs.h claims that BaseReg should be saved last and loaded first +-- * This might not have been tickled before since BaseReg is callee save +-- * Regs.h saves SparkHd, ParkT1, SparkBase and SparkLim +callerSaveVolatileRegs :: (CmmAGraph, CmmAGraph) +callerSaveVolatileRegs = (caller_save, caller_load) + where + caller_save = catAGraphs (map callerSaveGlobalReg regs_to_save) + caller_load = catAGraphs (map callerRestoreGlobalReg regs_to_save) + + system_regs = [ Sp,SpLim,Hp,HpLim,CurrentTSO,CurrentNursery + {- ,SparkHd,SparkTl,SparkBase,SparkLim -} + , BaseReg ] + + regs_to_save = filter callerSaves system_regs + + callerSaveGlobalReg reg + = mkStore (get_GlobalReg_addr reg) (CmmReg (CmmGlobal reg)) + + callerRestoreGlobalReg reg + = mkAssign (CmmGlobal reg) + (CmmLoad (get_GlobalReg_addr reg) (globalRegType reg)) + +-- ----------------------------------------------------------------------------- +-- Global registers + +-- We map STG registers onto appropriate CmmExprs. Either they map +-- to real machine registers or stored as offsets from BaseReg. Given +-- a GlobalReg, get_GlobalReg_addr always produces the +-- register table address for it. +-- (See also get_GlobalReg_reg_or_addr in MachRegs) + +get_GlobalReg_addr :: GlobalReg -> CmmExpr +get_GlobalReg_addr BaseReg = regTableOffset 0 +get_GlobalReg_addr mid = get_Regtable_addr_from_offset + (globalRegType mid) (baseRegOffset mid) + +-- Calculate a literal representing an offset into the register table. +-- Used when we don't have an actual BaseReg to offset from. +regTableOffset :: Int -> CmmExpr +regTableOffset n = + CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r + n)) + +get_Regtable_addr_from_offset :: CmmType -> Int -> CmmExpr +get_Regtable_addr_from_offset _rep offset = +#ifdef REG_Base + CmmRegOff (CmmGlobal BaseReg) offset +#else + regTableOffset offset +#endif + + +-- | Returns 'True' if this global register is stored in a caller-saves +-- machine register. + +callerSaves :: GlobalReg -> Bool + +#ifdef CALLER_SAVES_Base +callerSaves BaseReg = True +#endif +#ifdef CALLER_SAVES_Sp +callerSaves Sp = True +#endif +#ifdef CALLER_SAVES_SpLim +callerSaves SpLim = True +#endif +#ifdef CALLER_SAVES_Hp +callerSaves Hp = True +#endif +#ifdef CALLER_SAVES_HpLim +callerSaves HpLim = True +#endif +#ifdef CALLER_SAVES_CurrentTSO +callerSaves CurrentTSO = True +#endif +#ifdef CALLER_SAVES_CurrentNursery +callerSaves CurrentNursery = True +#endif +callerSaves _ = False + + +-- ----------------------------------------------------------------------------- +-- Information about global registers + +baseRegOffset :: GlobalReg -> Int + +baseRegOffset Sp = oFFSET_StgRegTable_rSp +baseRegOffset SpLim = oFFSET_StgRegTable_rSpLim +baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1 +baseRegOffset Hp = oFFSET_StgRegTable_rHp +baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim +baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO +baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery +baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc +baseRegOffset GCEnter1 = oFFSET_stgGCEnter1 +baseRegOffset GCFun = oFFSET_stgGCFun +baseRegOffset reg = pprPanic "baseRegOffset:" (ppr reg) + +------------------------------------------------------------------------- +-- +-- Strings generate a top-level data block +-- +------------------------------------------------------------------------- + +emitDataLits :: CLabel -> [CmmLit] -> FCode () +-- Emit a data-segment data block +emitDataLits lbl lits + = emitData Data (CmmDataLabel lbl : map CmmStaticLit lits) + +mkDataLits :: CLabel -> [CmmLit] -> GenCmmTop CmmStatic info stmt +-- Emit a data-segment data block +mkDataLits lbl lits + = CmmData Data (CmmDataLabel lbl : map CmmStaticLit lits) + +emitRODataLits :: CLabel -> [CmmLit] -> FCode () +-- Emit a read-only data block +emitRODataLits lbl lits + = emitData section (CmmDataLabel lbl : map CmmStaticLit lits) + where section | any needsRelocation lits = RelocatableReadOnlyData + | otherwise = ReadOnlyData + needsRelocation (CmmLabel _) = True + needsRelocation (CmmLabelOff _ _) = True + needsRelocation _ = False + +mkRODataLits :: CLabel -> [CmmLit] -> GenCmmTop CmmStatic info stmt +mkRODataLits lbl lits + = CmmData section (CmmDataLabel lbl : map CmmStaticLit lits) + where section | any needsRelocation lits = RelocatableReadOnlyData + | otherwise = ReadOnlyData + needsRelocation (CmmLabel _) = True + needsRelocation (CmmLabelOff _ _) = True + needsRelocation _ = False + +mkStringCLit :: String -> FCode CmmLit +-- Make a global definition for the string, +-- and return its label +mkStringCLit str = mkByteStringCLit (map (fromIntegral . ord) str) + +mkByteStringCLit :: [Word8] -> FCode CmmLit +mkByteStringCLit bytes + = do { uniq <- newUnique + ; let lbl = mkStringLitLabel uniq + ; emitData ReadOnlyData [CmmDataLabel lbl, CmmString bytes] + ; return (CmmLabel lbl) } + +------------------------------------------------------------------------- +-- +-- Assigning expressions to temporaries +-- +------------------------------------------------------------------------- + +assignTemp :: CmmExpr -> FCode LocalReg +-- Make sure the argument is in a local register +assignTemp (CmmReg (CmmLocal reg)) = return reg +assignTemp e = do { uniq <- newUnique + ; let reg = LocalReg uniq (cmmExprType e) + ; emit (mkAssign (CmmLocal reg) e) + ; return reg } + +newTemp :: CmmType -> FCode LocalReg +newTemp rep = do { uniq <- newUnique + ; return (LocalReg uniq rep) } + +newUnboxedTupleRegs :: Type -> FCode ([LocalReg], [ForeignHint]) +-- Choose suitable local regs to use for the components +-- of an unboxed tuple that we are about to return to +-- the Sequel. If the Sequel is a joint point, using the +-- regs it wants will save later assignments. +newUnboxedTupleRegs res_ty + = ASSERT( isUnboxedTupleType res_ty ) + do { sequel <- getSequel + ; regs <- choose_regs sequel + ; ASSERT( regs `equalLength` reps ) + return (regs, map primRepForeignHint reps) } + where + ty_args = tyConAppArgs (repType res_ty) + reps = [ rep + | ty <- ty_args + , let rep = typePrimRep ty + , not (isVoidRep rep) ] + choose_regs (AssignTo regs _) = return regs + choose_regs _other = mapM (newTemp . primRepCmmType) reps + + + +------------------------------------------------------------------------- +-- mkMultiAssign +------------------------------------------------------------------------- + +mkMultiAssign :: [LocalReg] -> [CmmExpr] -> CmmAGraph +-- Emit code to perform the assignments in the +-- input simultaneously, using temporary variables when necessary. + +type Key = Int +type Vrtx = (Key, Stmt) -- Give each vertex a unique number, + -- for fast comparison +type Stmt = (LocalReg, CmmExpr) -- r := e + +-- We use the strongly-connected component algorithm, in which +-- * the vertices are the statements +-- * an edge goes from s1 to s2 iff +-- s1 assigns to something s2 uses +-- that is, if s1 should *follow* s2 in the final order + +mkMultiAssign [] [] = mkNop +mkMultiAssign [reg] [rhs] = mkAssign (CmmLocal reg) rhs +mkMultiAssign regs rhss = ASSERT( equalLength regs rhss ) + unscramble ([1..] `zip` (regs `zip` rhss)) + +unscramble :: [Vrtx] -> CmmAGraph +unscramble vertices + = catAGraphs (map do_component components) + where + edges :: [ (Vrtx, Key, [Key]) ] + edges = [ (vertex, key1, edges_from stmt1) + | vertex@(key1, stmt1) <- vertices ] + + edges_from :: Stmt -> [Key] + edges_from stmt1 = [ key2 | (key2, stmt2) <- vertices, + stmt1 `mustFollow` stmt2 ] + + components :: [SCC Vrtx] + components = stronglyConnCompFromEdgedVertices edges + + -- do_components deal with one strongly-connected component + -- Not cyclic, or singleton? Just do it + do_component :: SCC Vrtx -> CmmAGraph + do_component (AcyclicSCC (_,stmt)) = mk_graph stmt + do_component (CyclicSCC []) = panic "do_component" + do_component (CyclicSCC [(_,stmt)]) = mk_graph stmt + + -- Cyclic? Then go via temporaries. Pick one to + -- break the loop and try again with the rest. + do_component (CyclicSCC ((_,first_stmt) : rest)) + = withUnique $ \u -> + let (to_tmp, from_tmp) = split u first_stmt + in mk_graph to_tmp + <*> unscramble rest + <*> mk_graph from_tmp + + split :: Unique -> Stmt -> (Stmt, Stmt) + split uniq (reg, rhs) + = ((tmp, rhs), (reg, CmmReg (CmmLocal tmp))) + where + rep = cmmExprType rhs + tmp = LocalReg uniq rep + + mk_graph :: Stmt -> CmmAGraph + mk_graph (reg, rhs) = mkAssign (CmmLocal reg) rhs + +mustFollow :: Stmt -> Stmt -> Bool +(reg, _) `mustFollow` (_, rhs) = reg `regUsedIn` rhs + +regUsedIn :: LocalReg -> CmmExpr -> Bool +reg `regUsedIn` CmmLoad e _ = reg `regUsedIn` e +reg `regUsedIn` CmmReg (CmmLocal reg') = reg == reg' +reg `regUsedIn` CmmRegOff (CmmLocal reg') _ = reg == reg' +reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es +_reg `regUsedIn` _other = False -- The CmmGlobal cases + + +------------------------------------------------------------------------- +-- mkSwitch +------------------------------------------------------------------------- + + +emitSwitch :: CmmExpr -- Tag to switch on + -> [(ConTagZ, CmmAGraph)] -- Tagged branches + -> Maybe CmmAGraph -- Default branch (if any) + -> ConTagZ -> ConTagZ -- Min and Max possible values; behaviour + -- outside this range is undefined + -> FCode () +emitSwitch tag_expr branches mb_deflt lo_tag hi_tag + = do { dflags <- getDynFlags + ; emit (mkCmmSwitch (via_C dflags) tag_expr branches mb_deflt lo_tag hi_tag) } + where + via_C dflags | HscC <- hscTarget dflags = True + | otherwise = False + + +mkCmmSwitch :: Bool -- True <=> never generate a conditional tree + -> CmmExpr -- Tag to switch on + -> [(ConTagZ, CmmAGraph)] -- Tagged branches + -> Maybe CmmAGraph -- Default branch (if any) + -> ConTagZ -> ConTagZ -- Min and Max possible values; behaviour + -- outside this range is undefined + -> CmmAGraph + +-- First, two rather common cases in which there is no work to do +mkCmmSwitch _ _ [] (Just code) _ _ = code +mkCmmSwitch _ _ [(_,code)] Nothing _ _ = code + +-- Right, off we go +mkCmmSwitch via_C tag_expr branches mb_deflt lo_tag hi_tag + = withFreshLabel "switch join" $ \ join_lbl -> + label_default join_lbl mb_deflt $ \ mb_deflt -> + label_branches join_lbl branches $ \ branches -> + assignTemp' tag_expr $ \tag_expr' -> + + mk_switch tag_expr' (sortLe le branches) mb_deflt + lo_tag hi_tag via_C + -- Sort the branches before calling mk_switch + <*> mkLabel join_lbl Nothing + + where + (t1,_) `le` (t2,_) = t1 <= t2 + +mk_switch :: CmmExpr -> [(ConTagZ, BlockId)] + -> Maybe BlockId + -> ConTagZ -> ConTagZ -> Bool + -> CmmAGraph + +-- SINGLETON TAG RANGE: no case analysis to do +mk_switch _tag_expr [(tag, lbl)] _ lo_tag hi_tag _via_C + | lo_tag == hi_tag + = ASSERT( tag == lo_tag ) + mkBranch lbl + +-- SINGLETON BRANCH, NO DEFAULT: no case analysis to do +mk_switch _tag_expr [(_tag,lbl)] Nothing _ _ _ + = mkBranch lbl + -- The simplifier might have eliminated a case + -- so we may have e.g. case xs of + -- [] -> e + -- In that situation we can be sure the (:) case + -- can't happen, so no need to test + +-- SINGLETON BRANCH: one equality check to do +mk_switch tag_expr [(tag,lbl)] (Just deflt) _ _ _ + = mkCbranch cond deflt lbl + where + cond = cmmNeWord tag_expr (CmmLit (mkIntCLit tag)) + -- We have lo_tag < hi_tag, but there's only one branch, + -- so there must be a default + +-- ToDo: we might want to check for the two branch case, where one of +-- the branches is the tag 0, because comparing '== 0' is likely to be +-- more efficient than other kinds of comparison. + +-- DENSE TAG RANGE: use a switch statment. +-- +-- We also use a switch uncoditionally when compiling via C, because +-- this will get emitted as a C switch statement and the C compiler +-- should do a good job of optimising it. Also, older GCC versions +-- (2.95 in particular) have problems compiling the complicated +-- if-trees generated by this code, so compiling to a switch every +-- time works around that problem. +-- +mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C + | use_switch -- Use a switch + = let + find_branch :: ConTagZ -> Maybe BlockId + find_branch i = case (assocMaybe branches i) of + Just lbl -> Just lbl + Nothing -> mb_deflt + + -- NB. we have eliminated impossible branches at + -- either end of the range (see below), so the first + -- tag of a real branch is real_lo_tag (not lo_tag). + arms :: [Maybe BlockId] + arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]] + in + mkSwitch (cmmOffset tag_expr (- real_lo_tag)) arms + + -- if we can knock off a bunch of default cases with one if, then do so + | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches + = mkCmmIfThenElse + (cmmULtWord tag_expr (CmmLit (mkIntCLit lowest_branch))) + (mkBranch deflt) + (mk_switch tag_expr branches mb_deflt + lowest_branch hi_tag via_C) + + | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches + = mkCmmIfThenElse + (cmmUGtWord tag_expr (CmmLit (mkIntCLit highest_branch))) + (mk_switch tag_expr branches mb_deflt + lo_tag highest_branch via_C) + (mkBranch deflt) + + | otherwise -- Use an if-tree + = mkCmmIfThenElse + (cmmUGeWord tag_expr (CmmLit (mkIntCLit mid_tag))) + (mk_switch tag_expr hi_branches mb_deflt + mid_tag hi_tag via_C) + (mk_switch tag_expr lo_branches mb_deflt + lo_tag (mid_tag-1) via_C) + -- we test (e >= mid_tag) rather than (e < mid_tag), because + -- the former works better when e is a comparison, and there + -- are two tags 0 & 1 (mid_tag == 1). In this case, the code + -- generator can reduce the condition to e itself without + -- having to reverse the sense of the comparison: comparisons + -- can't always be easily reversed (eg. floating + -- pt. comparisons). + where + use_switch = {- pprTrace "mk_switch" ( + ppr tag_expr <+> text "n_tags:" <+> int n_tags <+> + text "branches:" <+> ppr (map fst branches) <+> + text "n_branches:" <+> int n_branches <+> + text "lo_tag:" <+> int lo_tag <+> + text "hi_tag:" <+> int hi_tag <+> + text "real_lo_tag:" <+> int real_lo_tag <+> + text "real_hi_tag:" <+> int real_hi_tag) $ -} + ASSERT( n_branches > 1 && n_tags > 1 ) + n_tags > 2 && (via_C || (dense && big_enough)) + -- up to 4 branches we use a decision tree, otherwise + -- a switch (== jump table in the NCG). This seems to be + -- optimal, and corresponds with what gcc does. + big_enough = n_branches > 4 + dense = n_branches > (n_tags `div` 2) + n_branches = length branches + + -- ignore default slots at each end of the range if there's + -- no default branch defined. + lowest_branch = fst (head branches) + highest_branch = fst (last branches) + + real_lo_tag + | isNothing mb_deflt = lowest_branch + | otherwise = lo_tag + + real_hi_tag + | isNothing mb_deflt = highest_branch + | otherwise = hi_tag + + n_tags = real_hi_tag - real_lo_tag + 1 + + -- INVARIANT: Provided hi_tag > lo_tag (which is true) + -- lo_tag <= mid_tag < hi_tag + -- lo_branches have tags < mid_tag + -- hi_branches have tags >= mid_tag + + (mid_tag,_) = branches !! (n_branches `div` 2) + -- 2 branches => n_branches `div` 2 = 1 + -- => branches !! 1 give the *second* tag + -- There are always at least 2 branches here + + (lo_branches, hi_branches) = span is_lo branches + is_lo (t,_) = t < mid_tag + +-------------- +mkCmmLitSwitch :: CmmExpr -- Tag to switch on + -> [(Literal, CmmAGraph)] -- Tagged branches + -> CmmAGraph -- Default branch (always) + -> CmmAGraph -- Emit the code +-- Used for general literals, whose size might not be a word, +-- where there is always a default case, and where we don't know +-- the range of values for certain. For simplicity we always generate a tree. +-- +-- ToDo: for integers we could do better here, perhaps by generalising +-- mk_switch and using that. --SDM 15/09/2004 +mkCmmLitSwitch _scrut [] deflt = deflt +mkCmmLitSwitch scrut branches deflt + = assignTemp' scrut $ \ scrut' -> + withFreshLabel "switch join" $ \ join_lbl -> + label_code join_lbl deflt $ \ deflt -> + label_branches join_lbl branches $ \ branches -> + mk_lit_switch scrut' deflt (sortLe le branches) + where + le (t1,_) (t2,_) = t1 <= t2 + +mk_lit_switch :: CmmExpr -> BlockId + -> [(Literal,BlockId)] + -> CmmAGraph +mk_lit_switch scrut deflt [(lit,blk)] + = mkCbranch + (CmmMachOp (MO_Ne rep) [scrut, CmmLit cmm_lit]) + deflt blk + where + cmm_lit = mkSimpleLit lit + rep = typeWidth (cmmLitType cmm_lit) + +mk_lit_switch scrut deflt_blk_id branches + = mkCmmIfThenElse cond + (mk_lit_switch scrut deflt_blk_id lo_branches) + (mk_lit_switch scrut deflt_blk_id hi_branches) + where + n_branches = length branches + (mid_lit,_) = branches !! (n_branches `div` 2) + -- See notes above re mid_tag + + (lo_branches, hi_branches) = span is_lo branches + is_lo (t,_) = t < mid_lit + + cond = CmmMachOp (mkLtOp mid_lit) + [scrut, CmmLit (mkSimpleLit mid_lit)] + + +-------------- +label_default :: BlockId -> Maybe CmmAGraph + -> (Maybe BlockId -> CmmAGraph) + -> CmmAGraph +label_default _ Nothing thing_inside + = thing_inside Nothing +label_default join_lbl (Just code) thing_inside + = label_code join_lbl code $ \ lbl -> + thing_inside (Just lbl) + +-------------- +label_branches :: BlockId -> [(a,CmmAGraph)] + -> ([(a,BlockId)] -> CmmAGraph) + -> CmmAGraph +label_branches _join_lbl [] thing_inside + = thing_inside [] +label_branches join_lbl ((tag,code):branches) thing_inside + = label_code join_lbl code $ \ lbl -> + label_branches join_lbl branches $ \ branches' -> + thing_inside ((tag,lbl):branches') + +-------------- +label_code :: BlockId -> CmmAGraph -> (BlockId -> CmmAGraph) -> CmmAGraph +-- (label_code J code fun) +-- generates +-- [L: code; goto J] fun L +label_code join_lbl code thing_inside + = withFreshLabel "switch" $ \lbl -> + outOfLine (mkLabel lbl Nothing <*> code <*> mkBranch join_lbl) + <*> thing_inside lbl + + +-------------- +assignTemp' :: CmmExpr -> (CmmExpr -> CmmAGraph) -> CmmAGraph +assignTemp' e thing_inside + | isTrivialCmmExpr e = thing_inside e + | otherwise = withTemp (cmmExprType e) $ \ lreg -> + let reg = CmmLocal lreg in + mkAssign reg e <*> thing_inside (CmmReg reg) + +withTemp :: CmmType -> (LocalReg -> CmmAGraph) -> CmmAGraph +withTemp rep thing_inside + = withUnique $ \uniq -> thing_inside (LocalReg uniq rep) + + +------------------------------------------------------------------------- +-- +-- Static Reference Tables +-- +------------------------------------------------------------------------- + +-- There is just one SRT for each top level binding; all the nested +-- bindings use sub-sections of this SRT. The label is passed down to +-- the nested bindings via the monad. + +getSRTInfo :: SRT -> FCode C_SRT +getSRTInfo (SRTEntries {}) = panic "getSRTInfo" + +getSRTInfo (SRT off len bmp) + | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape] + = do { id <- newUnique + ; top_srt <- getSRTLabel + ; let srt_desc_lbl = mkLargeSRTLabel id + ; emitRODataLits srt_desc_lbl + ( cmmLabelOffW top_srt off + : mkWordCLit (fromIntegral len) + : map mkWordCLit bmp) + ; return (C_SRT srt_desc_lbl 0 srt_escape) } + + | otherwise + = do { top_srt <- getSRTLabel + ; return (C_SRT top_srt off (fromIntegral (head bmp))) } + -- The fromIntegral converts to StgHalfWord + +getSRTInfo NoSRT + = -- TODO: Should we panic in this case? + -- Someone obviously thinks there should be an SRT + return NoC_SRT + + +srt_escape :: StgHalfWord +srt_escape = -1 |