diff options
author | dias@eecs.harvard.edu <unknown> | 2008-08-14 12:40:27 +0000 |
---|---|---|
committer | dias@eecs.harvard.edu <unknown> | 2008-08-14 12:40:27 +0000 |
commit | 176fa33f17dd78355cc572e006d2ab26898e2c69 (patch) | |
tree | 54f951a515eac57626f8f15d57f7bc75f1096a7a /compiler/codeGen | |
parent | e06951a75a1f519e8f015880c363a8dedc08ff9c (diff) | |
download | haskell-176fa33f17dd78355cc572e006d2ab26898e2c69.tar.gz |
Merging in the new codegen branch
This merge does not turn on the new codegen (which only compiles
a select few programs at this point),
but it does introduce some changes to the old code generator.
The high bits:
1. The Rep Swamp patch is finally here.
The highlight is that the representation of types at the
machine level has changed.
Consequently, this patch contains updates across several back ends.
2. The new Stg -> Cmm path is here, although it appears to have a
fair number of bugs lurking.
3. Many improvements along the CmmCPSZ path, including:
o stack layout
o some code for infotables, half of which is right and half wrong
o proc-point splitting
Diffstat (limited to 'compiler/codeGen')
37 files changed, 8254 insertions, 496 deletions
diff --git a/compiler/codeGen/CgBindery.lhs b/compiler/codeGen/CgBindery.lhs index 66776930c5..1928308a31 100644 --- a/compiler/codeGen/CgBindery.lhs +++ b/compiler/codeGen/CgBindery.lhs @@ -286,7 +286,7 @@ getCgIdInfo id name = idName id in if isExternalName name then do - let ext_lbl = CmmLit (CmmLabel (mkClosureLabel name)) + let ext_lbl = CmmLit (CmmLabel (mkClosureLabel name $ idCafInfo id)) return (stableIdInfo id ext_lbl (mkLFImported id)) else if isVoidArg (idCgRep id) then @@ -447,10 +447,7 @@ bindNewToTemp id return temp_reg where uniq = getUnique id - temp_reg = LocalReg uniq (argMachRep (idCgRep id)) kind - kind = if isFollowableArg (idCgRep id) - then GCKindPtr - else GCKindNonPtr + temp_reg = LocalReg uniq (argMachRep (idCgRep id)) lf_info = mkLFArgument id -- Always used of things we -- know nothing about diff --git a/compiler/codeGen/CgCallConv.hs b/compiler/codeGen/CgCallConv.hs index 752769f4e3..87c69b6331 100644 --- a/compiler/codeGen/CgCallConv.hs +++ b/compiler/codeGen/CgCallConv.hs @@ -39,7 +39,6 @@ import CgUtils import CgMonad import SMRep -import MachOp import Cmm import CLabel @@ -149,7 +148,7 @@ mkLiveness :: Name -> Int -> Bitmap -> FCode Liveness mkLiveness name size bits | size > mAX_SMALL_BITMAP_SIZE -- Bitmap does not fit in one word = do { let lbl = mkBitmapLabel (getUnique name) - ; emitRODataLits lbl ( mkWordCLit (fromIntegral size) + ; emitRODataLits "mkLiveness" lbl ( mkWordCLit (fromIntegral size) : map mkWordCLit bits) ; return (BigLiveness lbl) } @@ -196,7 +195,7 @@ mkRegLiveness regs ptrs nptrs all_non_ptrs = 0xff reg_bits [] = 0 - reg_bits ((id, VanillaReg i) : regs) | isFollowableArg (idCgRep id) + reg_bits ((id, VanillaReg i _) : regs) | isFollowableArg (idCgRep id) = (1 `shiftL` (i - 1)) .|. reg_bits regs reg_bits (_ : regs) = reg_bits regs @@ -264,8 +263,8 @@ slowCallPattern _ = panic "CgStackery.slowCallPattern" ------------------------------------------------------------------------- dataReturnConvPrim :: CgRep -> CmmReg -dataReturnConvPrim PtrArg = CmmGlobal (VanillaReg 1) -dataReturnConvPrim NonPtrArg = CmmGlobal (VanillaReg 1) +dataReturnConvPrim PtrArg = CmmGlobal (VanillaReg 1 VGcPtr) +dataReturnConvPrim NonPtrArg = CmmGlobal (VanillaReg 1 VNonGcPtr) dataReturnConvPrim LongArg = CmmGlobal (LongReg 1) dataReturnConvPrim FloatArg = CmmGlobal (FloatReg 1) dataReturnConvPrim DoubleArg = CmmGlobal (DoubleReg 1) @@ -288,7 +287,7 @@ getSequelAmode = do { EndOfBlockInfo virt_sp sequel <- getEndOfBlockInfo ; case sequel of OnStack -> do { sp_rel <- getSpRelOffset virt_sp - ; returnFC (CmmLoad sp_rel wordRep) } + ; returnFC (CmmLoad sp_rel bWord) } UpdateCode -> returnFC (CmmLit (CmmLabel mkUpdInfoLabel)) CaseAlts lbl _ _ -> returnFC (CmmLit (CmmLabel lbl)) @@ -361,7 +360,7 @@ assign_regs args supply where go [] acc supply = (acc, []) -- Return the results reversed (doesn't matter) go ((VoidArg,_) : args) acc supply -- Skip void arguments; they aren't passed, and - = go args acc supply -- there's nothign to bind them to + = go args acc supply -- there's nothing to bind them to go ((rep,arg) : args) acc supply = case assign_reg rep supply of Just (reg, supply') -> go args ((arg,reg):acc) supply' @@ -370,9 +369,9 @@ assign_regs args supply assign_reg :: CgRep -> AvailRegs -> Maybe (GlobalReg, AvailRegs) assign_reg FloatArg (vs, f:fs, ds, ls) = Just (FloatReg f, (vs, fs, ds, ls)) assign_reg DoubleArg (vs, fs, d:ds, ls) = Just (DoubleReg d, (vs, fs, ds, ls)) -assign_reg LongArg (vs, fs, ds, l:ls) = Just (LongReg l, (vs, fs, ds, ls)) -assign_reg PtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v, (vs, fs, ds, ls)) -assign_reg NonPtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v, (vs, fs, ds, ls)) +assign_reg LongArg (vs, fs, ds, l:ls) = pprTrace "longArg" (ppr l) $ Just (LongReg l, (vs, fs, ds, ls)) +assign_reg PtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v VGcPtr, (vs, fs, ds, ls)) +assign_reg NonPtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v VNonGcPtr, (vs, fs, ds, ls)) -- PtrArg and NonPtrArg both go in a vanilla register assign_reg other not_enough_regs = Nothing @@ -430,11 +429,11 @@ mkRegTbl_allRegs regs_in_use mkRegTbl' regs_in_use vanillas floats doubles longs = (ok_vanilla, ok_float, ok_double, ok_long) where - ok_vanilla = mapCatMaybes (select VanillaReg) vanillas + ok_vanilla = mapCatMaybes (select (\i -> VanillaReg i VNonGcPtr)) vanillas + -- ptrhood isn't looked at, hence we can use any old rep. ok_float = mapCatMaybes (select FloatReg) floats ok_double = mapCatMaybes (select DoubleReg) doubles ok_long = mapCatMaybes (select LongReg) longs - -- rep isn't looked at, hence we can use any old rep. select :: (Int -> GlobalReg) -> Int{-cand-} -> Maybe Int -- one we've unboxed the Int, we make a GlobalReg diff --git a/compiler/codeGen/CgCase.lhs b/compiler/codeGen/CgCase.lhs index 49c782e12a..859b2208fe 100644 --- a/compiler/codeGen/CgCase.lhs +++ b/compiler/codeGen/CgCase.lhs @@ -36,7 +36,6 @@ import ClosureInfo import SMRep import CmmUtils import Cmm -import MachOp import StgSyn import StaticFlags @@ -164,8 +163,8 @@ cgCase (StgOpApp op@(StgFCallOp fcall _) args _) do -- *must* be an unboxed tuple alt. -- exactly like the cgInlinePrimOp case for unboxed tuple alts.. { res_tmps <- mapFCs bindNewToTemp non_void_res_ids - ; let res_hints = map (typeHint.idType) non_void_res_ids - ; cgForeignCall (zipWith CmmKinded res_tmps res_hints) fcall args live_in_alts + ; let res_hints = map (typeForeignHint.idType) non_void_res_ids + ; cgForeignCall (zipWith CmmHinted res_tmps res_hints) fcall args live_in_alts ; cgExpr rhs } where (_, res_ids, _, rhs) = head alts @@ -340,7 +339,7 @@ cgInlinePrimOp primop args bndr (AlgAlt tycon) live_in_alts alts (_,e) <- getArgAmode arg return e do_enum_primop primop - = do tmp <- newNonPtrTemp wordRep + = do tmp <- newTemp bWord cgPrimOp [tmp] primop args live_in_alts returnFC (CmmReg (CmmLocal tmp)) @@ -612,6 +611,6 @@ restoreCurrentCostCentre Nothing _freeit = nopC restoreCurrentCostCentre (Just slot) freeit = do { sp_rel <- getSpRelOffset slot ; whenC freeit (freeStackSlots [slot]) - ; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel wordRep)) } + ; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel bWord)) } \end{code} diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs index 80949e7513..b7f9f3b7dc 100644 --- a/compiler/codeGen/CgClosure.lhs +++ b/compiler/codeGen/CgClosure.lhs @@ -38,7 +38,6 @@ import CgCallConv import CgUtils import ClosureInfo import SMRep -import MachOp import Cmm import CmmUtils import CLabel @@ -85,7 +84,7 @@ cgTopRhsClosure id ccs binder_info upd_flag args body = do ; mod_name <- getModuleName ; let descr = closureDescription mod_name name closure_info = mkClosureInfo True id lf_info 0 0 srt_info descr - closure_label = mkLocalClosureLabel name + closure_label = mkLocalClosureLabel name $ idCafInfo id cg_id_info = stableIdInfo id (mkLblExpr closure_label) lf_info closure_rep = mkStaticClosureFields closure_info ccs True [] @@ -259,6 +258,7 @@ closureCodeBody binder_info cl_info cc [{- No args i.e. thunk -}] body = do -- in update frame CAF/DICT functions will be -- subsumed by this enclosing cc { enterCostCentre cl_info cc body + ; stmtsC [CmmComment $ mkFastString $ showSDoc $ ppr body] ; cgExpr body } } @@ -282,7 +282,7 @@ closureCodeBody binder_info cl_info cc args body (sp_top, stk_args) = mkVirtStkOffsets vSp other_args -- Allocate the global ticky counter - ; let ticky_ctr_lbl = mkRednCountsLabel (closureName cl_info) + ; let ticky_ctr_lbl = mkRednCountsLabel (closureName cl_info) (clHasCafRefs cl_info) ; emitTickyCounter cl_info args sp_top -- ...and establish the ticky-counter @@ -355,7 +355,8 @@ mkSlowEntryCode cl_info reg_args | otherwise = return noStmts where name = closureName cl_info - slow_lbl = mkSlowEntryLabel name + has_caf_refs = clHasCafRefs cl_info + slow_lbl = mkSlowEntryLabel name has_caf_refs load_stmts = mkStmts load_assts `plusStmts` mkStmts [stk_adj_pop, jump_to_entry] save_stmts = oneStmt stk_adj_push `plusStmts` mkStmts save_assts @@ -372,13 +373,13 @@ mkSlowEntryCode cl_info reg_args (argMachRep rep)) save_assts = zipWithEqual "mk_save" mk_save reps_w_regs stk_offsets - mk_save (rep,reg) offset = ASSERT( argMachRep rep == globalRegRep reg ) + mk_save (rep,reg) offset = ASSERT( argMachRep rep `cmmEqType` globalRegType reg ) CmmStore (cmmRegOffW spReg offset) (CmmReg (CmmGlobal reg)) stk_adj_pop = CmmAssign spReg (cmmRegOffW spReg final_stk_offset) stk_adj_push = CmmAssign spReg (cmmRegOffW spReg (- final_stk_offset)) - jump_to_entry = CmmJump (mkLblExpr (enterLocalIdLabel name)) [] + jump_to_entry = CmmJump (mkLblExpr (enterLocalIdLabel name has_caf_refs)) [] \end{code} @@ -565,7 +566,7 @@ link_caf cl_info is_upd = do -- so that the garbage collector can find them -- This must be done *before* the info table pointer is overwritten, -- because the old info table ptr is needed for reversion - ; emitRtsCallWithVols (sLit "newCAF") [CmmKinded (CmmReg nodeReg) PtrHint] [node] False + ; emitRtsCallWithVols (sLit "newCAF") [CmmHinted (CmmReg nodeReg) AddrHint] [node] False -- node is live, so save it. -- Overwrite the closure with a (static) indirection diff --git a/compiler/codeGen/CgCon.lhs b/compiler/codeGen/CgCon.lhs index ff012ef4cf..b22e56f70c 100644 --- a/compiler/codeGen/CgCon.lhs +++ b/compiler/codeGen/CgCon.lhs @@ -47,6 +47,7 @@ import Constants import TyCon import DataCon import Id +import IdInfo import Type import PrelInfo import Outputable @@ -82,7 +83,7 @@ cgTopRhsCon id con args ; let name = idName id lf_info = mkConLFInfo con - closure_label = mkClosureLabel name + closure_label = mkClosureLabel name $ idCafInfo id caffy = any stgArgHasCafRefs args (closure_info, amodes_w_offsets) = layOutStaticConstr con amodes closure_rep = mkStaticClosureFields @@ -142,7 +143,8 @@ at all. \begin{code} buildDynCon binder cc con [] = returnFC (taggedStableIdInfo binder - (mkLblExpr (mkClosureLabel (dataConName con))) + (mkLblExpr (mkClosureLabel (dataConName con) + (idCafInfo binder))) (mkConLFInfo con) con) \end{code} @@ -174,7 +176,7 @@ buildDynCon binder cc con [arg_amode] , (_, CmmLit (CmmInt val _)) <- arg_amode , let val_int = (fromIntegral val) :: Int , val_int <= mAX_INTLIKE && val_int >= mIN_INTLIKE - = do { let intlike_lbl = mkRtsDataLabel (sLit "stg_INTLIKE_closure") + = do { let intlike_lbl = mkRtsGcPtrLabel (sLit "stg_INTLIKE_closure") offsetW = (val_int - mIN_INTLIKE) * (fixedHdrSize + 1) -- INTLIKE closures consist of a header and one word payload intlike_amode = CmmLit (cmmLabelOffW intlike_lbl offsetW) @@ -185,7 +187,7 @@ buildDynCon binder cc con [arg_amode] , (_, CmmLit (CmmInt val _)) <- arg_amode , let val_int = (fromIntegral val) :: Int , val_int <= mAX_CHARLIKE && val_int >= mIN_CHARLIKE - = do { let charlike_lbl = mkRtsDataLabel (sLit "stg_CHARLIKE_closure") + = do { let charlike_lbl = mkRtsGcPtrLabel (sLit "stg_CHARLIKE_closure") offsetW = (val_int - mIN_CHARLIKE) * (fixedHdrSize + 1) -- CHARLIKE closures consist of a header and one word payload charlike_amode = CmmLit (cmmLabelOffW charlike_lbl offsetW) @@ -401,9 +403,8 @@ cgTyCon tycon -- code appears to put it before --- NR 16 Aug 2007 ; extra <- if isEnumerationTyCon tycon then do - tbl <- getCmm (emitRODataLits (mkLocalClosureTableLabel - (tyConName tycon)) - [ CmmLabelOff (mkLocalClosureLabel (dataConName con)) (tagForCon con) + tbl <- getCmm (emitRODataLits "cgTyCon" (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs) + [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs) (tagForCon con) | con <- tyConDataCons tycon]) return [tbl] else diff --git a/compiler/codeGen/CgExpr.lhs b/compiler/codeGen/CgExpr.lhs index f22071e2c5..3b75267385 100644 --- a/compiler/codeGen/CgExpr.lhs +++ b/compiler/codeGen/CgExpr.lhs @@ -37,7 +37,7 @@ import CgHpc import CgUtils import ClosureInfo import Cmm -import MachOp +import CmmUtils import VarSet import Literal import PrimOp @@ -48,6 +48,7 @@ import Maybes import ListSetOps import BasicTypes import Util +import FastString import Outputable \end{code} @@ -128,18 +129,15 @@ cgExpr (StgOpApp (StgFCallOp fcall _) stg_args res_ty) = do | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes, nonVoidArg rep] - arg_tmps <- sequence [ - if isFollowableArg (typeCgRep (stgArgType stg_arg)) - then assignPtrTemp arg - else assignNonPtrTemp arg - | (arg, stg_arg) <- arg_exprs] - let arg_hints = zipWith CmmKinded arg_tmps (map (typeHint.stgArgType) stg_args) + arg_tmps <- sequence [ assignTemp arg + | (arg, stg_arg) <- arg_exprs] + let arg_hints = zipWith CmmHinted arg_tmps (map (typeForeignHint.stgArgType) stg_args) {- Now, allocate some result regs. -} (res_reps,res_regs,res_hints) <- newUnboxedTupleRegs res_ty ccallReturnUnboxedTuple (zip res_reps (map (CmmReg . CmmLocal) res_regs)) $ - emitForeignCall (zipWith CmmKinded res_regs res_hints) fcall + emitForeignCall (zipWith CmmHinted res_regs res_hints) fcall arg_hints emptyVarSet{-no live vars-} -- tagToEnum# is special: we need to pull the constructor out of the table, @@ -148,10 +146,7 @@ cgExpr (StgOpApp (StgFCallOp fcall _) stg_args res_ty) = do cgExpr (StgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty) = ASSERT(isEnumerationTyCon tycon) do { (rep,amode) <- getArgAmode arg - ; amode' <- if isFollowableArg rep - then assignPtrTemp amode - else assignNonPtrTemp amode - -- We're going to use it twice, + ; amode' <- assignTemp amode -- We're going to use it twice, -- so save in a temp if non-trivial ; stmtC (CmmAssign nodeReg (tagToClosure tycon amode')) ; performReturn emitReturnInstr } @@ -173,9 +168,7 @@ cgExpr x@(StgOpApp op@(StgPrimOp primop) args res_ty) performReturn emitReturnInstr | ReturnsPrim rep <- result_info - = do res <- if isFollowableArg (typeCgRep res_ty) - then newPtrTemp (argMachRep (typeCgRep res_ty)) - else newNonPtrTemp (argMachRep (typeCgRep res_ty)) + = do res <- newTemp (typeCmmType res_ty) cgPrimOp [res] primop args emptyVarSet performPrimReturn (primRepToCgRep rep) (CmmReg (CmmLocal res)) @@ -186,9 +179,7 @@ cgExpr x@(StgOpApp op@(StgPrimOp primop) args res_ty) | ReturnsAlg tycon <- result_info, isEnumerationTyCon tycon -- c.f. cgExpr (...TagToEnumOp...) - = do tag_reg <- if isFollowableArg (typeCgRep res_ty) - then newPtrTemp wordRep - else newNonPtrTemp wordRep + = do tag_reg <- newTemp bWord -- The tag is a word cgPrimOp [tag_reg] primop args emptyVarSet stmtC (CmmAssign nodeReg (tagToClosure tycon @@ -455,16 +446,14 @@ cgLetNoEscapeRhs full_live_in_rhss rhs_eob_info maybe_cc_slot rec binder Little helper for primitives that return unboxed tuples. \begin{code} -newUnboxedTupleRegs :: Type -> FCode ([CgRep], [LocalReg], [MachHint]) +newUnboxedTupleRegs :: Type -> FCode ([CgRep], [LocalReg], [ForeignHint]) newUnboxedTupleRegs res_ty = let ty_args = tyConAppArgs (repType res_ty) - (reps,hints) = unzip [ (rep, typeHint ty) | ty <- ty_args, + (reps,hints) = unzip [ (rep, typeForeignHint ty) | ty <- ty_args, let rep = typeCgRep ty, nonVoidArg rep ] - make_new_temp rep = if isFollowableArg rep - then newPtrTemp (argMachRep rep) - else newNonPtrTemp (argMachRep rep) + make_new_temp rep = newTemp (argMachRep rep) in do regs <- mapM make_new_temp reps return (reps,regs,hints) diff --git a/compiler/codeGen/CgForeignCall.hs b/compiler/codeGen/CgForeignCall.hs index b3d779e182..6e338061b4 100644 --- a/compiler/codeGen/CgForeignCall.hs +++ b/compiler/codeGen/CgForeignCall.hs @@ -34,7 +34,6 @@ import TysPrim import CLabel import Cmm import CmmUtils -import MachOp import SMRep import ForeignCall import ClosureInfo @@ -49,7 +48,7 @@ import Control.Monad -- Code generation for Foreign Calls cgForeignCall - :: CmmFormals -- where to put the results + :: HintedCmmFormals -- where to put the results -> ForeignCall -- the op -> [StgArg] -- arguments -> StgLiveVars -- live vars, in case we need to save them @@ -63,16 +62,16 @@ cgForeignCall results fcall stg_args live | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes, nonVoidArg rep] - arg_hints = zipWith CmmKinded - arg_exprs (map (typeHint.stgArgType) stg_args) + arg_hints = zipWith CmmHinted + arg_exprs (map (typeForeignHint.stgArgType) stg_args) -- in emitForeignCall results fcall arg_hints live emitForeignCall - :: CmmFormals -- where to put the results + :: HintedCmmFormals -- where to put the results -> ForeignCall -- the op - -> [CmmKinded CmmExpr] -- arguments + -> [CmmHinted CmmExpr] -- arguments -> StgLiveVars -- live vars, in case we need to save them -> Code @@ -86,18 +85,18 @@ emitForeignCall results (CCall (CCallSpec target cconv safety)) args live = case target of StaticTarget lbl -> (args, CmmLit (CmmLabel (mkForeignLabel lbl call_size False))) - DynamicTarget -> case args of (CmmKinded fn _):rest -> (rest, fn) + DynamicTarget -> case args of (CmmHinted fn _):rest -> (rest, fn) -- in the stdcall calling convention, the symbol needs @size appended -- to it, where size is the total number of bytes of arguments. We -- attach this info to the CLabel here, and the CLabel pretty printer -- will generate the suffix when the label is printed. call_size - | StdCallConv <- cconv = Just (sum (map (arg_size.cmmExprRep.kindlessCmm) args)) + | StdCallConv <- cconv = Just (sum (map (arg_size.cmmExprType.hintlessCmm) args)) | otherwise = Nothing -- ToDo: this might not be correct for 64-bit API - arg_size rep = max (machRepByteWidth rep) wORD_SIZE + arg_size rep = max (widthInBytes (typeWidth rep)) wORD_SIZE emitForeignCall _ (DNCall _) _ _ = panic "emitForeignCall: DNCall" @@ -106,9 +105,9 @@ emitForeignCall _ (DNCall _) _ _ -- alternative entry point, used by CmmParse emitForeignCall' :: Safety - -> CmmFormals -- where to put the results + -> HintedCmmFormals -- where to put the results -> CmmCallTarget -- the op - -> [CmmKinded CmmExpr] -- arguments + -> [CmmHinted CmmExpr] -- arguments -> Maybe [GlobalReg] -- live vars, in case we need to save them -> C_SRT -- the SRT of the calls continuation -> CmmReturnInfo @@ -124,8 +123,8 @@ emitForeignCall' safety results target args vols srt ret | otherwise = do -- Both 'id' and 'new_base' are GCKindNonPtr because they're -- RTS only objects and are not subject to garbage collection - id <- newNonPtrTemp wordRep - new_base <- newNonPtrTemp (cmmRegRep (CmmGlobal BaseReg)) + id <- newTemp bWord + new_base <- newTemp (cmmRegType (CmmGlobal BaseReg)) temp_args <- load_args_into_temps args temp_target <- load_target_into_temp target let (caller_save, caller_load) = callerSaveVolatileRegs vols @@ -134,16 +133,16 @@ emitForeignCall' safety results target args vols srt ret -- The CmmUnsafe arguments are only correct because this part -- of the code hasn't been moved into the CPS pass yet. -- Once that happens, this function will just emit a (CmmSafe srt) call, - -- and the CPS will will be the one to convert that + -- and the CPS will be the one to convert that -- to this sequence of three CmmUnsafe calls. stmtC (CmmCall (CmmCallee suspendThread CCallConv) - [ CmmKinded id PtrHint ] - [ CmmKinded (CmmReg (CmmGlobal BaseReg)) PtrHint ] + [ CmmHinted id AddrHint ] + [ CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint ] CmmUnsafe ret) stmtC (CmmCall temp_target results temp_args CmmUnsafe ret) stmtC (CmmCall (CmmCallee resumeThread CCallConv) - [ CmmKinded new_base PtrHint ] - [ CmmKinded (CmmReg (CmmLocal id)) PtrHint ] + [ CmmHinted new_base AddrHint ] + [ CmmHinted (CmmReg (CmmLocal id)) AddrHint ] CmmUnsafe ret) -- Assign the result to BaseReg: we -- might now have a different Capability! @@ -163,9 +162,9 @@ resumeThread = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "resumeThread"))) -- This is a HACK; really it should be done in the back end, but -- it's easier to generate the temporaries here. load_args_into_temps = mapM arg_assign_temp - where arg_assign_temp (CmmKinded e hint) = do + where arg_assign_temp (CmmHinted e hint) = do tmp <- maybe_assign_temp e - return (CmmKinded tmp hint) + return (CmmHinted tmp hint) load_target_into_temp (CmmCallee expr conv) = do tmp <- maybe_assign_temp expr @@ -179,7 +178,7 @@ maybe_assign_temp e -- don't use assignTemp, it uses its own notion of "trivial" -- expressions, which are wrong here. -- this is a NonPtr because it only duplicates an existing - reg <- newNonPtrTemp (cmmExprRep e) --TODO FIXME NOW + reg <- newTemp (cmmExprType e) --TODO FIXME NOW stmtC (CmmAssign (CmmLocal reg) e) return (CmmReg (CmmLocal reg)) @@ -201,13 +200,13 @@ emitSaveThreadState = do emitCloseNursery = stmtC $ CmmStore nursery_bdescr_free (cmmOffsetW stgHp 1) emitLoadThreadState = do - tso <- newNonPtrTemp wordRep -- TODO FIXME NOW + tso <- newTemp bWord -- TODO FIXME NOW stmtsC [ -- tso = CurrentTSO; CmmAssign (CmmLocal tso) stgCurrentTSO, -- Sp = tso->sp; CmmAssign sp (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_SP) - wordRep), + bWord), -- SpLim = tso->stack + RESERVED_STACK_WORDS; CmmAssign spLim (cmmOffsetW (cmmOffset (CmmReg (CmmLocal tso)) tso_STACK) rESERVED_STACK_WORDS) @@ -216,21 +215,21 @@ emitLoadThreadState = do -- and load the current cost centre stack from the TSO when profiling: when opt_SccProfilingOn $ stmtC (CmmStore curCCSAddr - (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) wordRep)) + (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord)) emitOpenNursery = stmtsC [ -- Hp = CurrentNursery->free - 1; - CmmAssign hp (cmmOffsetW (CmmLoad nursery_bdescr_free wordRep) (-1)), + CmmAssign hp (cmmOffsetW (CmmLoad nursery_bdescr_free gcWord) (-1)), -- HpLim = CurrentNursery->start + -- CurrentNursery->blocks*BLOCK_SIZE_W - 1; CmmAssign hpLim (cmmOffsetExpr - (CmmLoad nursery_bdescr_start wordRep) + (CmmLoad nursery_bdescr_start bWord) (cmmOffset (CmmMachOp mo_wordMul [ - CmmMachOp (MO_S_Conv I32 wordRep) - [CmmLoad nursery_bdescr_blocks I32], + CmmMachOp (MO_SS_Conv W32 wordWidth) + [CmmLoad nursery_bdescr_blocks b32], CmmLit (mkIntCLit bLOCK_SIZE) ]) (-1) diff --git a/compiler/codeGen/CgHeapery.lhs b/compiler/codeGen/CgHeapery.lhs index 66d41d3d96..252989105c 100644 --- a/compiler/codeGen/CgHeapery.lhs +++ b/compiler/codeGen/CgHeapery.lhs @@ -42,9 +42,9 @@ import ClosureInfo import SMRep import Cmm -import MachOp import CmmUtils import Id +import IdInfo import DataCon import TyCon import CostCentre @@ -191,7 +191,7 @@ mkStaticClosureFields cl_info ccs caf_refs payload = mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_field where - info_lbl = infoTableLabelFromCI cl_info + info_lbl = infoTableLabelFromCI cl_info $ clHasCafRefs cl_info -- CAFs must have consistent layout, regardless of whether they -- are actually updatable or not. The layout of a CAF is: @@ -226,7 +226,6 @@ mkStaticClosureFields cl_info ccs caf_refs payload | caf_refs = mkIntCLit 0 | otherwise = mkIntCLit 1 - mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit] mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_field @@ -245,14 +244,14 @@ mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_fi padLitToWord :: CmmLit -> [CmmLit] padLitToWord lit = lit : padding pad_length - where rep = cmmLitRep lit - pad_length = wORD_SIZE - machRepByteWidth rep :: Int + where width = typeWidth (cmmLitType lit) + pad_length = wORD_SIZE - widthInBytes width :: Int padding n | n <= 0 = [] - | n `rem` 2 /= 0 = CmmInt 0 I8 : padding (n-1) - | n `rem` 4 /= 0 = CmmInt 0 I16 : padding (n-2) - | n `rem` 8 /= 0 = CmmInt 0 I32 : padding (n-4) - | otherwise = CmmInt 0 I64 : padding (n-8) + | n `rem` 2 /= 0 = CmmInt 0 W8 : padding (n-1) + | n `rem` 4 /= 0 = CmmInt 0 W16 : padding (n-2) + | n `rem` 8 /= 0 = CmmInt 0 W32 : padding (n-4) + | otherwise = CmmInt 0 W64 : padding (n-8) \end{code} %************************************************************************ @@ -309,7 +308,7 @@ hpStkCheck cl_info is_fun reg_save_code code -- Strictly speaking, we should tag node here. But if -- node doesn't point to the closure, the code for the closure -- cannot depend on the value of R1 anyway, so we're safe. - closure_lbl = closureLabelFromCI cl_info + closure_lbl = closureLabelFromCI cl_info (clHasCafRefs cl_info) full_save_code = node_asst `plusStmts` reg_save_code @@ -410,7 +409,7 @@ unbxTupleHeapCheck regs ptrs nptrs fail_code code ; code } where full_fail_code = fail_code `plusStmts` oneStmt assign_liveness - assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9)) -- Ho ho ho! + assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9 VNonGcPtr)) -- Ho ho ho! (CmmLit (mkWordCLit liveness)) liveness = mkRegLiveness regs ptrs nptrs rts_label = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_ut"))) @@ -495,10 +494,8 @@ hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code hpChkGen bytes liveness reentry = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assigns stg_gc_gen where - assigns = mkStmts [ - CmmAssign (CmmGlobal (VanillaReg 9)) liveness, - CmmAssign (CmmGlobal (VanillaReg 10)) reentry - ] + assigns = mkStmts [ mk_vanilla_assignment 9 liveness, + mk_vanilla_assignment 10 reentry ] -- a heap check where R1 points to the closure to enter on return, and -- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP). @@ -511,10 +508,12 @@ stkChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code stkChkGen bytes liveness reentry = do_checks' bytes (CmmLit (mkIntCLit 0)) True False assigns stg_gc_gen where - assigns = mkStmts [ - CmmAssign (CmmGlobal (VanillaReg 9)) liveness, - CmmAssign (CmmGlobal (VanillaReg 10)) reentry - ] + assigns = mkStmts [ mk_vanilla_assignment 9 liveness, + mk_vanilla_assignment 10 reentry ] + +mk_vanilla_assignment :: Int -> CmmExpr -> CmmStmt +mk_vanilla_assignment n e + = CmmAssign (CmmGlobal (VanillaReg n (vgcFlag (cmmExprType e)))) e stkChkNodePoints :: CmmExpr -> Code stkChkNodePoints bytes @@ -554,7 +553,8 @@ allocDynClosure cl_info use_cc blame_cc amodes_with_offsets -- Remember, virtHp points to last allocated word, -- ie 1 *before* the info-ptr word of new object. - info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info)) + info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info + (clHasCafRefs cl_info))) hdr_w_offsets = initDynHdr info_ptr use_cc `zip` [0..] -- SAY WHAT WE ARE ABOUT TO DO diff --git a/compiler/codeGen/CgHpc.hs b/compiler/codeGen/CgHpc.hs index 0d0fdb1183..768a307e3a 100644 --- a/compiler/codeGen/CgHpc.hs +++ b/compiler/codeGen/CgHpc.hs @@ -18,7 +18,6 @@ module CgHpc (cgTickBox, initHpc, hpcTable) where import Cmm import CLabel import Module -import MachOp import CmmUtils import CgUtils import CgMonad @@ -35,14 +34,14 @@ import Data.Word cgTickBox :: Module -> Int -> Code cgTickBox mod n = do - let tick_box = (cmmIndex I64 + let tick_box = (cmmIndex W64 (CmmLit $ CmmLabel $ mkHpcTicksLabel $ mod) (fromIntegral n) ) stmtsC [ CmmStore tick_box - (CmmMachOp (MO_Add I64) - [ CmmLoad tick_box I64 - , CmmLit (CmmInt 1 I64) + (CmmMachOp (MO_Add W64) + [ CmmLoad tick_box b64 + , CmmLit (CmmInt 1 W64) ]) ] @@ -56,7 +55,7 @@ hpcTable this_mod (HpcInfo hpc_tickCount _) = do ] emitData Data $ [ CmmDataLabel (mkHpcTicksLabel this_mod) ] ++ - [ CmmStaticLit (CmmInt 0 I64) + [ CmmStaticLit (CmmInt 0 W64) | _ <- take hpc_tickCount [0::Int ..] ] where @@ -70,24 +69,24 @@ hpcTable this_mod (NoHpcInfo {}) = error "TODO: impossible" initHpc :: Module -> HpcInfo -> Code initHpc this_mod (HpcInfo tickCount hashNo) - = do { id <- newNonPtrTemp wordRep -- TODO FIXME NOW + = do { id <- newTemp bWord ; emitForeignCall' PlayRisky - [CmmKinded id NoHint] + [CmmHinted id NoHint] (CmmCallee (CmmLit $ CmmLabel $ mkForeignLabel mod_alloc Nothing False) CCallConv ) - [ CmmKinded (mkLblExpr mkHpcModuleNameLabel) PtrHint - , CmmKinded (word32 tickCount) NoHint - , CmmKinded (word32 hashNo) NoHint - , CmmKinded (CmmLit $ CmmLabel $ mkHpcTicksLabel $ this_mod) PtrHint + [ CmmHinted (mkLblExpr mkHpcModuleNameLabel) AddrHint + , CmmHinted (word32 tickCount) NoHint + , CmmHinted (word32 hashNo) NoHint + , CmmHinted (CmmLit $ CmmLabel $ mkHpcTicksLabel $ this_mod) AddrHint ] (Just []) NoC_SRT -- No SRT b/c we PlayRisky CmmMayReturn } where - word32 i = CmmLit (CmmInt (fromIntegral (fromIntegral i :: Word32)) I32) + word32 i = CmmLit (CmmInt (fromIntegral (fromIntegral i :: Word32)) W32) mod_alloc = mkFastString "hs_hpc_module" diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs index 14004ceef8..9fbe4fb36d 100644 --- a/compiler/codeGen/CgInfoTbls.hs +++ b/compiler/codeGen/CgInfoTbls.hs @@ -40,7 +40,6 @@ import CgMonad import CmmUtils import Cmm -import MachOp import CLabel import StgSyn import Name @@ -64,13 +63,13 @@ import Outputable -- representation as a list of 'CmmAddr' is handled later -- in the pipeline by 'cmmToRawCmm'. -emitClosureCodeAndInfoTable :: ClosureInfo -> CmmFormalsWithoutKinds -> CgStmts -> Code +emitClosureCodeAndInfoTable :: ClosureInfo -> CmmFormals -> CgStmts -> Code emitClosureCodeAndInfoTable cl_info args body = do { blks <- cgStmtsToBlocks body ; info <- mkCmmInfo cl_info ; emitInfoTableAndCode (infoLblToEntryLbl info_lbl) info args blks } where - info_lbl = infoTableLabelFromCI cl_info + info_lbl = infoTableLabelFromCI cl_info $ clHasCafRefs cl_info -- We keep the *zero-indexed* tag in the srt_len field of the info -- table of a data constructor. @@ -107,17 +106,17 @@ mkCmmInfo cl_info = do LFReEntrant _ arity _ arg_descr -> FunInfo (ptrs, nptrs) srt - (argDescrType arg_descr) (fromIntegral arity) arg_descr - (CmmLabel (mkSlowEntryLabel name)) + (CmmLabel (mkSlowEntryLabel name has_caf_refs)) LFThunk _ _ _ (SelectorThunk offset) _ -> ThunkSelectorInfo (fromIntegral offset) srt LFThunk _ _ _ _ _ -> ThunkInfo (ptrs, nptrs) srt _ -> panic "unexpected lambda form in mkCmmInfo" where - info_lbl = infoTableLabelFromCI cl_info + info_lbl = infoTableLabelFromCI cl_info has_caf_refs + has_caf_refs = clHasCafRefs cl_info cl_type = smRepClosureTypeInt (closureSMRep cl_info) @@ -235,12 +234,9 @@ stack_layout ((off, bind):binds) sizeW | off == sizeW - 1 = (Just stack_bind) : (stack_layout binds (sizeW - rep_size)) where rep_size = cgRepSizeW (cgIdInfoArgRep bind) - stack_bind = LocalReg unique machRep kind + stack_bind = LocalReg unique machRep unique = getUnique (cgIdInfoId bind) machRep = argMachRep (cgIdInfoArgRep bind) - kind = if isFollowableArg (cgIdInfoArgRep bind) - then GCKindPtr - else GCKindNonPtr stack_layout binds@((off, _):_) sizeW | otherwise = Nothing : (stack_layout binds (sizeW - 1)) @@ -344,13 +340,13 @@ stdNonPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE + hALF_WORD_SIZE closureInfoPtr :: CmmExpr -> CmmExpr -- Takes a closure pointer and returns the info table pointer -closureInfoPtr e = CmmLoad e wordRep +closureInfoPtr e = CmmLoad e bWord entryCode :: CmmExpr -> CmmExpr -- Takes an info pointer (the first word of a closure) -- and returns its entry code entryCode e | tablesNextToCode = e - | otherwise = CmmLoad e wordRep + | otherwise = CmmLoad e bWord getConstrTag :: CmmExpr -> CmmExpr -- Takes a closure pointer, and return the *zero-indexed* @@ -358,7 +354,7 @@ getConstrTag :: CmmExpr -> CmmExpr -- This lives in the SRT field of the info table -- (constructors don't need SRTs). getConstrTag closure_ptr - = CmmMachOp (MO_U_Conv halfWordRep wordRep) [infoTableConstrTag info_table] + = CmmMachOp (MO_UU_Conv halfWordWidth wordWidth) [infoTableConstrTag info_table] where info_table = infoTable (closureInfoPtr closure_ptr) @@ -366,7 +362,7 @@ cmmGetClosureType :: CmmExpr -> CmmExpr -- Takes a closure pointer, and return the closure type -- obtained from the info table cmmGetClosureType closure_ptr - = CmmMachOp (MO_U_Conv halfWordRep wordRep) [infoTableClosureType info_table] + = CmmMachOp (MO_UU_Conv halfWordWidth wordWidth) [infoTableClosureType info_table] where info_table = infoTable (closureInfoPtr closure_ptr) @@ -387,21 +383,21 @@ infoTableSrtBitmap :: CmmExpr -> CmmExpr -- Takes an info table pointer (from infoTable) and returns the srt_bitmap -- field of the info table infoTableSrtBitmap info_tbl - = CmmLoad (cmmOffsetB info_tbl stdSrtBitmapOffset) halfWordRep + = CmmLoad (cmmOffsetB info_tbl stdSrtBitmapOffset) bHalfWord infoTableClosureType :: CmmExpr -> CmmExpr -- Takes an info table pointer (from infoTable) and returns the closure type -- field of the info table. infoTableClosureType info_tbl - = CmmLoad (cmmOffsetB info_tbl stdClosureTypeOffset) halfWordRep + = CmmLoad (cmmOffsetB info_tbl stdClosureTypeOffset) bHalfWord infoTablePtrs :: CmmExpr -> CmmExpr infoTablePtrs info_tbl - = CmmLoad (cmmOffsetB info_tbl stdPtrsOffset) halfWordRep + = CmmLoad (cmmOffsetB info_tbl stdPtrsOffset) bHalfWord infoTableNonPtrs :: CmmExpr -> CmmExpr infoTableNonPtrs info_tbl - = CmmLoad (cmmOffsetB info_tbl stdNonPtrsOffset) halfWordRep + = CmmLoad (cmmOffsetB info_tbl stdNonPtrsOffset) bHalfWord funInfoTable :: CmmExpr -> CmmExpr -- Takes the info pointer of a function, @@ -427,7 +423,7 @@ funInfoTable info_ptr emitInfoTableAndCode :: CLabel -- Label of entry or ret -> CmmInfo -- ...the info table - -> CmmFormalsWithoutKinds -- ...args + -> CmmFormals -- ...args -> [CmmBasicBlock] -- ...and body -> Code diff --git a/compiler/codeGen/CgMonad.lhs b/compiler/codeGen/CgMonad.lhs index 51c07b213d..e624f4b436 100644 --- a/compiler/codeGen/CgMonad.lhs +++ b/compiler/codeGen/CgMonad.lhs @@ -74,6 +74,7 @@ import BlockId import Cmm import CmmUtils import CLabel +import PprCmm import StgSyn (SRT) import SMRep import Module @@ -746,7 +747,7 @@ emitData sect lits where data_block = CmmData sect lits -emitProc :: CmmInfo -> CLabel -> CmmFormalsWithoutKinds -> [CmmBasicBlock] -> Code +emitProc :: CmmInfo -> CLabel -> CmmFormals -> [CmmBasicBlock] -> Code emitProc info lbl args blocks = do { let proc_block = CmmProc info lbl args (ListGraph blocks) ; state <- getState @@ -767,7 +768,8 @@ getCmm code = do { state1 <- getState ; ((), state2) <- withState code (state1 { cgs_tops = nilOL }) ; setState $ state2 { cgs_tops = cgs_tops state1 } - ; return (Cmm (fromOL (cgs_tops state2))) } + ; return (Cmm (fromOL (cgs_tops state2))) + } -- ---------------------------------------------------------------------------- -- CgStmts diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs index 85a41515e6..05e45b5097 100644 --- a/compiler/codeGen/CgPrimOp.hs +++ b/compiler/codeGen/CgPrimOp.hs @@ -28,7 +28,6 @@ import CgUtils import Cmm import CLabel import CmmUtils -import MachOp import PrimOp import SMRep import Constants @@ -38,7 +37,7 @@ import FastString -- --------------------------------------------------------------------------- -- Code generation for PrimOps -cgPrimOp :: CmmFormalsWithoutKinds -- where to put the results +cgPrimOp :: CmmFormals -- where to put the results -> PrimOp -- the op -> [StgArg] -- arguments -> StgLiveVars -- live vars, in case we need to save them @@ -50,7 +49,7 @@ cgPrimOp results op args live emitPrimOp results op non_void_args live -emitPrimOp :: CmmFormalsWithoutKinds -- where to put the results +emitPrimOp :: CmmFormals -- where to put the results -> PrimOp -- the op -> [CmmExpr] -- arguments -> StgLiveVars -- live vars, in case we need to save them @@ -122,10 +121,10 @@ emitPrimOp [res] ParOp [arg] live -- later, we might want to inline it. vols <- getVolatileRegs live emitForeignCall' PlayRisky - [CmmKinded res NoHint] + [CmmHinted res NoHint] (CmmCallee newspark CCallConv) - [ (CmmKinded (CmmReg (CmmGlobal BaseReg)) PtrHint) - , (CmmKinded arg PtrHint) ] + [ (CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint) + , (CmmHinted arg AddrHint) ] (Just vols) NoC_SRT -- No SRT b/c we do PlayRisky CmmMayReturn @@ -133,7 +132,7 @@ emitPrimOp [res] ParOp [arg] live newspark = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "newSpark"))) emitPrimOp [res] ReadMutVarOp [mutv] live - = stmtC (CmmAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize)) + = stmtC (CmmAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord)) emitPrimOp [] WriteMutVarOp [mutv,var] live = do @@ -143,8 +142,8 @@ emitPrimOp [] WriteMutVarOp [mutv,var] live [{-no results-}] (CmmCallee (CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) CCallConv) - [ (CmmKinded (CmmReg (CmmGlobal BaseReg)) PtrHint) - , (CmmKinded mutv PtrHint) ] + [ (CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint) + , (CmmHinted mutv AddrHint) ] (Just vols) NoC_SRT -- No SRT b/c we do PlayRisky CmmMayReturn @@ -154,7 +153,7 @@ emitPrimOp [] WriteMutVarOp [mutv,var] live emitPrimOp [res] SizeofByteArrayOp [arg] live = stmtC $ CmmAssign (CmmLocal res) (CmmMachOp mo_wordMul [ - cmmLoadIndexW arg fixedHdrSize, + cmmLoadIndexW arg fixedHdrSize bWord, CmmLit (mkIntCLit wORD_SIZE) ]) @@ -174,14 +173,14 @@ emitPrimOp [res] ByteArrayContents_Char [arg] live -- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn) emitPrimOp [res] StableNameToIntOp [arg] live - = stmtC (CmmAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize)) + = stmtC (CmmAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize bWord)) -- #define eqStableNamezh(r,sn1,sn2) \ -- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn)) emitPrimOp [res] EqStableNameOp [arg1,arg2] live = stmtC (CmmAssign (CmmLocal res) (CmmMachOp mo_wordEq [ - cmmLoadIndexW arg1 fixedHdrSize, - cmmLoadIndexW arg2 fixedHdrSize + cmmLoadIndexW arg1 fixedHdrSize bWord, + cmmLoadIndexW arg2 fixedHdrSize bWord ])) @@ -223,117 +222,117 @@ emitPrimOp [] WriteArrayOp [obj,ix,v] live = doWritePtrArrayOp obj ix v -- IndexXXXoffAddr -emitPrimOp res IndexOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res IndexOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res IndexOffAddrOp_Int args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res IndexOffAddrOp_Word args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res IndexOffAddrOp_Addr args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res IndexOffAddrOp_Float args live = doIndexOffAddrOp Nothing F32 res args -emitPrimOp res IndexOffAddrOp_Double args live = doIndexOffAddrOp Nothing F64 res args -emitPrimOp res IndexOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res IndexOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) I8 res args -emitPrimOp res IndexOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) I16 res args -emitPrimOp res IndexOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) I32 res args -emitPrimOp res IndexOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing I64 res args -emitPrimOp res IndexOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res IndexOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) I16 res args -emitPrimOp res IndexOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res IndexOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing I64 res args +emitPrimOp res IndexOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexOffAddrOp_Int args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Word args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Addr args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Float args live = doIndexOffAddrOp Nothing f32 res args +emitPrimOp res IndexOffAddrOp_Double args live = doIndexOffAddrOp Nothing f64 res args +emitPrimOp res IndexOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res IndexOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res IndexOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res IndexOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing b64 res args +emitPrimOp res IndexOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res IndexOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing b64 res args -- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr. -emitPrimOp res ReadOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res ReadOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res ReadOffAddrOp_Int args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res ReadOffAddrOp_Word args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res ReadOffAddrOp_Addr args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res ReadOffAddrOp_Float args live = doIndexOffAddrOp Nothing F32 res args -emitPrimOp res ReadOffAddrOp_Double args live = doIndexOffAddrOp Nothing F64 res args -emitPrimOp res ReadOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing wordRep res args -emitPrimOp res ReadOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) I8 res args -emitPrimOp res ReadOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) I16 res args -emitPrimOp res ReadOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) I32 res args -emitPrimOp res ReadOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing I64 res args -emitPrimOp res ReadOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res ReadOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) I16 res args -emitPrimOp res ReadOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res ReadOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing I64 res args +emitPrimOp res ReadOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadOffAddrOp_Int args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Word args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Addr args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Float args live = doIndexOffAddrOp Nothing f32 res args +emitPrimOp res ReadOffAddrOp_Double args live = doIndexOffAddrOp Nothing f64 res args +emitPrimOp res ReadOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res ReadOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res ReadOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res ReadOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing b64 res args +emitPrimOp res ReadOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res ReadOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing b64 res args -- IndexXXXArray -emitPrimOp res IndexByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res IndexByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res IndexByteArrayOp_Int args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res IndexByteArrayOp_Word args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res IndexByteArrayOp_Addr args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res IndexByteArrayOp_Float args live = doIndexByteArrayOp Nothing F32 res args -emitPrimOp res IndexByteArrayOp_Double args live = doIndexByteArrayOp Nothing F64 res args -emitPrimOp res IndexByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res IndexByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) I8 res args -emitPrimOp res IndexByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) I16 res args -emitPrimOp res IndexByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) I32 res args -emitPrimOp res IndexByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing I64 res args -emitPrimOp res IndexByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res IndexByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) I16 res args -emitPrimOp res IndexByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res IndexByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing I64 res args +emitPrimOp res IndexByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexByteArrayOp_Int args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Word args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Addr args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Float args live = doIndexByteArrayOp Nothing f32 res args +emitPrimOp res IndexByteArrayOp_Double args live = doIndexByteArrayOp Nothing f64 res args +emitPrimOp res IndexByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res IndexByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res IndexByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res IndexByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing b64 res args +emitPrimOp res IndexByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res IndexByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing b64 res args -- ReadXXXArray, identical to IndexXXXArray. -emitPrimOp res ReadByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res ReadByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res ReadByteArrayOp_Int args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res ReadByteArrayOp_Word args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res ReadByteArrayOp_Addr args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res ReadByteArrayOp_Float args live = doIndexByteArrayOp Nothing F32 res args -emitPrimOp res ReadByteArrayOp_Double args live = doIndexByteArrayOp Nothing F64 res args -emitPrimOp res ReadByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing wordRep res args -emitPrimOp res ReadByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) I8 res args -emitPrimOp res ReadByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) I16 res args -emitPrimOp res ReadByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) I32 res args -emitPrimOp res ReadByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing I64 res args -emitPrimOp res ReadByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args -emitPrimOp res ReadByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) I16 res args -emitPrimOp res ReadByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args -emitPrimOp res ReadByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing I64 res args +emitPrimOp res ReadByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadByteArrayOp_Int args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Word args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Addr args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Float args live = doIndexByteArrayOp Nothing f32 res args +emitPrimOp res ReadByteArrayOp_Double args live = doIndexByteArrayOp Nothing f64 res args +emitPrimOp res ReadByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res ReadByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res ReadByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res ReadByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing b64 res args +emitPrimOp res ReadByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res ReadByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing b64 res args -- WriteXXXoffAddr -emitPrimOp res WriteOffAddrOp_Char args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args -emitPrimOp res WriteOffAddrOp_WideChar args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args -emitPrimOp res WriteOffAddrOp_Int args live = doWriteOffAddrOp Nothing wordRep res args -emitPrimOp res WriteOffAddrOp_Word args live = doWriteOffAddrOp Nothing wordRep res args -emitPrimOp res WriteOffAddrOp_Addr args live = doWriteOffAddrOp Nothing wordRep res args -emitPrimOp res WriteOffAddrOp_Float args live = doWriteOffAddrOp Nothing F32 res args -emitPrimOp res WriteOffAddrOp_Double args live = doWriteOffAddrOp Nothing F64 res args -emitPrimOp res WriteOffAddrOp_StablePtr args live = doWriteOffAddrOp Nothing wordRep res args -emitPrimOp res WriteOffAddrOp_Int8 args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args -emitPrimOp res WriteOffAddrOp_Int16 args live = doWriteOffAddrOp (Just mo_WordTo16) I16 res args -emitPrimOp res WriteOffAddrOp_Int32 args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args -emitPrimOp res WriteOffAddrOp_Int64 args live = doWriteOffAddrOp Nothing I64 res args -emitPrimOp res WriteOffAddrOp_Word8 args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args -emitPrimOp res WriteOffAddrOp_Word16 args live = doWriteOffAddrOp (Just mo_WordTo16) I16 res args -emitPrimOp res WriteOffAddrOp_Word32 args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args -emitPrimOp res WriteOffAddrOp_Word64 args live = doWriteOffAddrOp Nothing I64 res args +emitPrimOp res WriteOffAddrOp_Char args live = doWriteOffAddrOp (Just mo_WordTo8) b8 res args +emitPrimOp res WriteOffAddrOp_WideChar args live = doWriteOffAddrOp (Just mo_WordTo32) b32 res args +emitPrimOp res WriteOffAddrOp_Int args live = doWriteOffAddrOp Nothing bWord res args +emitPrimOp res WriteOffAddrOp_Word args live = doWriteOffAddrOp Nothing bWord res args +emitPrimOp res WriteOffAddrOp_Addr args live = doWriteOffAddrOp Nothing bWord res args +emitPrimOp res WriteOffAddrOp_Float args live = doWriteOffAddrOp Nothing f32 res args +emitPrimOp res WriteOffAddrOp_Double args live = doWriteOffAddrOp Nothing f64 res args +emitPrimOp res WriteOffAddrOp_StablePtr args live = doWriteOffAddrOp Nothing bWord res args +emitPrimOp res WriteOffAddrOp_Int8 args live = doWriteOffAddrOp (Just mo_WordTo8) b8 res args +emitPrimOp res WriteOffAddrOp_Int16 args live = doWriteOffAddrOp (Just mo_WordTo16) b16 res args +emitPrimOp res WriteOffAddrOp_Int32 args live = doWriteOffAddrOp (Just mo_WordTo32) b32 res args +emitPrimOp res WriteOffAddrOp_Int64 args live = doWriteOffAddrOp Nothing b64 res args +emitPrimOp res WriteOffAddrOp_Word8 args live = doWriteOffAddrOp (Just mo_WordTo8) b8 res args +emitPrimOp res WriteOffAddrOp_Word16 args live = doWriteOffAddrOp (Just mo_WordTo16) b16 res args +emitPrimOp res WriteOffAddrOp_Word32 args live = doWriteOffAddrOp (Just mo_WordTo32) b32 res args +emitPrimOp res WriteOffAddrOp_Word64 args live = doWriteOffAddrOp Nothing b64 res args -- WriteXXXArray -emitPrimOp res WriteByteArrayOp_Char args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args -emitPrimOp res WriteByteArrayOp_WideChar args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args -emitPrimOp res WriteByteArrayOp_Int args live = doWriteByteArrayOp Nothing wordRep res args -emitPrimOp res WriteByteArrayOp_Word args live = doWriteByteArrayOp Nothing wordRep res args -emitPrimOp res WriteByteArrayOp_Addr args live = doWriteByteArrayOp Nothing wordRep res args -emitPrimOp res WriteByteArrayOp_Float args live = doWriteByteArrayOp Nothing F32 res args -emitPrimOp res WriteByteArrayOp_Double args live = doWriteByteArrayOp Nothing F64 res args -emitPrimOp res WriteByteArrayOp_StablePtr args live = doWriteByteArrayOp Nothing wordRep res args -emitPrimOp res WriteByteArrayOp_Int8 args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args -emitPrimOp res WriteByteArrayOp_Int16 args live = doWriteByteArrayOp (Just mo_WordTo16) I16 res args -emitPrimOp res WriteByteArrayOp_Int32 args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args -emitPrimOp res WriteByteArrayOp_Int64 args live = doWriteByteArrayOp Nothing I64 res args -emitPrimOp res WriteByteArrayOp_Word8 args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args -emitPrimOp res WriteByteArrayOp_Word16 args live = doWriteByteArrayOp (Just mo_WordTo16) I16 res args -emitPrimOp res WriteByteArrayOp_Word32 args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args -emitPrimOp res WriteByteArrayOp_Word64 args live = doWriteByteArrayOp Nothing I64 res args +emitPrimOp res WriteByteArrayOp_Char args live = doWriteByteArrayOp (Just mo_WordTo8) b8 res args +emitPrimOp res WriteByteArrayOp_WideChar args live = doWriteByteArrayOp (Just mo_WordTo32) b32 res args +emitPrimOp res WriteByteArrayOp_Int args live = doWriteByteArrayOp Nothing bWord res args +emitPrimOp res WriteByteArrayOp_Word args live = doWriteByteArrayOp Nothing bWord res args +emitPrimOp res WriteByteArrayOp_Addr args live = doWriteByteArrayOp Nothing bWord res args +emitPrimOp res WriteByteArrayOp_Float args live = doWriteByteArrayOp Nothing f32 res args +emitPrimOp res WriteByteArrayOp_Double args live = doWriteByteArrayOp Nothing f64 res args +emitPrimOp res WriteByteArrayOp_StablePtr args live = doWriteByteArrayOp Nothing bWord res args +emitPrimOp res WriteByteArrayOp_Int8 args live = doWriteByteArrayOp (Just mo_WordTo8) b8 res args +emitPrimOp res WriteByteArrayOp_Int16 args live = doWriteByteArrayOp (Just mo_WordTo16) b16 res args +emitPrimOp res WriteByteArrayOp_Int32 args live = doWriteByteArrayOp (Just mo_WordTo32) b32 res args +emitPrimOp res WriteByteArrayOp_Int64 args live = doWriteByteArrayOp Nothing b64 res args +emitPrimOp res WriteByteArrayOp_Word8 args live = doWriteByteArrayOp (Just mo_WordTo8) b8 res args +emitPrimOp res WriteByteArrayOp_Word16 args live = doWriteByteArrayOp (Just mo_WordTo16) b16 res args +emitPrimOp res WriteByteArrayOp_Word32 args live = doWriteByteArrayOp (Just mo_WordTo32) b32 res args +emitPrimOp res WriteByteArrayOp_Word64 args live = doWriteByteArrayOp Nothing b64 res args -- The rest just translate straightforwardly @@ -342,16 +341,16 @@ emitPrimOp [res] op [arg] live = stmtC (CmmAssign (CmmLocal res) arg) | Just (mop,rep) <- narrowOp op - = stmtC (CmmAssign (CmmLocal res) (CmmMachOp (mop rep wordRep) [ - CmmMachOp (mop wordRep rep) [arg]])) + = stmtC (CmmAssign (CmmLocal res) $ + CmmMachOp (mop rep wordWidth) [CmmMachOp (mop wordWidth rep) [arg]]) emitPrimOp [res] op args live | Just prim <- callishOp op = do vols <- getVolatileRegs live emitForeignCall' PlayRisky - [CmmKinded res NoHint] + [CmmHinted res NoHint] (CmmPrim prim) - [CmmKinded a NoHint | a<-args] -- ToDo: hints? + [CmmHinted a NoHint | a<-args] -- ToDo: hints? (Just vols) NoC_SRT -- No SRT b/c we do PlayRisky CmmMayReturn @@ -376,12 +375,13 @@ nopOp _ = False -- These PrimOps turn into double casts -narrowOp Narrow8IntOp = Just (MO_S_Conv, I8) -narrowOp Narrow16IntOp = Just (MO_S_Conv, I16) -narrowOp Narrow32IntOp = Just (MO_S_Conv, I32) -narrowOp Narrow8WordOp = Just (MO_U_Conv, I8) -narrowOp Narrow16WordOp = Just (MO_U_Conv, I16) -narrowOp Narrow32WordOp = Just (MO_U_Conv, I32) +narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width) +narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8) +narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16) +narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32) +narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8) +narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16) +narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32) narrowOp _ = Nothing -- Native word signless ops @@ -412,7 +412,7 @@ translateOp AddrRemOp = Just mo_wordURem -- Native word signed ops translateOp IntMulOp = Just mo_wordMul -translateOp IntMulMayOfloOp = Just (MO_S_MulMayOflo wordRep) +translateOp IntMulMayOfloOp = Just (MO_S_MulMayOflo wordWidth) translateOp IntQuotOp = Just mo_wordSQuot translateOp IntRemOp = Just mo_wordSRem translateOp IntNegOp = Just mo_wordSNeg @@ -445,53 +445,53 @@ translateOp AddrLtOp = Just mo_wordULt -- Char# ops -translateOp CharEqOp = Just (MO_Eq wordRep) -translateOp CharNeOp = Just (MO_Ne wordRep) -translateOp CharGeOp = Just (MO_U_Ge wordRep) -translateOp CharLeOp = Just (MO_U_Le wordRep) -translateOp CharGtOp = Just (MO_U_Gt wordRep) -translateOp CharLtOp = Just (MO_U_Lt wordRep) +translateOp CharEqOp = Just (MO_Eq wordWidth) +translateOp CharNeOp = Just (MO_Ne wordWidth) +translateOp CharGeOp = Just (MO_U_Ge wordWidth) +translateOp CharLeOp = Just (MO_U_Le wordWidth) +translateOp CharGtOp = Just (MO_U_Gt wordWidth) +translateOp CharLtOp = Just (MO_U_Lt wordWidth) -- Double ops -translateOp DoubleEqOp = Just (MO_Eq F64) -translateOp DoubleNeOp = Just (MO_Ne F64) -translateOp DoubleGeOp = Just (MO_S_Ge F64) -translateOp DoubleLeOp = Just (MO_S_Le F64) -translateOp DoubleGtOp = Just (MO_S_Gt F64) -translateOp DoubleLtOp = Just (MO_S_Lt F64) +translateOp DoubleEqOp = Just (MO_F_Eq W64) +translateOp DoubleNeOp = Just (MO_F_Ne W64) +translateOp DoubleGeOp = Just (MO_F_Ge W64) +translateOp DoubleLeOp = Just (MO_F_Le W64) +translateOp DoubleGtOp = Just (MO_F_Gt W64) +translateOp DoubleLtOp = Just (MO_F_Lt W64) -translateOp DoubleAddOp = Just (MO_Add F64) -translateOp DoubleSubOp = Just (MO_Sub F64) -translateOp DoubleMulOp = Just (MO_Mul F64) -translateOp DoubleDivOp = Just (MO_S_Quot F64) -translateOp DoubleNegOp = Just (MO_S_Neg F64) +translateOp DoubleAddOp = Just (MO_F_Add W64) +translateOp DoubleSubOp = Just (MO_F_Sub W64) +translateOp DoubleMulOp = Just (MO_F_Mul W64) +translateOp DoubleDivOp = Just (MO_F_Quot W64) +translateOp DoubleNegOp = Just (MO_F_Neg W64) -- Float ops -translateOp FloatEqOp = Just (MO_Eq F32) -translateOp FloatNeOp = Just (MO_Ne F32) -translateOp FloatGeOp = Just (MO_S_Ge F32) -translateOp FloatLeOp = Just (MO_S_Le F32) -translateOp FloatGtOp = Just (MO_S_Gt F32) -translateOp FloatLtOp = Just (MO_S_Lt F32) +translateOp FloatEqOp = Just (MO_F_Eq W32) +translateOp FloatNeOp = Just (MO_F_Ne W32) +translateOp FloatGeOp = Just (MO_F_Ge W32) +translateOp FloatLeOp = Just (MO_F_Le W32) +translateOp FloatGtOp = Just (MO_F_Gt W32) +translateOp FloatLtOp = Just (MO_F_Lt W32) -translateOp FloatAddOp = Just (MO_Add F32) -translateOp FloatSubOp = Just (MO_Sub F32) -translateOp FloatMulOp = Just (MO_Mul F32) -translateOp FloatDivOp = Just (MO_S_Quot F32) -translateOp FloatNegOp = Just (MO_S_Neg F32) +translateOp FloatAddOp = Just (MO_F_Add W32) +translateOp FloatSubOp = Just (MO_F_Sub W32) +translateOp FloatMulOp = Just (MO_F_Mul W32) +translateOp FloatDivOp = Just (MO_F_Quot W32) +translateOp FloatNegOp = Just (MO_F_Neg W32) -- Conversions -translateOp Int2DoubleOp = Just (MO_S_Conv wordRep F64) -translateOp Double2IntOp = Just (MO_S_Conv F64 wordRep) +translateOp Int2DoubleOp = Just (MO_SF_Conv wordWidth W64) +translateOp Double2IntOp = Just (MO_FS_Conv W64 wordWidth) -translateOp Int2FloatOp = Just (MO_S_Conv wordRep F32) -translateOp Float2IntOp = Just (MO_S_Conv F32 wordRep) +translateOp Int2FloatOp = Just (MO_SF_Conv wordWidth W32) +translateOp Float2IntOp = Just (MO_FS_Conv W32 wordWidth) -translateOp Float2DoubleOp = Just (MO_S_Conv F32 F64) -translateOp Double2FloatOp = Just (MO_S_Conv F64 F32) +translateOp Float2DoubleOp = Just (MO_FF_Conv W32 W64) +translateOp Double2FloatOp = Just (MO_FF_Conv W64 W32) -- Word comparisons masquerading as more exotic things. @@ -540,6 +540,10 @@ callishOp _ = Nothing ------------------------------------------------------------------------------ -- Helpers for translating various minor variants of array indexing. +-- Bytearrays outside the heap; hence non-pointers +doIndexOffAddrOp, doIndexByteArrayOp + :: Maybe MachOp -> CmmType + -> [LocalReg] -> [CmmExpr] -> Code doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx] = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx doIndexOffAddrOp _ _ _ _ @@ -550,10 +554,14 @@ doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx] doIndexByteArrayOp _ _ _ _ = panic "CgPrimOp: doIndexByteArrayOp" +doReadPtrArrayOp :: LocalReg -> CmmExpr -> CmmExpr -> Code doReadPtrArrayOp res addr idx - = mkBasicIndexedRead arrPtrsHdrSize Nothing wordRep res addr idx + = mkBasicIndexedRead arrPtrsHdrSize Nothing gcWord res addr idx +doWriteOffAddrOp, doWriteByteArrayOp + :: Maybe MachOp -> CmmType + -> [LocalReg] -> [CmmExpr] -> Code doWriteOffAddrOp maybe_pre_write_cast rep [] [addr,idx,val] = mkBasicIndexedWrite 0 maybe_pre_write_cast rep addr idx val doWriteOffAddrOp _ _ _ _ @@ -564,17 +572,22 @@ doWriteByteArrayOp maybe_pre_write_cast rep [] [addr,idx,val] doWriteByteArrayOp _ _ _ _ = panic "CgPrimOp: doWriteByteArrayOp" +doWritePtrArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> Code doWritePtrArrayOp addr idx val = do stmtC (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel))) - mkBasicIndexedWrite arrPtrsHdrSize Nothing wordRep addr idx val + mkBasicIndexedWrite arrPtrsHdrSize Nothing bWord addr idx val +mkBasicIndexedRead :: ByteOff -> Maybe MachOp -> CmmType + -> LocalReg -> CmmExpr -> CmmExpr -> Code mkBasicIndexedRead off Nothing read_rep res base idx = stmtC (CmmAssign (CmmLocal res) (cmmLoadIndexOffExpr off read_rep base idx)) mkBasicIndexedRead off (Just cast) read_rep res base idx = stmtC (CmmAssign (CmmLocal res) (CmmMachOp cast [ cmmLoadIndexOffExpr off read_rep base idx])) +mkBasicIndexedWrite :: ByteOff -> Maybe MachOp -> CmmType + -> CmmExpr -> CmmExpr -> CmmExpr -> Code mkBasicIndexedWrite off Nothing write_rep base idx val = stmtC (CmmStore (cmmIndexOffExpr off write_rep base idx) val) mkBasicIndexedWrite off (Just cast) write_rep base idx val @@ -583,11 +596,11 @@ mkBasicIndexedWrite off (Just cast) write_rep base idx val -- ---------------------------------------------------------------------------- -- Misc utils -cmmIndexOffExpr :: ByteOff -> MachRep -> CmmExpr -> CmmExpr -> CmmExpr +cmmIndexOffExpr :: ByteOff -> CmmType -> CmmExpr -> CmmExpr -> CmmExpr cmmIndexOffExpr off rep base idx - = cmmIndexExpr rep (cmmOffsetB base off) idx + = cmmIndexExpr (typeWidth rep) (cmmOffsetB base off) idx -cmmLoadIndexOffExpr :: ByteOff -> MachRep -> CmmExpr -> CmmExpr -> CmmExpr +cmmLoadIndexOffExpr :: ByteOff -> CmmType -> CmmExpr -> CmmExpr -> CmmExpr cmmLoadIndexOffExpr off rep base idx = CmmLoad (cmmIndexOffExpr off rep base idx) rep diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs index c2a8a1bd75..c85beb50aa 100644 --- a/compiler/codeGen/CgProf.hs +++ b/compiler/codeGen/CgProf.hs @@ -45,7 +45,6 @@ import CgMonad import SMRep import Cmm -import MachOp import CmmUtils import CLabel @@ -70,7 +69,7 @@ import Control.Monad -- Expression representing the current cost centre stack curCCS :: CmmExpr -curCCS = CmmLoad curCCSAddr wordRep +curCCS = CmmLoad curCCSAddr bWord -- Address of current CCS variable, for storing into curCCSAddr :: CmmExpr @@ -84,7 +83,7 @@ mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs) costCentreFrom :: CmmExpr -- A closure pointer -> CmmExpr -- The cost centre from that closure -costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) wordRep +costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) bWord staticProfHdr :: CostCentreStack -> [CmmLit] -- The profiling header words in a static closure @@ -122,13 +121,13 @@ profAlloc words ccs = ifProfiling $ stmtC (addToMemE alloc_rep (cmmOffsetB ccs oFFSET_CostCentreStack_mem_alloc) - (CmmMachOp (MO_U_Conv wordRep alloc_rep) $ + (CmmMachOp (MO_UU_Conv wordWidth alloc_rep) $ [CmmMachOp mo_wordSub [words, CmmLit (mkIntCLit profHdrSize)]])) -- subtract the "profiling overhead", which is the -- profiling header in a closure. where - alloc_rep = REP_CostCentreStack_mem_alloc + alloc_rep = typeWidth REP_CostCentreStack_mem_alloc -- ---------------------------------------------------------------------- -- Setting the cost centre in a new closure @@ -162,7 +161,7 @@ emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's) push_em ccs [] = return ccs push_em ccs (cc:rest) = do - tmp <- newNonPtrTemp wordRep -- TODO FIXME NOW + tmp <- newTemp bWord -- TODO FIXME NOW pushCostCentre tmp ccs cc push_em (CmmReg (CmmLocal tmp)) rest @@ -267,7 +266,7 @@ enterCostCentreThunk closure = ifProfiling $ do stmtC $ CmmStore curCCSAddr (costCentreFrom closure) -enter_ccs_fun stack = emitRtsCall (sLit "EnterFunCCS") [CmmKinded stack PtrHint] False +enter_ccs_fun stack = emitRtsCall (sLit "EnterFunCCS") [CmmHinted stack AddrHint] False -- ToDo: vols enter_ccs_fsub = enteringPAP 0 @@ -280,7 +279,7 @@ enter_ccs_fsub = enteringPAP 0 enteringPAP :: Integer -> Code enteringPAP n = stmtC (CmmStore (CmmLit (CmmLabel (mkRtsDataLabel (sLit "entering_PAP")))) - (CmmLit (CmmInt n cIntRep))) + (CmmLit (CmmInt n cIntWidth))) ifProfiling :: Code -> Code ifProfiling code @@ -340,7 +339,7 @@ emitCostCentreStackDecl ccs | otherwise = pprPanic "emitCostCentreStackDecl" (ppr ccs) zero = mkIntCLit 0 -zero64 = CmmInt 0 I64 +zero64 = CmmInt 0 W64 sizeof_ccs_words :: Int sizeof_ccs_words @@ -359,12 +358,12 @@ sizeof_ccs_words emitRegisterCC :: CostCentre -> Code emitRegisterCC cc = do - { tmp <- newNonPtrTemp cIntRep + { tmp <- newTemp cInt ; stmtsC [ CmmStore (cmmOffsetB cc_lit oFFSET_CostCentre_link) - (CmmLoad cC_LIST wordRep), + (CmmLoad cC_LIST bWord), CmmStore cC_LIST cc_lit, - CmmAssign (CmmLocal tmp) (CmmLoad cC_ID cIntRep), + CmmAssign (CmmLocal tmp) (CmmLoad cC_ID cInt), CmmStore (cmmOffsetB cc_lit oFFSET_CostCentre_ccID) (CmmReg (CmmLocal tmp)), CmmStore cC_ID (cmmRegOffB (CmmLocal tmp) 1) ] @@ -378,12 +377,12 @@ emitRegisterCC cc = do emitRegisterCCS :: CostCentreStack -> Code emitRegisterCCS ccs = do - { tmp <- newNonPtrTemp cIntRep + { tmp <- newTemp cInt ; stmtsC [ CmmStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_prevStack) - (CmmLoad cCS_LIST wordRep), + (CmmLoad cCS_LIST bWord), CmmStore cCS_LIST ccs_lit, - CmmAssign (CmmLocal tmp) (CmmLoad cCS_ID cIntRep), + CmmAssign (CmmLocal tmp) (CmmLoad cCS_ID cInt), CmmStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_ccsID) (CmmReg (CmmLocal tmp)), CmmStore cCS_ID (cmmRegOffB (CmmLocal tmp) 1) ] @@ -405,7 +404,7 @@ emitSetCCC :: CostCentre -> Code emitSetCCC cc | not opt_SccProfilingOn = nopC | otherwise = do - tmp <- newNonPtrTemp wordRep -- TODO FIXME NOW + tmp <- newTemp bWord -- TODO FIXME NOW ASSERT( sccAbleCostCentre cc ) pushCostCentre tmp curCCS cc stmtC (CmmStore curCCSAddr (CmmReg (CmmLocal tmp))) @@ -414,14 +413,14 @@ emitSetCCC cc pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> Code pushCostCentre result ccs cc - = emitRtsCallWithResult result PtrHint - (sLit "PushCostCentre") [CmmKinded ccs PtrHint, - CmmKinded (CmmLit (mkCCostCentre cc)) PtrHint] + = emitRtsCallWithResult result AddrHint + (sLit "PushCostCentre") [CmmHinted ccs AddrHint, + CmmHinted (CmmLit (mkCCostCentre cc)) AddrHint] False bumpSccCount :: CmmExpr -> CmmStmt bumpSccCount ccs - = addToMem REP_CostCentreStack_scc_count + = addToMem (typeWidth REP_CostCentreStack_scc_count) (cmmOffsetB ccs oFFSET_CostCentreStack_scc_count) 1 ----------------------------------------------------------------------------- @@ -475,13 +474,13 @@ ldvEnter cl_ptr where -- don't forget to substract node's tag ldv_wd = ldvWord cl_ptr - new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd wordRep) + new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd bWord) (CmmLit (mkWordCLit lDV_CREATE_MASK))) (cmmOrWord loadEra (CmmLit (mkWordCLit lDV_STATE_USE))) loadEra :: CmmExpr -loadEra = CmmMachOp (MO_U_Conv cIntRep wordRep) - [CmmLoad (mkLblExpr (mkRtsDataLabel (sLit "era"))) cIntRep] +loadEra = CmmMachOp (MO_UU_Conv cIntWidth wordWidth) + [CmmLoad (mkLblExpr (mkRtsDataLabel $ sLit("era"))) cInt] ldvWord :: CmmExpr -> CmmExpr -- Takes the address of a closure, and returns diff --git a/compiler/codeGen/CgStackery.lhs b/compiler/codeGen/CgStackery.lhs index b8db38d4ed..d6d9e5cfad 100644 --- a/compiler/codeGen/CgStackery.lhs +++ b/compiler/codeGen/CgStackery.lhs @@ -274,7 +274,6 @@ to reflect the frame pushed. \begin{code} pushUpdateFrame :: CmmExpr -> Code -> Code - pushUpdateFrame updatee code = do { when debugIsOn $ do diff --git a/compiler/codeGen/CgTailCall.lhs b/compiler/codeGen/CgTailCall.lhs index 475196abba..4f890998ae 100644 --- a/compiler/codeGen/CgTailCall.lhs +++ b/compiler/codeGen/CgTailCall.lhs @@ -41,6 +41,7 @@ import Type import Id import StgSyn import PrimOp +import FastString import Outputable import Control.Monad @@ -116,7 +117,7 @@ performTailCall fun_info arg_amodes pending_assts ; EndOfBlockInfo sp _ <- getEndOfBlockInfo ; this_pkg <- getThisPackage - ; case (getCallMethod fun_name lf_info (length arg_amodes)) of + ; case (getCallMethod fun_name fun_has_cafs lf_info (length arg_amodes)) of -- Node must always point to things we enter EnterIt -> do @@ -183,8 +184,10 @@ performTailCall fun_info arg_amodes pending_assts } } where - fun_name = idName (cgIdInfoId fun_info) + fun_id = cgIdInfoId fun_info + fun_name = idName fun_id lf_info = cgIdInfoLF fun_info + fun_has_cafs = idCafInfo fun_id untag_node = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg)) -- Test if closure is a constructor maybeSwitchOnCons enterClosure eob diff --git a/compiler/codeGen/CgTicky.hs b/compiler/codeGen/CgTicky.hs index 24947409fe..b23b34caa4 100644 --- a/compiler/codeGen/CgTicky.hs +++ b/compiler/codeGen/CgTicky.hs @@ -52,12 +52,12 @@ import CgMonad import SMRep import Cmm -import MachOp import CmmUtils import CLabel import Name import Id +import IdInfo import StaticFlags import BasicTypes import FastString @@ -106,7 +106,7 @@ emitTickyCounter cl_info args on_stk ] } where name = closureName cl_info - ticky_ctr_label = mkRednCountsLabel name + ticky_ctr_label = mkRednCountsLabel name NoCafRefs arg_descr = map (showTypeCategory . idType) args fun_descr mod_name = ppr_for_ticky_name mod_name name @@ -172,13 +172,13 @@ registerTickyCtr ctr_lbl = emitIf test (stmtsC register_stmts) where -- krc: code generator doesn't handle Not, so we test for Eq 0 instead - test = CmmMachOp (MO_Eq wordRep) + test = CmmMachOp (MO_Eq wordWidth) [CmmLoad (CmmLit (cmmLabelOffB ctr_lbl - oFFSET_StgEntCounter_registeredp)) wordRep, + oFFSET_StgEntCounter_registeredp)) bWord, CmmLit (mkIntCLit 0)] register_stmts = [ CmmStore (CmmLit (cmmLabelOffB ctr_lbl oFFSET_StgEntCounter_link)) - (CmmLoad ticky_entry_ctrs wordRep) + (CmmLoad ticky_entry_ctrs bWord) , CmmStore ticky_entry_ctrs (mkLblExpr ctr_lbl) , CmmStore (CmmLit (cmmLabelOffB ctr_lbl oFFSET_StgEntCounter_registeredp)) @@ -288,13 +288,13 @@ tickyAllocHeap hp if hp == 0 then [] -- Inside the stmtC to avoid control else [ -- dependency on the argument -- Bump the allcoation count in the StgEntCounter - addToMem REP_StgEntCounter_allocs + addToMem (typeWidth REP_StgEntCounter_allocs) (CmmLit (cmmLabelOffB ticky_ctr oFFSET_StgEntCounter_allocs)) hp, -- Bump ALLOC_HEAP_ctr - addToMemLbl cLongRep (mkRtsDataLabel (sLit "ALLOC_HEAP_ctr")) 1, - -- Bump ALLOC_HEAP_tot - addToMemLbl cLongRep (mkRtsDataLabel (sLit "ALLOC_HEAP_tot")) hp] } + addToMemLbl cLongWidth (mkRtsDataLabel $ sLit "ALLOC_HEAP_ctr") 1, + -- Bump ALLOC_HEAP_tot + addToMemLbl cLongWidth (mkRtsDataLabel $ sLit "ALLOC_HEAP_tot") hp] } -- ----------------------------------------------------------------------------- -- Ticky utils @@ -304,7 +304,7 @@ ifTicky code | opt_DoTickyProfiling = code | otherwise = nopC -addToMemLbl :: MachRep -> CLabel -> Int -> CmmStmt +addToMemLbl :: Width -> CLabel -> Int -> CmmStmt addToMemLbl rep lbl n = addToMem rep (CmmLit (CmmLabel lbl)) n -- All the ticky-ticky counters are declared "unsigned long" in C @@ -313,27 +313,28 @@ bumpTickyCounter lbl = bumpTickyCounter' (cmmLabelOffB (mkRtsDataLabel lbl) 0) bumpTickyCounter' :: CmmLit -> Code -- krc: note that we're incrementing the _entry_count_ field of the ticky counter -bumpTickyCounter' lhs = stmtC (addToMem cLongRep (CmmLit lhs) 1) - -addToMemLong = addToMem cLongRep +bumpTickyCounter' lhs = stmtC (addToMemLong (CmmLit lhs) 1) bumpHistogram :: LitString -> Int -> Code bumpHistogram lbl n --- = bumpHistogramE lbl (CmmLit (CmmInt (fromIntegral n) cLongRep)) +-- = bumpHistogramE lbl (CmmLit (CmmInt (fromIntegral n) cLong)) = return () -- TEMP SPJ Apr 07 bumpHistogramE :: LitString -> CmmExpr -> Code bumpHistogramE lbl n - = do t <- newNonPtrTemp cLongRep + = do t <- newTemp cLong stmtC (CmmAssign (CmmLocal t) n) - emitIf (CmmMachOp (MO_U_Le cLongRep) [CmmReg (CmmLocal t), eight]) $ + emitIf (CmmMachOp (MO_U_Le cLongWidth) [CmmReg (CmmLocal t), eight]) $ stmtC (CmmAssign (CmmLocal t) eight) - stmtC (addToMemLong (cmmIndexExpr cLongRep + stmtC (addToMemLong (cmmIndexExpr cLongWidth (CmmLit (CmmLabel (mkRtsDataLabel lbl))) (CmmReg (CmmLocal t))) 1) where - eight = CmmLit (CmmInt 8 cLongRep) + eight = CmmLit (CmmInt 8 cLongWidth) + +------------------------------------------------------------------ +addToMemLong = addToMem cLongWidth ------------------------------------------------------------------ -- Showing the "type category" for ticky-ticky profiling diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index 4de3537788..fd49cb7182 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -20,8 +20,7 @@ module CgUtils ( emitRODataLits, mkRODataLits, emitIf, emitIfThenElse, emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult, - assignNonPtrTemp, newNonPtrTemp, - assignPtrTemp, newPtrTemp, + assignTemp, newTemp, emitSimultaneously, emitSwitch, emitLitSwitch, tagToClosure, @@ -47,7 +46,7 @@ module CgUtils ( packHalfWordsCLit, blankWord, - getSRTInfo + getSRTInfo, clHasCafRefs ) where #include "HsVersions.h" @@ -58,13 +57,13 @@ import CgMonad import TyCon import DataCon import Id +import IdInfo import Constants import SMRep import PprCmm ( {- instances -} ) import Cmm import CLabel import CmmUtils -import MachOp import ForeignCall import ClosureInfo import StgSyn (SRT(..)) @@ -103,24 +102,24 @@ cgLit (MachStr s) = mkByteStringCLit (bytesFS s) cgLit other_lit = return (mkSimpleLit other_lit) mkSimpleLit :: Literal -> CmmLit -mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordRep +mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordWidth mkSimpleLit MachNullAddr = zeroCLit -mkSimpleLit (MachInt i) = CmmInt i wordRep -mkSimpleLit (MachInt64 i) = CmmInt i I64 -mkSimpleLit (MachWord i) = CmmInt i wordRep -mkSimpleLit (MachWord64 i) = CmmInt i I64 -mkSimpleLit (MachFloat r) = CmmFloat r F32 -mkSimpleLit (MachDouble r) = CmmFloat r F64 +mkSimpleLit (MachInt i) = CmmInt i wordWidth +mkSimpleLit (MachInt64 i) = CmmInt i W64 +mkSimpleLit (MachWord i) = CmmInt i wordWidth +mkSimpleLit (MachWord64 i) = CmmInt i W64 +mkSimpleLit (MachFloat r) = CmmFloat r W32 +mkSimpleLit (MachDouble r) = CmmFloat r W64 mkSimpleLit (MachLabel fs ms) = CmmLabel (mkForeignLabel fs ms is_dyn) where is_dyn = False -- ToDo: fix me mkLtOp :: Literal -> MachOp -- On signed literals we must do a signed comparison -mkLtOp (MachInt _) = MO_S_Lt wordRep -mkLtOp (MachFloat _) = MO_S_Lt F32 -mkLtOp (MachDouble _) = MO_S_Lt F64 -mkLtOp lit = MO_U_Lt (cmmLitRep (mkSimpleLit lit)) +mkLtOp (MachInt _) = MO_S_Lt wordWidth +mkLtOp (MachFloat _) = MO_F_Lt W32 +mkLtOp (MachDouble _) = MO_F_Lt W64 +mkLtOp lit = MO_U_Lt (typeWidth (cmmLitType (mkSimpleLit lit))) --------------------------------------------------- @@ -151,7 +150,7 @@ cmmOffsetLitB = cmmOffsetLit cmmOffsetExprW :: CmmExpr -> CmmExpr -> CmmExpr -- The second arg is a *word* offset; need to change it to bytes cmmOffsetExprW e (CmmLit (CmmInt n _)) = cmmOffsetW e (fromInteger n) -cmmOffsetExprW e wd_off = cmmIndexExpr wordRep e wd_off +cmmOffsetExprW e wd_off = cmmIndexExpr wordWidth e wd_off cmmOffsetW :: CmmExpr -> WordOff -> CmmExpr cmmOffsetW e n = cmmOffsetB e (wORD_SIZE * n) @@ -165,9 +164,8 @@ cmmOffsetLitW lit wd_off = cmmOffsetLitB lit (wORD_SIZE * wd_off) cmmLabelOffW :: CLabel -> WordOff -> CmmLit cmmLabelOffW lbl wd_off = cmmLabelOffB lbl (wORD_SIZE * wd_off) -cmmLoadIndexW :: CmmExpr -> Int -> CmmExpr -cmmLoadIndexW base off - = CmmLoad (cmmOffsetW base off) wordRep +cmmLoadIndexW :: CmmExpr -> Int -> CmmType -> CmmExpr +cmmLoadIndexW base off ty = CmmLoad (cmmOffsetW base off) ty ----------------------- cmmNeWord, cmmEqWord, cmmOrWord, cmmAndWord :: CmmExpr -> CmmExpr -> CmmExpr @@ -184,7 +182,7 @@ cmmSubWord e1 e2 = CmmMachOp mo_wordSub [e1, e2] cmmNegate :: CmmExpr -> CmmExpr cmmNegate (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep) -cmmNegate e = CmmMachOp (MO_S_Neg (cmmExprRep e)) [e] +cmmNegate e = CmmMachOp (MO_S_Neg (cmmExprWidth e)) [e] blankWord :: CmmStatic blankWord = CmmUninitialised wORD_SIZE @@ -244,7 +242,7 @@ dataConTagZ con = dataConTag con - fIRST_TAG -- Making literals mkWordCLit :: StgWord -> CmmLit -mkWordCLit wd = CmmInt (fromIntegral wd) wordRep +mkWordCLit wd = CmmInt (fromIntegral wd) wordWidth packHalfWordsCLit :: (Integral a, Integral b) => a -> b -> CmmLit -- Make a single word literal in which the lower_half_word is @@ -267,18 +265,18 @@ packHalfWordsCLit lower_half_word upper_half_word -- -------------------------------------------------------------------------- -addToMem :: MachRep -- rep of the counter +addToMem :: Width -- rep of the counter -> CmmExpr -- Address -> Int -- What to add (a word) -> CmmStmt -addToMem rep ptr n = addToMemE rep ptr (CmmLit (CmmInt (toInteger n) rep)) +addToMem width ptr n = addToMemE width ptr (CmmLit (CmmInt (toInteger n) width)) -addToMemE :: MachRep -- rep of the counter +addToMemE :: Width -- rep of the counter -> CmmExpr -- Address -> CmmExpr -- What to add (a word-typed expression) -> CmmStmt -addToMemE rep ptr n - = CmmStore ptr (CmmMachOp (MO_Add rep) [CmmLoad ptr rep, n]) +addToMemE width ptr n + = CmmStore ptr (CmmMachOp (MO_Add width) [CmmLoad ptr (cmmBits width), n]) ------------------------------------------------------------------------- -- @@ -289,9 +287,9 @@ addToMemE rep ptr n tagToClosure :: TyCon -> CmmExpr -> CmmExpr tagToClosure tycon tag - = CmmLoad (cmmOffsetExprW closure_tbl tag) wordRep + = CmmLoad (cmmOffsetExprW closure_tbl tag) gcWord where closure_tbl = CmmLit (CmmLabel lbl) - lbl = mkClosureTableLabel (tyConName tycon) + lbl = mkClosureTableLabel (tyConName tycon) NoCafRefs ------------------------------------------------------------------------- -- @@ -334,24 +332,24 @@ emitIfThenElse cond then_part else_part ; labelC join_id } -emitRtsCall :: LitString -> [CmmKinded CmmExpr] -> Bool -> Code +emitRtsCall :: LitString -> [CmmHinted CmmExpr] -> Bool -> Code emitRtsCall fun args safe = emitRtsCall' [] fun args Nothing safe -- The 'Nothing' says "save all global registers" -emitRtsCallWithVols :: LitString -> [CmmKinded CmmExpr] -> [GlobalReg] -> Bool -> Code +emitRtsCallWithVols :: LitString -> [CmmHinted CmmExpr] -> [GlobalReg] -> Bool -> Code emitRtsCallWithVols fun args vols safe = emitRtsCall' [] fun args (Just vols) safe -emitRtsCallWithResult :: LocalReg -> MachHint -> LitString - -> [CmmKinded CmmExpr] -> Bool -> Code +emitRtsCallWithResult :: LocalReg -> ForeignHint -> LitString + -> [CmmHinted CmmExpr] -> Bool -> Code emitRtsCallWithResult res hint fun args safe - = emitRtsCall' [CmmKinded res hint] fun args Nothing safe + = emitRtsCall' [CmmHinted res hint] fun args Nothing safe -- Make a call to an RTS C procedure emitRtsCall' - :: CmmFormals + :: [CmmHinted LocalReg] -> LitString - -> [CmmKinded CmmExpr] + -> [CmmHinted CmmExpr] -> Maybe [GlobalReg] -> Bool -- True <=> CmmSafe call -> Code @@ -393,7 +391,8 @@ callerSaveVolatileRegs vols = (caller_save, caller_load) vol_list = case vols of Nothing -> all_of_em; Just regs -> regs - all_of_em = [ VanillaReg n | n <- [0..mAX_Vanilla_REG] ] + all_of_em = [ VanillaReg n VNonGcPtr | n <- [0..mAX_Vanilla_REG] ] + -- The VNonGcPtr is a lie, but I don't think it matters ++ [ FloatReg n | n <- [0..mAX_Float_REG] ] ++ [ DoubleReg n | n <- [0..mAX_Double_REG] ] ++ [ LongReg n | n <- [0..mAX_Long_REG] ] @@ -407,7 +406,7 @@ callerSaveVolatileRegs vols = (caller_save, caller_load) callerRestoreGlobalReg reg next | callerSaves reg = CmmAssign (CmmGlobal reg) - (CmmLoad (get_GlobalReg_addr reg) (globalRegRep reg)) + (CmmLoad (get_GlobalReg_addr reg) (globalRegType reg)) : next | otherwise = next @@ -423,14 +422,14 @@ callerSaveVolatileRegs vols = (caller_save, caller_load) get_GlobalReg_addr :: GlobalReg -> CmmExpr get_GlobalReg_addr BaseReg = regTableOffset 0 get_GlobalReg_addr mid = get_Regtable_addr_from_offset - (globalRegRep mid) (baseRegOffset mid) + (globalRegType mid) (baseRegOffset mid) -- Calculate a literal representing an offset into the register table. -- Used when we don't have an actual BaseReg to offset from. regTableOffset n = CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r + n)) -get_Regtable_addr_from_offset :: MachRep -> Int -> CmmExpr +get_Regtable_addr_from_offset :: CmmType -> Int -> CmmExpr get_Regtable_addr_from_offset rep offset = #ifdef REG_Base CmmRegOff (CmmGlobal BaseReg) offset @@ -448,28 +447,28 @@ callerSaves :: GlobalReg -> Bool callerSaves BaseReg = True #endif #ifdef CALLER_SAVES_R1 -callerSaves (VanillaReg 1) = True +callerSaves (VanillaReg 1 _) = True #endif #ifdef CALLER_SAVES_R2 -callerSaves (VanillaReg 2) = True +callerSaves (VanillaReg 2 _) = True #endif #ifdef CALLER_SAVES_R3 -callerSaves (VanillaReg 3) = True +callerSaves (VanillaReg 3 _) = True #endif #ifdef CALLER_SAVES_R4 -callerSaves (VanillaReg 4) = True +callerSaves (VanillaReg 4 _) = True #endif #ifdef CALLER_SAVES_R5 -callerSaves (VanillaReg 5) = True +callerSaves (VanillaReg 5 _) = True #endif #ifdef CALLER_SAVES_R6 -callerSaves (VanillaReg 6) = True +callerSaves (VanillaReg 6 _) = True #endif #ifdef CALLER_SAVES_R7 -callerSaves (VanillaReg 7) = True +callerSaves (VanillaReg 7 _) = True #endif #ifdef CALLER_SAVES_R8 -callerSaves (VanillaReg 8) = True +callerSaves (VanillaReg 8 _) = True #endif #ifdef CALLER_SAVES_F1 callerSaves (FloatReg 1) = True @@ -518,16 +517,16 @@ callerSaves _ = False baseRegOffset :: GlobalReg -> Int -baseRegOffset (VanillaReg 1) = oFFSET_StgRegTable_rR1 -baseRegOffset (VanillaReg 2) = oFFSET_StgRegTable_rR2 -baseRegOffset (VanillaReg 3) = oFFSET_StgRegTable_rR3 -baseRegOffset (VanillaReg 4) = oFFSET_StgRegTable_rR4 -baseRegOffset (VanillaReg 5) = oFFSET_StgRegTable_rR5 -baseRegOffset (VanillaReg 6) = oFFSET_StgRegTable_rR6 -baseRegOffset (VanillaReg 7) = oFFSET_StgRegTable_rR7 -baseRegOffset (VanillaReg 8) = oFFSET_StgRegTable_rR8 -baseRegOffset (VanillaReg 9) = oFFSET_StgRegTable_rR9 -baseRegOffset (VanillaReg 10) = oFFSET_StgRegTable_rR10 +baseRegOffset (VanillaReg 1 _) = oFFSET_StgRegTable_rR1 +baseRegOffset (VanillaReg 2 _) = oFFSET_StgRegTable_rR2 +baseRegOffset (VanillaReg 3 _) = oFFSET_StgRegTable_rR3 +baseRegOffset (VanillaReg 4 _) = oFFSET_StgRegTable_rR4 +baseRegOffset (VanillaReg 5 _) = oFFSET_StgRegTable_rR5 +baseRegOffset (VanillaReg 6 _) = oFFSET_StgRegTable_rR6 +baseRegOffset (VanillaReg 7 _) = oFFSET_StgRegTable_rR7 +baseRegOffset (VanillaReg 8 _) = oFFSET_StgRegTable_rR8 +baseRegOffset (VanillaReg 9 _) = oFFSET_StgRegTable_rR9 +baseRegOffset (VanillaReg 10 _) = oFFSET_StgRegTable_rR10 baseRegOffset (FloatReg 1) = oFFSET_StgRegTable_rF1 baseRegOffset (FloatReg 2) = oFFSET_StgRegTable_rF2 baseRegOffset (FloatReg 3) = oFFSET_StgRegTable_rF3 @@ -565,15 +564,15 @@ mkDataLits :: CLabel -> [CmmLit] -> GenCmmTop CmmStatic info graph mkDataLits lbl lits = CmmData Data (CmmDataLabel lbl : map CmmStaticLit lits) -emitRODataLits :: CLabel -> [CmmLit] -> Code +emitRODataLits :: String -> CLabel -> [CmmLit] -> Code -- Emit a read-only data block -emitRODataLits lbl lits +emitRODataLits caller lbl lits = emitData section (CmmDataLabel lbl : map CmmStaticLit lits) - where section | any needsRelocation lits = RelocatableReadOnlyData - | otherwise = ReadOnlyData - needsRelocation (CmmLabel _) = True - needsRelocation (CmmLabelOff _ _) = True - needsRelocation _ = False + where section | any needsRelocation lits = RelocatableReadOnlyData + | otherwise = ReadOnlyData + needsRelocation (CmmLabel _) = True + needsRelocation (CmmLabelOff _ _) = True + needsRelocation _ = False mkRODataLits :: CLabel -> [CmmLit] -> GenCmmTop CmmStatic info graph mkRODataLits lbl lits @@ -602,30 +601,17 @@ mkByteStringCLit bytes -- ------------------------------------------------------------------------- -assignNonPtrTemp :: CmmExpr -> FCode CmmExpr --- For a non-trivial expression, e, create a local --- variable and assign the expression to it -assignNonPtrTemp e - | isTrivialCmmExpr e = return e - | otherwise = do { reg <- newNonPtrTemp (cmmExprRep e) - ; stmtC (CmmAssign (CmmLocal reg) e) - ; return (CmmReg (CmmLocal reg)) } - -assignPtrTemp :: CmmExpr -> FCode CmmExpr +assignTemp :: CmmExpr -> FCode CmmExpr -- For a non-trivial expression, e, create a local -- variable and assign the expression to it -assignPtrTemp e +assignTemp e | isTrivialCmmExpr e = return e - | otherwise = do { reg <- newPtrTemp (cmmExprRep e) + | otherwise = do { reg <- newTemp (cmmExprType e) ; stmtC (CmmAssign (CmmLocal reg) e) ; return (CmmReg (CmmLocal reg)) } -newNonPtrTemp :: MachRep -> FCode LocalReg -newNonPtrTemp rep = do { uniq <- newUnique; return (LocalReg uniq rep GCKindNonPtr) } - -newPtrTemp :: MachRep -> FCode LocalReg -newPtrTemp rep = do { uniq <- newUnique; return (LocalReg uniq rep GCKindPtr) } - +newTemp :: CmmType -> FCode LocalReg +newTemp rep = do { uniq <- newUnique; return (LocalReg uniq rep) } ------------------------------------------------------------------------- -- @@ -727,7 +713,7 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C -- if we can knock off a bunch of default cases with one if, then do so | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches - = do { (assign_tag, tag_expr') <- assignNonPtrTemp' tag_expr + = do { (assign_tag, tag_expr') <- assignTemp' tag_expr ; let cond = cmmULtWord tag_expr' (CmmLit (mkIntCLit lowest_branch)) branch = CmmCondBranch cond deflt ; stmts <- mk_switch tag_expr' branches mb_deflt @@ -736,7 +722,7 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C } | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches - = do { (assign_tag, tag_expr') <- assignNonPtrTemp' tag_expr + = do { (assign_tag, tag_expr') <- assignTemp' tag_expr ; let cond = cmmUGtWord tag_expr' (CmmLit (mkIntCLit highest_branch)) branch = CmmCondBranch cond deflt ; stmts <- mk_switch tag_expr' branches mb_deflt @@ -745,7 +731,7 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C } | otherwise -- Use an if-tree - = do { (assign_tag, tag_expr') <- assignNonPtrTemp' tag_expr + = do { (assign_tag, tag_expr') <- assignTemp' tag_expr -- To avoid duplication ; lo_stmts <- mk_switch tag_expr' lo_branches mb_deflt lo_tag (mid_tag-1) via_C @@ -810,9 +796,9 @@ mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C is_lo (t,_) = t < mid_tag -assignNonPtrTemp' e +assignTemp' e | isTrivialCmmExpr e = return (CmmNop, e) - | otherwise = do { reg <- newNonPtrTemp (cmmExprRep e) + | otherwise = do { reg <- newTemp (cmmExprType e) ; return (CmmAssign (CmmLocal reg) e, CmmReg (CmmLocal reg)) } emitLitSwitch :: CmmExpr -- Tag to switch on @@ -828,7 +814,7 @@ emitLitSwitch :: CmmExpr -- Tag to switch on emitLitSwitch scrut [] deflt = emitCgStmts deflt emitLitSwitch scrut branches deflt_blk - = do { scrut' <- assignNonPtrTemp scrut + = do { scrut' <- assignTemp scrut ; deflt_blk_id <- forkCgStmts deflt_blk ; blk <- mk_lit_switch scrut' deflt_blk_id (sortLe le branches) ; emitCgStmts blk } @@ -842,8 +828,9 @@ mk_lit_switch scrut deflt_blk_id [(lit,blk)] = return (consCgStmt if_stmt blk) where cmm_lit = mkSimpleLit lit - rep = cmmLitRep cmm_lit - cond = CmmMachOp (MO_Ne rep) [scrut, CmmLit cmm_lit] + rep = cmmLitType cmm_lit + ne = if isFloatType rep then MO_F_Ne else MO_Ne + cond = CmmMachOp (ne (typeWidth rep)) [scrut, CmmLit cmm_lit] if_stmt = CmmCondBranch cond deflt_blk_id mk_lit_switch scrut deflt_blk_id branches @@ -920,11 +907,11 @@ doSimultaneously1 vertices ; stmtC from_temp } go_via_temp (CmmAssign dest src) - = do { tmp <- newNonPtrTemp (cmmRegRep dest) -- TODO FIXME NOW if the pair of assignments move across a call this will be wrong + = do { tmp <- newTemp (cmmRegType dest) -- TODO FIXME NOW if the pair of assignments move across a call this will be wrong ; stmtC (CmmAssign (CmmLocal tmp) src) ; return (CmmAssign dest (CmmReg (CmmLocal tmp))) } go_via_temp (CmmStore dest src) - = do { tmp <- newNonPtrTemp (cmmExprRep src) -- TODO FIXME NOW if the pair of assignemnts move across a call this will be wrong + = do { tmp <- newTemp (cmmExprType src) -- TODO FIXME NOW if the pair of assignemnts move across a call this will be wrong ; stmtC (CmmAssign (CmmLocal tmp) src) ; return (CmmStore dest (CmmReg (CmmLocal tmp))) } in @@ -932,7 +919,7 @@ doSimultaneously1 vertices mustFollow :: CmmStmt -> CmmStmt -> Bool CmmAssign reg _ `mustFollow` stmt = anySrc (reg `regUsedIn`) stmt -CmmStore loc e `mustFollow` stmt = anySrc (locUsedIn loc (cmmExprRep e)) stmt +CmmStore loc e `mustFollow` stmt = anySrc (locUsedIn loc (cmmExprType e)) stmt CmmNop `mustFollow` stmt = False CmmComment _ `mustFollow` stmt = False @@ -952,7 +939,7 @@ reg `regUsedIn` CmmReg reg' = reg == reg' reg `regUsedIn` CmmRegOff reg' _ = reg == reg' reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es -locUsedIn :: CmmExpr -> MachRep -> CmmExpr -> Bool +locUsedIn :: CmmExpr -> CmmType -> CmmExpr -> Bool -- (locUsedIn a r e) checks whether writing to r[a] could affect the value of -- 'e'. Returns True if it's not sure. locUsedIn loc rep (CmmLit _) = False @@ -961,7 +948,7 @@ locUsedIn loc rep (CmmReg reg') = False locUsedIn loc rep (CmmRegOff reg' _) = False locUsedIn loc rep (CmmMachOp _ es) = any (locUsedIn loc rep) es -possiblySameLoc :: CmmExpr -> MachRep -> CmmExpr -> MachRep -> Bool +possiblySameLoc :: CmmExpr -> CmmType -> CmmExpr -> CmmType -> Bool -- Assumes that distinct registers (eg Hp, Sp) do not -- point to the same location, nor any offset thereof. possiblySameLoc (CmmReg r1) rep1 (CmmReg r2) rep2 = r1==r2 @@ -970,8 +957,8 @@ possiblySameLoc (CmmRegOff r1 0) rep1 (CmmReg r2) rep2 = r1==r2 possiblySameLoc (CmmRegOff r1 start1) rep1 (CmmRegOff r2 start2) rep2 = r1==r2 && end1 > start2 && end2 > start1 where - end1 = start1 + machRepByteWidth rep1 - end2 = start2 + machRepByteWidth rep2 + end1 = start1 + widthInBytes (typeWidth rep1) + end2 = start2 + widthInBytes (typeWidth rep2) possiblySameLoc l1 rep1 (CmmLit _) rep2 = False possiblySameLoc l1 rep1 l2 rep2 = True -- Conservative @@ -999,7 +986,7 @@ getSRTInfo = do | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape] -> do id <- newUnique let srt_desc_lbl = mkLargeSRTLabel id - emitRODataLits srt_desc_lbl + emitRODataLits "getSRTInfo" srt_desc_lbl ( cmmLabelOffW srt_lbl off : mkWordCLit (fromIntegral len) : map mkWordCLit bmp) @@ -1011,3 +998,9 @@ getSRTInfo = do -- The fromIntegral converts to StgHalfWord srt_escape = (-1) :: StgHalfWord + +clHasCafRefs :: ClosureInfo -> CafInfo +clHasCafRefs (ClosureInfo {closureSRT = srt}) = + case srt of NoC_SRT -> NoCafRefs + _ -> MayHaveCafRefs +clHasCafRefs (ConInfo {}) = NoCafRefs diff --git a/compiler/codeGen/ClosureInfo.lhs b/compiler/codeGen/ClosureInfo.lhs index dcb41b4cc4..df32299c2a 100644 --- a/compiler/codeGen/ClosureInfo.lhs +++ b/compiler/codeGen/ClosureInfo.lhs @@ -37,7 +37,7 @@ module ClosureInfo ( slopSize, closureName, infoTableLabelFromCI, - closureLabelFromCI, closureSRT, + closureLabelFromCI, closureLFInfo, isLFThunk,closureSMRep, closureUpdReqd, closureNeedsUpdSpace, closureIsThunk, closureSingleEntry, closureReEntrant, isConstrClosure_maybe, @@ -76,6 +76,7 @@ import Packages import PackageConfig import StaticFlags import Id +import IdInfo import DataCon import Name import OccName @@ -576,28 +577,29 @@ data CallMethod Int -- Its arity getCallMethod :: Name -- Function being applied + -> CafInfo -- Can it refer to CAF's? -> LambdaFormInfo -- Its info -> Int -- Number of available arguments -> CallMethod -getCallMethod name lf_info n_args +getCallMethod name _ lf_info n_args | nodeMustPointToIt lf_info && opt_Parallel = -- If we're parallel, then we must always enter via node. -- The reason is that the closure may have been -- fetched since we allocated it. EnterIt -getCallMethod name (LFReEntrant _ arity _ _) n_args +getCallMethod name caf (LFReEntrant _ arity _ _) n_args | n_args == 0 = ASSERT( arity /= 0 ) ReturnIt -- No args at all | n_args < arity = SlowCall -- Not enough args - | otherwise = DirectEntry (enterIdLabel name) arity + | otherwise = DirectEntry (enterIdLabel name caf) arity -getCallMethod name (LFCon con) n_args +getCallMethod name _ (LFCon con) n_args = ASSERT( n_args == 0 ) ReturnCon con -getCallMethod name (LFThunk _ _ updatable std_form_info is_fun) n_args +getCallMethod name caf (LFThunk _ _ updatable std_form_info is_fun) n_args | is_fun -- it *might* be a function, so we must "call" it (which is -- always safe) = SlowCall -- We cannot just enter it [in eval/apply, the entry code @@ -620,12 +622,12 @@ getCallMethod name (LFThunk _ _ updatable std_form_info is_fun) n_args | otherwise -- Jump direct to code for single-entry thunks = ASSERT( n_args == 0 ) - JumpToIt (thunkEntryLabel name std_form_info updatable) + JumpToIt (thunkEntryLabel name caf std_form_info updatable) -getCallMethod name (LFUnknown True) n_args +getCallMethod name _ (LFUnknown True) n_args = SlowCall -- Might be a function -getCallMethod name (LFUnknown False) n_args +getCallMethod name _ (LFUnknown False) n_args | n_args > 0 = WARN( True, ppr name <+> ppr n_args ) SlowCall -- Note [Unsafe coerce complications] @@ -633,15 +635,15 @@ getCallMethod name (LFUnknown False) n_args | otherwise = EnterIt -- Not a function -getCallMethod name (LFBlackHole _) n_args +getCallMethod name _ (LFBlackHole _) n_args = SlowCall -- Presumably the black hole has by now -- been updated, but we don't know with -- what, so we slow call it -getCallMethod name (LFLetNoEscape 0) n_args +getCallMethod name _ (LFLetNoEscape 0) n_args = JumpToIt (enterReturnPtLabel (nameUnique name)) -getCallMethod name (LFLetNoEscape arity) n_args +getCallMethod name _ (LFLetNoEscape arity) n_args | n_args == arity = DirectEntry (enterReturnPtLabel (nameUnique name)) arity | otherwise = pprPanic "let-no-escape: " (ppr name <+> ppr arity) @@ -882,10 +884,10 @@ isToplevClosure _ = False Label generation. \begin{code} -infoTableLabelFromCI :: ClosureInfo -> CLabel +infoTableLabelFromCI :: ClosureInfo -> CafInfo -> CLabel infoTableLabelFromCI (ClosureInfo { closureName = name, closureLFInfo = lf_info, - closureSMRep = rep }) + closureSMRep = rep }) caf = case lf_info of LFBlackHole info -> info @@ -895,32 +897,32 @@ infoTableLabelFromCI (ClosureInfo { closureName = name, LFThunk _ _ upd_flag (ApThunk arity) _ -> mkApInfoTableLabel upd_flag arity - LFThunk{} -> mkLocalInfoTableLabel name + LFThunk{} -> mkLocalInfoTableLabel name caf - LFReEntrant _ _ _ _ -> mkLocalInfoTableLabel name + LFReEntrant _ _ _ _ -> mkLocalInfoTableLabel name caf other -> panic "infoTableLabelFromCI" infoTableLabelFromCI (ConInfo { closureCon = con, - closureSMRep = rep }) - | isStaticRep rep = mkStaticInfoTableLabel name - | otherwise = mkConInfoTableLabel name + closureSMRep = rep }) caf + | isStaticRep rep = mkStaticInfoTableLabel name caf + | otherwise = mkConInfoTableLabel name caf where name = dataConName con -- ClosureInfo for a closure (as opposed to a constructor) is always local -closureLabelFromCI (ClosureInfo { closureName = nm }) = mkLocalClosureLabel nm -closureLabelFromCI _ = panic "closureLabelFromCI" +closureLabelFromCI (ClosureInfo { closureName = nm }) caf = mkLocalClosureLabel nm caf +closureLabelFromCI _ _ = panic "closureLabelFromCI" -- thunkEntryLabel is a local help function, not exported. It's used from both -- entryLabelFromCI and getCallMethod. -thunkEntryLabel thunk_id (ApThunk arity) is_updatable +thunkEntryLabel thunk_id _ (ApThunk arity) is_updatable = enterApLabel is_updatable arity -thunkEntryLabel thunk_id (SelectorThunk offset) upd_flag +thunkEntryLabel thunk_id _ (SelectorThunk offset) upd_flag = enterSelectorLabel upd_flag offset -thunkEntryLabel thunk_id _ is_updatable - = enterIdLabel thunk_id +thunkEntryLabel thunk_id caf _ is_updatable + = enterIdLabel thunk_id caf enterApLabel is_updatable arity | tablesNextToCode = mkApInfoTableLabel is_updatable arity diff --git a/compiler/codeGen/CodeGen.lhs b/compiler/codeGen/CodeGen.lhs index 4221342d4f..14d745780d 100644 --- a/compiler/codeGen/CodeGen.lhs +++ b/compiler/codeGen/CodeGen.lhs @@ -38,7 +38,6 @@ import CLabel import Cmm import CmmUtils import PprCmm -import MachOp import StgSyn import PrelNames @@ -51,6 +50,7 @@ import CostCentre import Id import Name import OccName +import Outputable import TyCon import Module import ErrUtils @@ -198,7 +198,7 @@ mkModuleInit way cost_centre_info this_mod main_mod imported_mods hpc_info jump_to_init = stmtC (CmmJump (mkLblExpr real_init_lbl) []) - mod_reg_val = CmmLoad (mkLblExpr moduleRegdLabel) wordRep + mod_reg_val = CmmLoad (mkLblExpr moduleRegdLabel) bWord -- Main refers to GHC.TopHandler.runIO, so make sure we call the -- init function for GHC.TopHandler. @@ -224,7 +224,7 @@ mkModuleInit way cost_centre_info this_mod main_mod imported_mods hpc_info -- The return-code pops the work stack by -- incrementing Sp, and then jumpd to the popped item ret_code = stmtsC [ CmmAssign spReg (cmmRegOffW spReg 1) - , CmmJump (CmmLoad (cmmRegOffW spReg (-1)) wordRep) [] ] + , CmmJump (CmmLoad (cmmRegOffW spReg (-1)) bWord) [] ] rec_descent_init = if opt_SccProfilingOn || isHpcUsed hpc_info @@ -303,8 +303,8 @@ mkSRT these (id,[]) = nopC mkSRT these (id,ids) = do { ids <- mapFCs remap ids ; id <- remap id - ; emitRODataLits (mkSRTLabel (idName id)) - (map (CmmLabel . mkClosureLabel . idName) ids) + ; emitRODataLits "CodeGen.mkSRT" (mkSRTLabel (idName id) (idCafInfo id)) + (map (\id -> CmmLabel $ mkClosureLabel (idName id) (idCafInfo id)) ids) } where -- Sigh, better map all the ids against the environment in @@ -326,7 +326,7 @@ cgTopRhs bndr (StgRhsCon cc con args) cgTopRhs bndr (StgRhsClosure cc bi fvs upd_flag srt args body) = ASSERT(null fvs) -- There should be no free variables - setSRTLabel (mkSRTLabel (idName bndr)) $ + setSRTLabel (mkSRTLabel (idName bndr) (idCafInfo bndr)) $ setSRT srt $ forkStatics (cgTopRhsClosure bndr cc bi upd_flag args body) \end{code} diff --git a/compiler/codeGen/SMRep.lhs b/compiler/codeGen/SMRep.lhs index 28d17079e5..987562c364 100644 --- a/compiler/codeGen/SMRep.lhs +++ b/compiler/codeGen/SMRep.lhs @@ -24,14 +24,15 @@ module SMRep ( -- Argument/return representations CgRep(..), nonVoidArg, - argMachRep, primRepToCgRep, primRepHint, + argMachRep, primRepToCgRep, +-- Temp primRepHint, typeHint, isFollowableArg, isVoidArg, isFloatingArg, is64BitArg, separateByPtrFollowness, cgRepSizeW, cgRepSizeB, retAddrSizeW, - typeCgRep, idCgRep, tyConCgRep, typeHint, + typeCgRep, idCgRep, tyConCgRep, -- Closure repesentation SMRep(..), ClosureType(..), @@ -45,10 +46,10 @@ module SMRep ( #include "../includes/MachDeps.h" +import CmmExpr -- CmmType and friends import Id import Type import TyCon -import MachOp import StaticFlags import Constants import Outputable @@ -136,12 +137,12 @@ instance Outputable CgRep where ppr FloatArg = ptext (sLit "F_") ppr DoubleArg = ptext (sLit "D_") -argMachRep :: CgRep -> MachRep -argMachRep PtrArg = wordRep -argMachRep NonPtrArg = wordRep -argMachRep LongArg = I64 -argMachRep FloatArg = F32 -argMachRep DoubleArg = F64 +argMachRep :: CgRep -> CmmType +argMachRep PtrArg = gcWord +argMachRep NonPtrArg = bWord +argMachRep LongArg = b64 +argMachRep FloatArg = f32 +argMachRep DoubleArg = f64 argMachRep VoidArg = panic "argMachRep:VoidRep" primRepToCgRep :: PrimRep -> CgRep @@ -155,17 +156,6 @@ primRepToCgRep AddrRep = NonPtrArg primRepToCgRep FloatRep = FloatArg primRepToCgRep DoubleRep = DoubleArg -primRepHint :: PrimRep -> MachHint -primRepHint VoidRep = panic "primRepHint:VoidRep" -primRepHint PtrRep = PtrHint -primRepHint IntRep = SignedHint -primRepHint WordRep = NoHint -primRepHint Int64Rep = SignedHint -primRepHint Word64Rep = NoHint -primRepHint AddrRep = PtrHint -- NB! PtrHint, but NonPtrArg -primRepHint FloatRep = FloatHint -primRepHint DoubleRep = FloatHint - idCgRep :: Id -> CgRep idCgRep x = typeCgRep . idType $ x @@ -174,9 +164,6 @@ tyConCgRep = primRepToCgRep . tyConPrimRep typeCgRep :: Type -> CgRep typeCgRep = primRepToCgRep . typePrimRep - -typeHint :: Type -> MachHint -typeHint = primRepHint . typePrimRep \end{code} Whether or not the thing is a pointer that the garbage-collector diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs new file mode 100644 index 0000000000..56cd1d5555 --- /dev/null +++ b/compiler/codeGen/StgCmm.hs @@ -0,0 +1,400 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C-- code generation +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmm ( codeGen ) where + +#define FAST_STRING_NOT_NEEDED +#include "HsVersions.h" + +import StgCmmProf +import StgCmmMonad +import StgCmmEnv +import StgCmmBind +import StgCmmCon +import StgCmmLayout +import StgCmmHeap +import StgCmmUtils +import StgCmmClosure +import StgCmmHpc +import StgCmmTicky + +import MkZipCfgCmm +import Cmm +import CmmUtils +import CLabel +import PprCmm + +import StgSyn +import PrelNames +import DynFlags +import StaticFlags + +import HscTypes +import CostCentre +import Id +import IdInfo +import Type +import DataCon +import Name +import OccName +import TyCon +import Module +import ErrUtils +import Outputable + +codeGen :: DynFlags + -> Module + -> [TyCon] + -> [Module] -- Directly-imported modules + -> CollectedCCs -- (Local/global) cost-centres needing declaring/registering. + -> [(StgBinding,[(Id,[Id])])] -- Bindings to convert, with SRTs + -> HpcInfo + -> IO [CmmZ] -- Output + +codeGen dflags this_mod data_tycons imported_mods + cost_centre_info stg_binds hpc_info + = do { showPass dflags "New CodeGen" + ; let way = buildTag dflags + main_mod = mainModIs dflags + +-- Why? +-- ; mapM_ (\x -> seq x (return ())) data_tycons + + ; code_stuff <- initC dflags this_mod $ do + { cmm_binds <- mapM (getCmm . cgTopBinding dflags) stg_binds + ; cmm_tycons <- mapM cgTyCon data_tycons + ; cmm_init <- getCmm (mkModuleInit way cost_centre_info + this_mod main_mod + imported_mods hpc_info) + ; return (cmm_binds ++ concat cmm_tycons ++ [cmm_init]) + } + -- Put datatype_stuff after code_stuff, because the + -- datatype closure table (for enumeration types) to + -- (say) PrelBase_True_closure, which is defined in + -- code_stuff + + -- N.B. returning '[Cmm]' and not 'Cmm' here makes it + -- possible for object splitting to split up the + -- pieces later. + + ; dumpIfSet_dyn dflags Opt_D_dump_cmmz "New Cmm" (pprCmms code_stuff) + + ; return code_stuff } + + +--------------------------------------------------------------- +-- Top-level bindings +--------------------------------------------------------------- + +{- 'cgTopBinding' is only used for top-level bindings, since they need +to be allocated statically (not in the heap) and need to be labelled. +No unboxed bindings can happen at top level. + +In the code below, the static bindings are accumulated in the +@MkCgState@, and transferred into the ``statics'' slot by @forkStatics@. +This is so that we can write the top level processing in a compositional +style, with the increasing static environment being plumbed as a state +variable. -} + +cgTopBinding :: DynFlags -> (StgBinding,[(Id,[Id])]) -> FCode () +cgTopBinding dflags (StgNonRec id rhs, _srts) + = do { id' <- maybeExternaliseId dflags id + --; mapM_ (mkSRT [id']) srts + ; (id,info) <- cgTopRhs id' rhs + ; addBindC id info -- Add the *un-externalised* Id to the envt, + -- so we find it when we look up occurrences + } + +cgTopBinding dflags (StgRec pairs, _srts) + = do { let (bndrs, rhss) = unzip pairs + ; bndrs' <- mapFCs (maybeExternaliseId dflags) bndrs + ; let pairs' = zip bndrs' rhss + --; mapM_ (mkSRT bndrs') srts + ; fixC (\ new_binds -> do + { addBindsC new_binds + ; mapFCs ( \ (b,e) -> cgTopRhs b e ) pairs' }) + ; return () } + +--mkSRT :: [Id] -> (Id,[Id]) -> FCode () +--mkSRT these (id,ids) +-- | null ids = nopC +-- | otherwise +-- = do { ids <- mapFCs remap ids +-- ; id <- remap id +-- ; emitRODataLits (mkSRTLabel (idName id) (idCafInfo id)) +-- (map (\id -> CmmLabel $ mkClosureLabel (idName id) (idCafInfo id)) ids) +-- } +-- where +-- -- Sigh, better map all the ids against the environment in +-- -- case they've been externalised (see maybeExternaliseId below). +-- remap id = case filter (==id) these of +-- (id':_) -> returnFC id' +-- [] -> do { info <- getCgIdInfo id; return (cgIdInfoId info) } + +-- Urgh! I tried moving the forkStatics call from the rhss of cgTopRhs +-- to enclose the listFCs in cgTopBinding, but that tickled the +-- statics "error" call in initC. I DON'T UNDERSTAND WHY! + +cgTopRhs :: Id -> StgRhs -> FCode (Id, CgIdInfo) + -- The Id is passed along for setting up a binding... + -- It's already been externalised if necessary + +cgTopRhs bndr (StgRhsCon _cc con args) + = forkStatics (cgTopRhsCon bndr con args) + +cgTopRhs bndr (StgRhsClosure cc bi fvs upd_flag srt args body) + = ASSERT(null fvs) -- There should be no free variables + setSRTLabel (mkSRTLabel (idName bndr) (idCafInfo bndr)) $ + forkStatics (cgTopRhsClosure bndr cc bi upd_flag srt args body) + + + +--------------------------------------------------------------- +-- Module initialisation code +--------------------------------------------------------------- + +{- The module initialisation code looks like this, roughly: + + FN(__stginit_Foo) { + JMP_(__stginit_Foo_1_p) + } + + FN(__stginit_Foo_1_p) { + ... + } + + We have one version of the init code with a module version and the + 'way' attached to it. The version number helps to catch cases + where modules are not compiled in dependency order before being + linked: if a module has been compiled since any modules which depend on + it, then the latter modules will refer to a different version in their + init blocks and a link error will ensue. + + The 'way' suffix helps to catch cases where modules compiled in different + ways are linked together (eg. profiled and non-profiled). + + We provide a plain, unadorned, version of the module init code + which just jumps to the version with the label and way attached. The + reason for this is that when using foreign exports, the caller of + startupHaskell() must supply the name of the init function for the "top" + module in the program, and we don't want to require that this name + has the version and way info appended to it. + +We initialise the module tree by keeping a work-stack, + * pointed to by Sp + * that grows downward + * Sp points to the last occupied slot +-} + +mkModuleInit + :: String -- the "way" + -> CollectedCCs -- cost centre info + -> Module + -> Module -- name of the Main module + -> [Module] + -> HpcInfo + -> FCode () +mkModuleInit way cost_centre_info this_mod main_mod imported_mods hpc_info + = do { -- Allocate the static boolean that records if this + -- module has been registered already + emitData Data [CmmDataLabel moduleRegdLabel, + CmmStaticLit zeroCLit] + + ; init_hpc <- initHpc this_mod hpc_info + ; init_prof <- initCostCentres cost_centre_info + + -- We emit a recursive descent module search for all modules + -- and *choose* to chase it in :Main, below. + -- In this way, Hpc enabled modules can interact seamlessly with + -- not Hpc enabled moduled, provided Main is compiled with Hpc. + + ; emitSimpleProc real_init_lbl $ withFreshLabel "ret_block" $ \retId -> catAGraphs + [ check_already_done retId + , init_prof + , init_hpc + , catAGraphs $ map (registerImport way) all_imported_mods + , mkBranch retId ] + -- Make the "plain" procedure jump to the "real" init procedure + ; emitSimpleProc plain_init_lbl jump_to_init + + -- When compiling the module in which the 'main' function lives, + -- (that is, this_mod == main_mod) + -- we inject an extra stg_init procedure for stg_init_ZCMain, for the + -- RTS to invoke. We must consult the -main-is flag in case the + -- user specified a different function to Main.main + + -- Notice that the recursive descent is optional, depending on what options + -- are enabled. + + + ; whenC (this_mod == main_mod) + (emitSimpleProc plain_main_init_lbl rec_descent_init) + } + where + plain_init_lbl = mkPlainModuleInitLabel this_mod + real_init_lbl = mkModuleInitLabel this_mod way + plain_main_init_lbl = mkPlainModuleInitLabel rOOT_MAIN + + jump_to_init = mkJump (mkLblExpr real_init_lbl) [] + + + -- Main refers to GHC.TopHandler.runIO, so make sure we call the + -- init function for GHC.TopHandler. + extra_imported_mods + | this_mod == main_mod = [gHC_TOP_HANDLER] + | otherwise = [] + all_imported_mods = imported_mods ++ extra_imported_mods + + mod_reg_val = CmmLoad (mkLblExpr moduleRegdLabel) bWord + check_already_done retId + = mkCmmIfThenElse (cmmNeWord (CmmLit zeroCLit) mod_reg_val) + (mkLabel retId Nothing <*> mkReturn []) mkNop + <*> -- Set mod_reg to 1 to record that we've been here + mkStore (mkLblExpr moduleRegdLabel) (CmmLit (mkIntCLit 1)) + + -- The return-code pops the work stack by + -- incrementing Sp, and then jumpd to the popped item + ret_code = mkAssign spReg (cmmRegOffW spReg 1) + <*> mkJump (CmmLoad (cmmRegOffW spReg (-1)) bWord) [] + + rec_descent_init = if opt_SccProfilingOn || isHpcUsed hpc_info + then jump_to_init + else ret_code + +----------------------- +registerImport :: String -> Module -> CmmAGraph +registerImport way mod + | mod == gHC_PRIM + = mkNop + | otherwise -- Push the init procedure onto the work stack + = mkCmmCall init_lbl [] [] NoC_SRT + where + init_lbl = mkLblExpr $ mkModuleInitLabel mod way + + + +--------------------------------------------------------------- +-- Generating static stuff for algebraic data types +--------------------------------------------------------------- + +{- [These comments are rather out of date] + +Macro Kind of constructor +CONST_INFO_TABLE@ Zero arity (no info -- compiler uses static closure) +CHARLIKE_INFO_TABLE Charlike (no info -- compiler indexes fixed array) +INTLIKE_INFO_TABLE Intlike; the one macro generates both info tbls +SPEC_INFO_TABLE SPECish, and bigger than or equal to MIN_UPD_SIZE +GEN_INFO_TABLE GENish (hence bigger than or equal to MIN_UPD_SIZE@) + +Possible info tables for constructor con: + +* _con_info: + Used for dynamically let(rec)-bound occurrences of + the constructor, and for updates. For constructors + which are int-like, char-like or nullary, when GC occurs, + the closure tries to get rid of itself. + +* _static_info: + Static occurrences of the constructor macro: STATIC_INFO_TABLE. + +For zero-arity constructors, \tr{con}, we NO LONGER generate a static closure; +it's place is taken by the top level defn of the constructor. + +For charlike and intlike closures there is a fixed array of static +closures predeclared. +-} + +cgTyCon :: TyCon -> FCode [CmmZ] -- All constructors merged together +cgTyCon tycon + = do { constrs <- mapM (getCmm . cgDataCon) (tyConDataCons tycon) + + -- Generate a table of static closures for an enumeration type + -- Put the table after the data constructor decls, because the + -- datatype closure table (for enumeration types) + -- to (say) PrelBase_$wTrue_closure, which is defined in code_stuff + -- Note that the closure pointers are tagged. + + -- N.B. comment says to put table after constructor decls, but + -- code puts it before --- NR 16 Aug 2007 + ; extra <- cgEnumerationTyCon tycon + + ; return (extra ++ constrs) + } + +cgEnumerationTyCon :: TyCon -> FCode [CmmZ] +cgEnumerationTyCon tycon + | isEnumerationTyCon tycon + = do { tbl <- getCmm $ + emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs) + [ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs) + (tagForCon con) + | con <- tyConDataCons tycon] + ; return [tbl] } + | otherwise + = return [] + +cgDataCon :: DataCon -> FCode () +-- Generate the entry code, info tables, and (for niladic constructor) +-- the static closure, for a constructor. +cgDataCon data_con + = do { let + -- To allow the debuggers, interpreters, etc to cope with + -- static data structures (ie those built at compile + -- time), we take care that info-table contains the + -- information we need. + (static_cl_info, _) = layOutStaticConstr data_con arg_reps + (dyn_cl_info, arg_things) = layOutDynConstr data_con arg_reps + + emit_info cl_info ticky_code + = do { code_blks <- getCode (mk_code ticky_code) + ; emitClosureCodeAndInfoTable cl_info [] code_blks } + + mk_code ticky_code + = -- NB: We don't set CC when entering data (WDP 94/06) + do { ticky_code + ; ldvEnter (CmmReg nodeReg) + ; tickyReturnOldCon (length arg_things) + ; emitReturn [cmmOffsetB (CmmReg nodeReg) + (tagForCon data_con)] } + -- The case continuation code expects a tagged pointer + + arg_reps :: [(PrimRep, Type)] + arg_reps = [(typePrimRep ty, ty) | ty <- dataConRepArgTys data_con] + + -- Dynamic closure code for non-nullary constructors only + ; whenC (not (isNullaryRepDataCon data_con)) + (emit_info dyn_cl_info tickyEnterDynCon) + + -- Dynamic-Closure first, to reduce forward references + ; emit_info static_cl_info tickyEnterStaticCon } + + +--------------------------------------------------------------- +-- Stuff to support splitting +--------------------------------------------------------------- + +-- If we're splitting the object, we need to externalise all the +-- top-level names (and then make sure we only use the externalised +-- one in any C label we use which refers to this name). + +maybeExternaliseId :: DynFlags -> Id -> FCode Id +maybeExternaliseId dflags id + | dopt Opt_SplitObjs dflags, -- Externalise the name for -split-objs + isInternalName name = do { mod <- getModuleName + ; returnFC (setIdName id (externalise mod)) } + | otherwise = returnFC id + where + externalise mod = mkExternalName uniq mod new_occ loc + name = idName id + uniq = nameUnique name + new_occ = mkLocalOcc uniq (nameOccName name) + loc = nameSrcSpan name + -- We want to conjure up a name that can't clash with any + -- existing name. So we generate + -- Mod_$L243foo + -- where 243 is the unique. diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs new file mode 100644 index 0000000000..0e8d853969 --- /dev/null +++ b/compiler/codeGen/StgCmmBind.hs @@ -0,0 +1,615 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C-- code generation: bindings +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmBind ( + cgTopRhsClosure, + cgBind, + emitBlackHoleCode + ) where + +#include "HsVersions.h" + +import StgCmmMonad +import StgCmmExpr +import StgCmmEnv +import StgCmmCon +import StgCmmHeap +import StgCmmProf +import StgCmmTicky +import StgCmmGran +import StgCmmLayout +import StgCmmUtils +import StgCmmClosure + +import MkZipCfgCmm +import CoreSyn ( AltCon(..) ) +import SMRep +import Cmm +import CmmUtils +import CLabel +import StgSyn +import CostCentre +import Id +import Name +import Module +import ListSetOps +import Util +import BasicTypes +import Constants +import Outputable +import FastString +import Maybes + +import Data.List + +------------------------------------------------------------------------ +-- Top-level bindings +------------------------------------------------------------------------ + +-- For closures bound at top level, allocate in static space. +-- They should have no free variables. + +cgTopRhsClosure :: Id + -> CostCentreStack -- Optional cost centre annotation + -> StgBinderInfo + -> UpdateFlag + -> SRT + -> [Id] -- Args + -> StgExpr + -> FCode (Id, CgIdInfo) + +cgTopRhsClosure id ccs binder_info upd_flag srt args body = do + { -- LAY OUT THE OBJECT + let name = idName id + ; lf_info <- mkClosureLFInfo id TopLevel [] upd_flag args + ; srt_info <- getSRTInfo srt + ; mod_name <- getModuleName + ; let descr = closureDescription mod_name name + closure_info = mkClosureInfo True id lf_info 0 0 srt_info descr + closure_label = mkLocalClosureLabel name (idCafInfo id) + cg_id_info = litIdInfo id lf_info (CmmLabel closure_label) + closure_rep = mkStaticClosureFields closure_info ccs True [] + + -- BUILD THE OBJECT, AND GENERATE INFO TABLE (IF NECESSARY) + ; emitDataLits closure_label closure_rep + ; forkClosureBody $ do + { node <- bindToReg id lf_info + ; closureCodeBody binder_info closure_info + ccs srt_info node args body } + + ; returnFC (id, cg_id_info) } + +------------------------------------------------------------------------ +-- Non-top-level bindings +------------------------------------------------------------------------ + +cgBind :: StgBinding -> FCode () +cgBind (StgNonRec name rhs) + = do { (name, info) <- cgRhs name rhs + ; addBindC name info } + +cgBind (StgRec pairs) + = do { new_binds <- fixC (\ new_binds -> + do { addBindsC new_binds + ; listFCs [ cgRhs b e | (b,e) <- pairs ] }) + ; addBindsC new_binds } + +-------------------- +cgRhs :: Id -> StgRhs -> FCode (Id, CgIdInfo) + -- The Id is passed along so a binding can be set up + +cgRhs name (StgRhsCon maybe_cc con args) + = do { idinfo <- buildDynCon name maybe_cc con args + ; return (name, idinfo) } + +cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body) + = mkRhsClosure name cc bi fvs upd_flag srt args body + +------------------------------------------------------------------------ +-- Non-constructor right hand sides +------------------------------------------------------------------------ + +mkRhsClosure :: Id -> CostCentreStack -> StgBinderInfo + -> [Id] -- Free vars + -> UpdateFlag -> SRT + -> [Id] -- Args + -> StgExpr + -> FCode (Id, CgIdInfo) + +{- mkRhsClosure looks for two special forms of the right-hand side: + a) selector thunks + b) AP thunks + +If neither happens, it just calls mkClosureLFInfo. You might think +that mkClosureLFInfo should do all this, but it seems wrong for the +latter to look at the structure of an expression + +Note [Selectors] +~~~~~~~~~~~~~~~~ +We look at the body of the closure to see if it's a selector---turgid, +but nothing deep. We are looking for a closure of {\em exactly} the +form: + +... = [the_fv] \ u [] -> + case the_fv of + con a_1 ... a_n -> a_i + +Note [Ap thunks] +~~~~~~~~~~~~~~~~ +A more generic AP thunk of the form + + x = [ x_1...x_n ] \.. [] -> x_1 ... x_n + +A set of these is compiled statically into the RTS, so we just use +those. We could extend the idea to thunks where some of the x_i are +global ids (and hence not free variables), but this would entail +generating a larger thunk. It might be an option for non-optimising +compilation, though. + +We only generate an Ap thunk if all the free variables are pointers, +for semi-obvious reasons. + +-} + +---------- Note [Selectors] ------------------ +mkRhsClosure bndr cc bi + [the_fv] -- Just one free var + upd_flag -- Updatable thunk + _srt + [] -- A thunk + body@(StgCase (StgApp scrutinee [{-no args-}]) + _ _ _ _ -- ignore uniq, etc. + (AlgAlt _) + [(DataAlt con, params, _use_mask, + (StgApp selectee [{-no args-}]))]) + | the_fv == scrutinee -- Scrutinee is the only free variable + && maybeToBool maybe_offset -- Selectee is a component of the tuple + && offset_into_int <= mAX_SPEC_SELECTEE_SIZE -- Offset is small enough + = -- NOT TRUE: ASSERT(is_single_constructor) + -- The simplifier may have statically determined that the single alternative + -- is the only possible case and eliminated the others, even if there are + -- other constructors in the datatype. It's still ok to make a selector + -- thunk in this case, because we *know* which constructor the scrutinee + -- will evaluate to. + -- + -- srt is discarded; it must be empty + cgStdThunk bndr cc bi body lf_info [StgVarArg the_fv] + where + lf_info = mkSelectorLFInfo bndr offset_into_int + (isUpdatable upd_flag) + (_, params_w_offsets) = layOutDynConstr con (addIdReps params) + -- Just want the layout + maybe_offset = assocMaybe params_w_offsets selectee + Just the_offset = maybe_offset + offset_into_int = the_offset - fixedHdrSize + +---------- Note [Ap thunks] ------------------ +mkRhsClosure bndr cc bi + fvs + upd_flag + _srt + [] -- No args; a thunk + body@(StgApp fun_id args) + + | args `lengthIs` (arity-1) + && all isFollowableArg (map idCgRep fvs) + && isUpdatable upd_flag + && arity <= mAX_SPEC_AP_SIZE + + -- Ha! an Ap thunk + = cgStdThunk bndr cc bi body lf_info payload + where + lf_info = mkApLFInfo bndr upd_flag arity + -- the payload has to be in the correct order, hence we can't + -- just use the fvs. + payload = StgVarArg fun_id : args + arity = length fvs + +---------- Default case ------------------ +mkRhsClosure bndr cc bi fvs upd_flag srt args body + = do { -- LAY OUT THE OBJECT + -- If the binder is itself a free variable, then don't store + -- it in the closure. Instead, just bind it to Node on entry. + -- NB we can be sure that Node will point to it, because we + -- havn't told mkClosureLFInfo about this; so if the binder + -- _was_ a free var of its RHS, mkClosureLFInfo thinks it *is* + -- stored in the closure itself, so it will make sure that + -- Node points to it... + ; let + is_elem = isIn "cgRhsClosure" + bndr_is_a_fv = bndr `is_elem` fvs + reduced_fvs | bndr_is_a_fv = fvs `minusList` [bndr] + | otherwise = fvs + + + -- MAKE CLOSURE INFO FOR THIS CLOSURE + ; lf_info <- mkClosureLFInfo bndr NotTopLevel fvs upd_flag args + ; mod_name <- getModuleName + ; c_srt <- getSRTInfo srt + ; let name = idName bndr + descr = closureDescription mod_name name + fv_details :: [(Id, VirtualHpOffset)] + (tot_wds, ptr_wds, fv_details) + = mkVirtHeapOffsets (isLFThunk lf_info) + (addIdReps reduced_fvs) + closure_info = mkClosureInfo False -- Not static + bndr lf_info tot_wds ptr_wds + c_srt descr + + -- BUILD ITS INFO TABLE AND CODE + ; forkClosureBody $ do + { -- Bind the binder itself + -- It does no harm to have it in the envt even if + -- it's not a free variable; and we need a reg for it + node <- bindToReg bndr lf_info + + -- Bind the free variables + ; mapCs (bind_fv node) fv_details + + -- And compile the body + ; closureCodeBody bi closure_info cc c_srt node args body } + + -- BUILD THE OBJECT + ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body + ; emit (mkComment $ mkFastString "calling allocDynClosure") + ; tmp <- allocDynClosure closure_info use_cc blame_cc + (mapFst StgVarArg fv_details) + + -- RETURN + ; return (bndr, regIdInfo bndr lf_info tmp) } + where + -- A function closure pointer may be tagged, so we + -- must take it into account when accessing the free variables. + tag = tagForArity (length args) + + bind_fv node (id, off) + = do { reg <- rebindToReg id + ; emit $ mkTaggedObjectLoad reg node off tag } + +------------------------- +cgStdThunk + :: Id + -> CostCentreStack -- Optional cost centre annotation + -> StgBinderInfo -- XXX: not used?? + -> StgExpr + -> LambdaFormInfo + -> [StgArg] -- payload + -> FCode (Id, CgIdInfo) + +cgStdThunk bndr cc _bndr_info body lf_info payload + = do -- AHA! A STANDARD-FORM THUNK + { -- LAY OUT THE OBJECT + mod_name <- getModuleName + ; let (tot_wds, ptr_wds, payload_w_offsets) + = mkVirtHeapOffsets (isLFThunk lf_info) (addArgReps payload) + + descr = closureDescription mod_name (idName bndr) + closure_info = mkClosureInfo False -- Not static + bndr lf_info tot_wds ptr_wds + NoC_SRT -- No SRT for a std-form closure + descr + + ; (use_cc, blame_cc) <- chooseDynCostCentres cc [{- no args-}] body + + -- BUILD THE OBJECT + ; tmp <- allocDynClosure closure_info use_cc blame_cc payload_w_offsets + + -- RETURN + ; returnFC (bndr, regIdInfo bndr lf_info tmp) } + +mkClosureLFInfo :: Id -- The binder + -> TopLevelFlag -- True of top level + -> [Id] -- Free vars + -> UpdateFlag -- Update flag + -> [Id] -- Args + -> FCode LambdaFormInfo +mkClosureLFInfo bndr top fvs upd_flag args + | null args = return (mkLFThunk (idType bndr) top fvs upd_flag) + | otherwise = do { arg_descr <- mkArgDescr (idName bndr) args + ; return (mkLFReEntrant top fvs args arg_descr) } + + +------------------------------------------------------------------------ +-- The code for closures} +------------------------------------------------------------------------ + +closureCodeBody :: StgBinderInfo -- XXX: unused? + -> ClosureInfo -- Lots of information about this closure + -> CostCentreStack -- Optional cost centre attached to closure + -> C_SRT + -> LocalReg -- The closure itself; first argument + -- The Id is in scope already, bound to this reg + -> [Id] + -> StgExpr + -> FCode () + +{- There are two main cases for the code for closures. + +* If there are *no arguments*, then the closure is a thunk, and not in + normal form. So it should set up an update frame (if it is + shared). NB: Thunks cannot have a primitive type! + +* If there is *at least one* argument, then this closure is in + normal form, so there is no need to set up an update frame. + + The Macros for GrAnSim are produced at the beginning of the + argSatisfactionCheck (by calling fetchAndReschedule). + There info if Node points to closure is available. -- HWL -} + +closureCodeBody _binder_info cl_info cc srt node args body + | null args -- No args i.e. thunk + = do { code <- getCode $ thunkCode cl_info cc srt node body + ; emitClosureCodeAndInfoTable cl_info [node] code } + +closureCodeBody _binder_info cl_info cc srt node args body + = ASSERT( length args > 0 ) + do { -- Allocate the global ticky counter, + -- and establish the ticky-counter + -- label for this block + let ticky_ctr_lbl = mkRednCountsLabel (closureName cl_info) $ clHasCafRefs cl_info + ; emitTickyCounter cl_info args + ; setTickyCtrLabel ticky_ctr_lbl $ do + +-- -- XXX: no slow-entry code for now +-- -- Emit the slow-entry code +-- { reg_save_code <- mkSlowEntryCode cl_info reg_args + + -- Emit the main entry code + ; let node_points = nodeMustPointToIt (closureLFInfo cl_info) + ; arg_regs <- bindArgsToRegs args + ; blks <- forkProc $ getCode $ do + { enterCostCentre cl_info cc body + ; tickyEnterFun cl_info + ; whenC node_points (ldvEnterClosure cl_info) + ; granYield arg_regs node_points + + -- Main payload + ; entryHeapCheck node arg_regs srt $ + cgExpr body } + + ; emitClosureCodeAndInfoTable cl_info (node:arg_regs) blks + } + +{- +----------------------------------------- +-- The "slow entry" code for a function. This entry point takes its +-- arguments on the stack. It loads the arguments into registers +-- according to the calling convention, and jumps to the function's +-- normal entry point. The function's closure is assumed to be in +-- R1/node. +-- +-- The slow entry point is used in two places: +-- +-- (a) unknown calls: eg. stg_PAP_entry +-- (b) returning from a heap-check failure + +mkSlowEntryCode :: ClosureInfo -> [(Id,GlobalReg)] -> FCode CmmStmts +-- If this function doesn't have a specialised ArgDescr, we need +-- to generate the function's arg bitmap, slow-entry code, and +-- register-save code for the heap-check failure +-- Here, we emit the slow-entry code, and +-- return the register-save assignments +mkSlowEntryCode cl_info reg_args + | Just (_, ArgGen _) <- closureFunInfo cl_info + = do { emitSimpleProc slow_lbl (emitStmts load_stmts) + ; return save_stmts } + | otherwise = return noStmts + where + name = closureName cl_info + slow_lbl = mkSlowEntryLabel name + + load_stmts = mkStmts load_assts `plusStmts` mkStmts [stk_adj_pop, jump_to_entry] + save_stmts = oneStmt stk_adj_push `plusStmts` mkStmts save_assts + + reps_w_regs :: [(CgRep,GlobalReg)] + reps_w_regs = [(idCgRep id, reg) | (id,reg) <- reverse reg_args] + (final_stk_offset, stk_offsets) + = mapAccumL (\off (rep,_) -> (off + cgRepSizeW rep, off)) + 0 reps_w_regs + + load_assts = zipWithEqual "mk_load" mk_load reps_w_regs stk_offsets + mk_load (rep,reg) offset = CmmAssign (CmmGlobal reg) + (CmmLoad (cmmRegOffW spReg offset) + (argMachRep rep)) + + save_assts = zipWithEqual "mk_save" mk_save reps_w_regs stk_offsets + mk_save (rep,reg) offset = ASSERT( argMachRep rep == globalRegType reg ) + CmmStore (cmmRegOffW spReg offset) + (CmmReg (CmmGlobal reg)) + + stk_adj_pop = CmmAssign spReg (cmmRegOffW spReg final_stk_offset) + stk_adj_push = CmmAssign spReg (cmmRegOffW spReg (- final_stk_offset)) + jump_to_entry = CmmJump (mkLblExpr (enterLocalIdLabel name)) [] +-} + +----------------------------------------- +thunkCode :: ClosureInfo -> CostCentreStack -> C_SRT -> LocalReg -> StgExpr -> FCode () +thunkCode cl_info cc srt node body + = do { let node_points = nodeMustPointToIt (closureLFInfo cl_info) + + ; tickyEnterThunk cl_info + ; ldvEnterClosure cl_info -- NB: Node always points when profiling + ; granThunk node_points + + -- Heap overflow check + ; entryHeapCheck node [] srt $ do + { -- Overwrite with black hole if necessary + -- but *after* the heap-overflow check + whenC (blackHoleOnEntry cl_info && node_points) + (blackHoleIt cl_info) + + -- Push update frame + ; setupUpdate cl_info node + + -- We only enter cc after setting up update so + -- that cc of enclosing scope will be recorded + -- in update frame CAF/DICT functions will be + -- subsumed by this enclosing cc + ; enterCostCentre cl_info cc body + + ; cgExpr body } } + + +------------------------------------------------------------------------ +-- Update and black-hole wrappers +------------------------------------------------------------------------ + +blackHoleIt :: ClosureInfo -> FCode () +-- Only called for closures with no args +-- Node points to the closure +blackHoleIt closure_info = emitBlackHoleCode (closureSingleEntry closure_info) + +emitBlackHoleCode :: Bool -> FCode () +emitBlackHoleCode is_single_entry + | eager_blackholing = do + tickyBlackHole (not is_single_entry) + emit (mkStore (CmmReg nodeReg) (CmmLit (CmmLabel bh_lbl))) + | otherwise = + nopC + where + bh_lbl | is_single_entry = mkRtsDataLabel (sLit "stg_SE_BLACKHOLE_info") + | otherwise = mkRtsDataLabel (sLit "stg_BLACKHOLE_info") + + -- If we wanted to do eager blackholing with slop filling, + -- we'd need to do it at the *end* of a basic block, otherwise + -- we overwrite the free variables in the thunk that we still + -- need. We have a patch for this from Andy Cheadle, but not + -- incorporated yet. --SDM [6/2004] + -- + -- Profiling needs slop filling (to support LDV profiling), so + -- currently eager blackholing doesn't work with profiling. + -- + -- Previously, eager blackholing was enabled when ticky-ticky + -- was on. But it didn't work, and it wasn't strictly necessary + -- to bring back minimal ticky-ticky, so now EAGER_BLACKHOLING + -- is unconditionally disabled. -- krc 1/2007 + + eager_blackholing = False + +setupUpdate :: ClosureInfo -> LocalReg -> FCode () + -- Nota Bene: this function does not change Node (even if it's a CAF), + -- so that the cost centre in the original closure can still be + -- extracted by a subsequent enterCostCentre +setupUpdate closure_info node + | closureReEntrant closure_info + = return () + + | not (isStaticClosure closure_info) + = if closureUpdReqd closure_info + then do { tickyPushUpdateFrame; pushUpdateFrame node } + else tickyUpdateFrameOmitted + + | otherwise -- A static closure + = do { tickyUpdateBhCaf closure_info + + ; if closureUpdReqd closure_info + then do -- Blackhole the (updatable) CAF: + { upd_closure <- link_caf closure_info True + ; pushUpdateFrame upd_closure } + else tickyUpdateFrameOmitted + } + +pushUpdateFrame :: LocalReg -> FCode () +pushUpdateFrame cl_reg + = emit (mkAddToContext (mkLblExpr mkUpdInfoLabel) + [CmmReg (CmmLocal cl_reg)]) + +----------------------------------------------------------------------------- +-- Entering a CAF +-- +-- When a CAF is first entered, it creates a black hole in the heap, +-- and updates itself with an indirection to this new black hole. +-- +-- We update the CAF with an indirection to a newly-allocated black +-- hole in the heap. We also set the blocking queue on the newly +-- allocated black hole to be empty. +-- +-- Why do we make a black hole in the heap when we enter a CAF? +-- +-- - for a generational garbage collector, which needs a fast +-- test for whether an updatee is in an old generation or not +-- +-- - for the parallel system, which can implement updates more +-- easily if the updatee is always in the heap. (allegedly). +-- +-- When debugging, we maintain a separate CAF list so we can tell when +-- a CAF has been garbage collected. + +-- newCAF must be called before the itbl ptr is overwritten, since +-- newCAF records the old itbl ptr in order to do CAF reverting +-- (which Hugs needs to do in order that combined mode works right.) +-- + +-- ToDo [Feb 04] This entire link_caf nonsense could all be moved +-- into the "newCAF" RTS procedure, which we call anyway, including +-- the allocation of the black-hole indirection closure. +-- That way, code size would fall, the CAF-handling code would +-- be closer together, and the compiler wouldn't need to know +-- about off_indirectee etc. + +link_caf :: ClosureInfo + -> Bool -- True <=> updatable, False <=> single-entry + -> FCode LocalReg -- Returns amode for closure to be updated +-- To update a CAF we must allocate a black hole, link the CAF onto the +-- CAF list, then update the CAF to point to the fresh black hole. +-- This function returns the address of the black hole, so it can be +-- updated with the new value when available. The reason for all of this +-- is that we only want to update dynamic heap objects, not static ones, +-- so that generational GC is easier. +link_caf cl_info is_upd = do + { -- Alloc black hole specifying CC_HDR(Node) as the cost centre + ; let use_cc = costCentreFrom (CmmReg nodeReg) + blame_cc = use_cc + ; hp_rel <- allocDynClosure bh_cl_info use_cc blame_cc [] + + -- Call the RTS function newCAF to add the CAF to the CafList + -- so that the garbage collector can find them + -- This must be done *before* the info table pointer is overwritten, + -- because the old info table ptr is needed for reversion + ; emitRtsCallWithVols (sLit "newCAF") [(CmmReg nodeReg,AddrHint)] [node] False + -- node is live, so save it. + + -- Overwrite the closure with a (static) indirection + -- to the newly-allocated black hole + ; emit (mkStore (cmmRegOffW nodeReg off_indirectee) (CmmReg (CmmLocal hp_rel)) <*> + mkStore (CmmReg nodeReg) ind_static_info) + + ; return hp_rel } + where + bh_cl_info :: ClosureInfo + bh_cl_info | is_upd = cafBlackHoleClosureInfo cl_info + | otherwise = seCafBlackHoleClosureInfo cl_info + + ind_static_info :: CmmExpr + ind_static_info = mkLblExpr mkIndStaticInfoLabel + + off_indirectee :: WordOff + off_indirectee = fixedHdrSize + oFFSET_StgInd_indirectee*wORD_SIZE + + +------------------------------------------------------------------------ +-- Profiling +------------------------------------------------------------------------ + +-- For "global" data constructors the description is simply occurrence +-- name of the data constructor itself. Otherwise it is determined by +-- @closureDescription@ from the let binding information. + +closureDescription :: Module -- Module + -> Name -- Id of closure binding + -> String + -- Not called for StgRhsCon which have global info tables built in + -- CgConTbls.lhs with a description generated from the data constructor +closureDescription mod_name name + = showSDocDump (char '<' <> + (if isExternalName name + then ppr name -- ppr will include the module name prefix + else pprModule mod_name <> char '.' <> ppr name) <> + char '>') + -- showSDocDump, because we want to see the unique on the Name. + diff --git a/compiler/codeGen/StgCmmBind.hs-boot b/compiler/codeGen/StgCmmBind.hs-boot new file mode 100644 index 0000000000..5840e990c8 --- /dev/null +++ b/compiler/codeGen/StgCmmBind.hs-boot @@ -0,0 +1,6 @@ +module StgCmmBind where + +import StgCmmMonad( FCode ) +import StgSyn( StgBinding ) + +cgBind :: StgBinding -> FCode () diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs new file mode 100644 index 0000000000..c32d7cd857 --- /dev/null +++ b/compiler/codeGen/StgCmmClosure.hs @@ -0,0 +1,1100 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C-- code generation: +-- +-- The types LambdaFormInfo +-- ClosureInfo +-- +-- Nothing monadic in here! +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + + +module StgCmmClosure ( + SMRep, + DynTag, tagForCon, isSmallFamily, + ConTagZ, dataConTagZ, + + ArgDescr(..), Liveness(..), + C_SRT(..), needsSRT, + + isVoidRep, isGcPtrRep, addIdReps, addArgReps, + argPrimRep, + + LambdaFormInfo, -- Abstract + StandardFormInfo, -- ...ditto... + mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo, + mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape, + lfDynTag, + + ClosureInfo, + mkClosureInfo, mkConInfo, maybeIsLFCon, + + closureSize, closureNonHdrSize, + closureGoodStuffSize, closurePtrsSize, + slopSize, + + closureName, infoTableLabelFromCI, + closureLabelFromCI, + closureTypeInfo, + closureLFInfo, isLFThunk,closureSMRep, closureUpdReqd, + closureNeedsUpdSpace, closureIsThunk, + closureSingleEntry, closureReEntrant, isConstrClosure_maybe, + closureFunInfo, isStandardFormThunk, isKnownFun, + funTag, tagForArity, + + enterIdLabel, enterLocalIdLabel, + + nodeMustPointToIt, + CallMethod(..), getCallMethod, + + blackHoleOnEntry, + + getClosureType, + + isToplevClosure, + closureValDescr, closureTypeDescr, -- profiling + + isStaticClosure, + cafBlackHoleClosureInfo, seCafBlackHoleClosureInfo, + + staticClosureNeedsLink, clHasCafRefs + ) where + +#include "../includes/MachDeps.h" + +#define FAST_STRING_NOT_NEEDED +#include "HsVersions.h" + +import ClosureInfo (ArgDescr(..), C_SRT(..), Liveness(..)) + -- XXX temporary becuase FunInfo needs this one + +import StgSyn +import SMRep +import Cmm ( ClosureTypeInfo(..) ) +import CmmExpr + +import CLabel +import StaticFlags +import Id +import IdInfo +import DataCon +import Name +import OccName +import Type +import TypeRep +import TcType +import TyCon +import BasicTypes +import Outputable +import Constants + + +----------------------------------------------------------------------------- +-- Representations +----------------------------------------------------------------------------- + +addIdReps :: [Id] -> [(PrimRep, Id)] +addIdReps ids = [(idPrimRep id, id) | id <- ids] + +addArgReps :: [StgArg] -> [(PrimRep, StgArg)] +addArgReps args = [(argPrimRep arg, arg) | arg <- args] + +argPrimRep :: StgArg -> PrimRep +argPrimRep arg = typePrimRep (stgArgType arg) + +isVoidRep :: PrimRep -> Bool +isVoidRep VoidRep = True +isVoidRep _other = False + +isGcPtrRep :: PrimRep -> Bool +isGcPtrRep PtrRep = True +isGcPtrRep _ = False + + +----------------------------------------------------------------------------- +-- LambdaFormInfo +----------------------------------------------------------------------------- + +-- Information about an identifier, from the code generator's point of +-- view. Every identifier is bound to a LambdaFormInfo in the +-- environment, which gives the code generator enough info to be able to +-- tail call or return that identifier. + +data LambdaFormInfo + = LFReEntrant -- Reentrant closure (a function) + TopLevelFlag -- True if top level + !Int -- Arity. Invariant: always > 0 + !Bool -- True <=> no fvs + ArgDescr -- Argument descriptor (should really be in ClosureInfo) + + | LFThunk -- Thunk (zero arity) + TopLevelFlag + !Bool -- True <=> no free vars + !Bool -- True <=> updatable (i.e., *not* single-entry) + StandardFormInfo + !Bool -- True <=> *might* be a function type + + | LFCon -- A saturated constructor application + DataCon -- The constructor + + | LFUnknown -- Used for function arguments and imported things. + -- We know nothing about this closure. + -- Treat like updatable "LFThunk"... + -- Imported things which we *do* know something about use + -- one of the other LF constructors (eg LFReEntrant for + -- known functions) + !Bool -- True <=> *might* be a function type + -- The False case is good when we want to enter it, + -- because then we know the entry code will do + -- For a function, the entry code is the fast entry point + + | LFUnLifted -- A value of unboxed type; + -- always a value, neeeds evaluation + + | LFLetNoEscape -- See LetNoEscape module for precise description + + | LFBlackHole -- Used for the closures allocated to hold the result + -- of a CAF. We want the target of the update frame to + -- be in the heap, so we make a black hole to hold it. + CLabel -- Flavour (info label, eg CAF_BLACKHOLE_info). + + +------------------------- +-- An ArgDsecr describes the argument pattern of a function + +{- XXX -- imported from old ClosureInfo for now +data ArgDescr + = ArgSpec -- Fits one of the standard patterns + !StgHalfWord -- RTS type identifier ARG_P, ARG_N, ... + + | ArgGen -- General case + Liveness -- Details about the arguments +-} + +{- XXX -- imported from old ClosureInfo for now +------------------------- +-- We represent liveness bitmaps as a Bitmap (whose internal +-- representation really is a bitmap). These are pinned onto case return +-- vectors to indicate the state of the stack for the garbage collector. +-- +-- In the compiled program, liveness bitmaps that fit inside a single +-- word (StgWord) are stored as a single word, while larger bitmaps are +-- stored as a pointer to an array of words. + +data Liveness + = SmallLiveness -- Liveness info that fits in one word + StgWord -- Here's the bitmap + + | BigLiveness -- Liveness info witha a multi-word bitmap + CLabel -- Label for the bitmap +-} + +------------------------- +-- StandardFormInfo tells whether this thunk has one of +-- a small number of standard forms + +data StandardFormInfo + = NonStandardThunk + -- Not of of the standard forms + + | SelectorThunk + -- A SelectorThunk is of form + -- case x of + -- con a1,..,an -> ak + -- and the constructor is from a single-constr type. + WordOff -- 0-origin offset of ak within the "goods" of + -- constructor (Recall that the a1,...,an may be laid + -- out in the heap in a non-obvious order.) + + | ApThunk + -- An ApThunk is of form + -- x1 ... xn + -- The code for the thunk just pushes x2..xn on the stack and enters x1. + -- There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled + -- in the RTS to save space. + Int -- Arity, n + + +------------------------------------------------------ +-- Building LambdaFormInfo +------------------------------------------------------ + +mkLFArgument :: Id -> LambdaFormInfo +mkLFArgument id + | isUnLiftedType ty = LFUnLifted + | might_be_a_function ty = LFUnknown True + | otherwise = LFUnknown False + where + ty = idType id + +------------- +mkLFLetNoEscape :: LambdaFormInfo +mkLFLetNoEscape = LFLetNoEscape + +------------- +mkLFReEntrant :: TopLevelFlag -- True of top level + -> [Id] -- Free vars + -> [Id] -- Args + -> ArgDescr -- Argument descriptor + -> LambdaFormInfo + +mkLFReEntrant top fvs args arg_descr + = LFReEntrant top (length args) (null fvs) arg_descr + +------------- +mkLFThunk :: Type -> TopLevelFlag -> [Id] -> UpdateFlag -> LambdaFormInfo +mkLFThunk thunk_ty top fvs upd_flag + = ASSERT( not (isUpdatable upd_flag) || not (isUnLiftedType thunk_ty) ) + LFThunk top (null fvs) + (isUpdatable upd_flag) + NonStandardThunk + (might_be_a_function thunk_ty) + +-------------- +might_be_a_function :: Type -> Bool +-- Return False only if we are *sure* it's a data type +-- Look through newtypes etc as much as poss +might_be_a_function ty + = case splitTyConApp_maybe (repType ty) of + Just (tc, _) -> not (isDataTyCon tc) + Nothing -> True + +------------- +mkConLFInfo :: DataCon -> LambdaFormInfo +mkConLFInfo con = LFCon con + +------------- +mkSelectorLFInfo :: Id -> Int -> Bool -> LambdaFormInfo +mkSelectorLFInfo id offset updatable + = LFThunk NotTopLevel False updatable (SelectorThunk offset) + (might_be_a_function (idType id)) + +------------- +mkApLFInfo :: Id -> UpdateFlag -> Arity -> LambdaFormInfo +mkApLFInfo id upd_flag arity + = LFThunk NotTopLevel (arity == 0) (isUpdatable upd_flag) (ApThunk arity) + (might_be_a_function (idType id)) + +------------- +mkLFImported :: Id -> LambdaFormInfo +mkLFImported id + | Just con <- isDataConWorkId_maybe id + , isNullaryRepDataCon con + = LFCon con -- An imported nullary constructor + -- We assume that the constructor is evaluated so that + -- the id really does point directly to the constructor + + | arity > 0 + = LFReEntrant TopLevel arity True (panic "arg_descr") + + | otherwise + = mkLFArgument id -- Not sure of exact arity + where + arity = idArity id + +----------------------------------------------------- +-- Dynamic pointer tagging +----------------------------------------------------- + +type ConTagZ = Int -- A *zero-indexed* contructor tag + +type DynTag = Int -- The tag on a *pointer* + -- (from the dynamic-tagging paper) + +{- Note [Data constructor dynamic tags] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The family size of a data type (the number of constructors) +can be either: + * small, if the family size < 2**tag_bits + * big, otherwise. + +Small families can have the constructor tag in the tag bits. +Big families only use the tag value 1 to represent evaluatedness. -} + +isSmallFamily :: Int -> Bool +isSmallFamily fam_size = fam_size <= mAX_PTR_TAG + +-- We keep the *zero-indexed* tag in the srt_len field of the info +-- table of a data constructor. +dataConTagZ :: DataCon -> ConTagZ +dataConTagZ con = dataConTag con - fIRST_TAG + +tagForCon :: DataCon -> DynTag +tagForCon con + | isSmallFamily fam_size = con_tag + 1 + | otherwise = 1 + where + con_tag = dataConTagZ con + fam_size = tyConFamilySize (dataConTyCon con) + +tagForArity :: Int -> DynTag +tagForArity arity | isSmallFamily arity = arity + | otherwise = 0 + +lfDynTag :: LambdaFormInfo -> DynTag +lfDynTag (LFCon con) = tagForCon con +lfDynTag (LFReEntrant _ arity _ _) = tagForArity arity +lfDynTag _other = 0 + + +----------------------------------------------------------------------------- +-- Observing LambdaFormInfo +----------------------------------------------------------------------------- + +------------- +maybeIsLFCon :: LambdaFormInfo -> Maybe DataCon +maybeIsLFCon (LFCon con) = Just con +maybeIsLFCon _ = Nothing + +------------ +isLFThunk :: LambdaFormInfo -> Bool +isLFThunk (LFThunk _ _ _ _ _) = True +isLFThunk (LFBlackHole _) = True + -- return True for a blackhole: this function is used to determine + -- whether to use the thunk header in SMP mode, and a blackhole + -- must have one. +isLFThunk _ = False + + +----------------------------------------------------------------------------- +-- Choosing SM reps +----------------------------------------------------------------------------- + +chooseSMRep + :: Bool -- True <=> static closure + -> LambdaFormInfo + -> WordOff -> WordOff -- Tot wds, ptr wds + -> SMRep + +chooseSMRep is_static lf_info tot_wds ptr_wds + = let + nonptr_wds = tot_wds - ptr_wds + closure_type = getClosureType is_static ptr_wds lf_info + in + GenericRep is_static ptr_wds nonptr_wds closure_type + +-- We *do* get non-updatable top-level thunks sometimes. eg. f = g +-- gets compiled to a jump to g (if g has non-zero arity), instead of +-- messing around with update frames and PAPs. We set the closure type +-- to FUN_STATIC in this case. + +getClosureType :: Bool -> WordOff -> LambdaFormInfo -> ClosureType +getClosureType is_static ptr_wds lf_info + = case lf_info of + LFCon {} | is_static && ptr_wds == 0 -> ConstrNoCaf + | otherwise -> Constr + LFReEntrant {} -> Fun + LFThunk _ _ _ (SelectorThunk {}) _ -> ThunkSelector + LFThunk {} -> Thunk + _ -> panic "getClosureType" + + +----------------------------------------------------------------------------- +-- nodeMustPointToIt +----------------------------------------------------------------------------- + +-- Be sure to see the stg-details notes about these... + +nodeMustPointToIt :: LambdaFormInfo -> Bool +nodeMustPointToIt (LFReEntrant top _ no_fvs _) + = not no_fvs || -- Certainly if it has fvs we need to point to it + isNotTopLevel top + -- If it is not top level we will point to it + -- We can have a \r closure with no_fvs which + -- is not top level as special case cgRhsClosure + -- has been dissabled in favour of let floating + + -- For lex_profiling we also access the cost centre for a + -- non-inherited function i.e. not top level + -- the not top case above ensures this is ok. + +nodeMustPointToIt (LFCon _) = True + + -- Strictly speaking, the above two don't need Node to point + -- to it if the arity = 0. But this is a *really* unlikely + -- situation. If we know it's nil (say) and we are entering + -- it. Eg: let x = [] in x then we will certainly have inlined + -- x, since nil is a simple atom. So we gain little by not + -- having Node point to known zero-arity things. On the other + -- hand, we do lose something; Patrick's code for figuring out + -- when something has been updated but not entered relies on + -- having Node point to the result of an update. SLPJ + -- 27/11/92. + +nodeMustPointToIt (LFThunk _ no_fvs updatable NonStandardThunk _) + = updatable || not no_fvs || opt_SccProfilingOn + -- For the non-updatable (single-entry case): + -- + -- True if has fvs (in which case we need access to them, and we + -- should black-hole it) + -- or profiling (in which case we need to recover the cost centre + -- from inside it) + +nodeMustPointToIt (LFThunk {}) -- Node must point to a standard-form thunk + = True + +nodeMustPointToIt (LFUnknown _) = True +nodeMustPointToIt LFUnLifted = False +nodeMustPointToIt (LFBlackHole _) = True -- BH entry may require Node to point +nodeMustPointToIt LFLetNoEscape = False + +----------------------------------------------------------------------------- +-- getCallMethod +----------------------------------------------------------------------------- + +{- The entry conventions depend on the type of closure being entered, +whether or not it has free variables, and whether we're running +sequentially or in parallel. + +Closure Node Argument Enter +Characteristics Par Req'd Passing Via +------------------------------------------------------------------------------- +Unknown & no & yes & stack & node +Known fun (>1 arg), no fvs & no & no & registers & fast entry (enough args) + & slow entry (otherwise) +Known fun (>1 arg), fvs & no & yes & registers & fast entry (enough args) +0 arg, no fvs \r,\s & no & no & n/a & direct entry +0 arg, no fvs \u & no & yes & n/a & node +0 arg, fvs \r,\s & no & yes & n/a & direct entry +0 arg, fvs \u & no & yes & n/a & node + +Unknown & yes & yes & stack & node +Known fun (>1 arg), no fvs & yes & no & registers & fast entry (enough args) + & slow entry (otherwise) +Known fun (>1 arg), fvs & yes & yes & registers & node +0 arg, no fvs \r,\s & yes & no & n/a & direct entry +0 arg, no fvs \u & yes & yes & n/a & node +0 arg, fvs \r,\s & yes & yes & n/a & node +0 arg, fvs \u & yes & yes & n/a & node +\end{tabular} + +When black-holing, single-entry closures could also be entered via node +(rather than directly) to catch double-entry. -} + +data CallMethod + = EnterIt -- No args, not a function + + | JumpToIt -- A join point + + | ReturnIt -- It's a value (function, unboxed value, + -- or constructor), so just return it. + + | SlowCall -- Unknown fun, or known fun with + -- too few args. + + | DirectEntry -- Jump directly, with args in regs + CLabel -- The code label + Int -- Its arity + +getCallMethod :: Name -- Function being applied + -> CafInfo -- Can it refer to CAF's? + -> LambdaFormInfo -- Its info + -> Int -- Number of available arguments + -> CallMethod + +getCallMethod _name _ lf_info _n_args + | nodeMustPointToIt lf_info && opt_Parallel + = -- If we're parallel, then we must always enter via node. + -- The reason is that the closure may have been + -- fetched since we allocated it. + EnterIt + +getCallMethod name caf (LFReEntrant _ arity _ _) n_args + | n_args == 0 = ASSERT( arity /= 0 ) + ReturnIt -- No args at all + | n_args < arity = SlowCall -- Not enough args + | otherwise = DirectEntry (enterIdLabel name caf) arity + +getCallMethod _name _ LFUnLifted n_args + = ASSERT( n_args == 0 ) ReturnIt + +getCallMethod _name _ (LFCon _) n_args + = ASSERT( n_args == 0 ) ReturnIt + +getCallMethod name caf (LFThunk _ _ updatable std_form_info is_fun) n_args + | is_fun -- it *might* be a function, so we must "call" it (which is always safe) + = SlowCall -- We cannot just enter it [in eval/apply, the entry code + -- is the fast-entry code] + + -- Since is_fun is False, we are *definitely* looking at a data value + | updatable || opt_DoTickyProfiling -- to catch double entry + {- OLD: || opt_SMP + I decided to remove this, because in SMP mode it doesn't matter + if we enter the same thunk multiple times, so the optimisation + of jumping directly to the entry code is still valid. --SDM + -} + = EnterIt + -- We used to have ASSERT( n_args == 0 ), but actually it is + -- possible for the optimiser to generate + -- let bot :: Int = error Int "urk" + -- in (bot `cast` unsafeCoerce Int (Int -> Int)) 3 + -- This happens as a result of the case-of-error transformation + -- So the right thing to do is just to enter the thing + + | otherwise -- Jump direct to code for single-entry thunks + = ASSERT( n_args == 0 ) + DirectEntry (thunkEntryLabel name caf std_form_info updatable) 0 + +getCallMethod _name _ (LFUnknown True) _n_args + = SlowCall -- might be a function + +getCallMethod name _ (LFUnknown False) n_args + = ASSERT2 ( n_args == 0, ppr name <+> ppr n_args ) + EnterIt -- Not a function + +getCallMethod _name _ (LFBlackHole _) _n_args + = SlowCall -- Presumably the black hole has by now + -- been updated, but we don't know with + -- what, so we slow call it + +getCallMethod _name _ LFLetNoEscape _n_args + = JumpToIt + +isStandardFormThunk :: LambdaFormInfo -> Bool +isStandardFormThunk (LFThunk _ _ _ (SelectorThunk _) _) = True +isStandardFormThunk (LFThunk _ _ _ (ApThunk _) _) = True +isStandardFormThunk _other_lf_info = False + +isKnownFun :: LambdaFormInfo -> Bool +isKnownFun (LFReEntrant _ _ _ _) = True +isKnownFun LFLetNoEscape = True +isKnownFun _ = False + +----------------------------------------------------------------------------- +-- staticClosureRequired +----------------------------------------------------------------------------- + +{- staticClosureRequired is never called (hence commented out) + + SimonMar writes (Sept 07) It's an optimisation we used to apply at + one time, I believe, but it got lost probably in the rewrite of + the RTS/code generator. I left that code there to remind me to + look into whether it was worth doing sometime + +{- Avoiding generating entries and info tables + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +At present, for every function we generate all of the following, +just in case. But they aren't always all needed, as noted below: + +[NB1: all of this applies only to *functions*. Thunks always +have closure, info table, and entry code.] + +[NB2: All are needed if the function is *exported*, just to play safe.] + +* Fast-entry code ALWAYS NEEDED + +* Slow-entry code + Needed iff (a) we have any un-saturated calls to the function + OR (b) the function is passed as an arg + OR (c) we're in the parallel world and the function has free vars + [Reason: in parallel world, we always enter functions + with free vars via the closure.] + +* The function closure + Needed iff (a) we have any un-saturated calls to the function + OR (b) the function is passed as an arg + OR (c) if the function has free vars (ie not top level) + + Why case (a) here? Because if the arg-satis check fails, + UpdatePAP stuffs a pointer to the function closure in the PAP. + [Could be changed; UpdatePAP could stuff in a code ptr instead, + but doesn't seem worth it.] + + [NB: these conditions imply that we might need the closure + without the slow-entry code. Here's how. + + f x y = let g w = ...x..y..w... + in + ...(g t)... + + Here we need a closure for g which contains x and y, + but since the calls are all saturated we just jump to the + fast entry point for g, with R1 pointing to the closure for g.] + + +* Standard info table + Needed iff (a) we have any un-saturated calls to the function + OR (b) the function is passed as an arg + OR (c) the function has free vars (ie not top level) + + NB. In the sequential world, (c) is only required so that the function closure has + an info table to point to, to keep the storage manager happy. + If (c) alone is true we could fake up an info table by choosing + one of a standard family of info tables, whose entry code just + bombs out. + + [NB In the parallel world (c) is needed regardless because + we enter functions with free vars via the closure.] + + If (c) is retained, then we'll sometimes generate an info table + (for storage mgr purposes) without slow-entry code. Then we need + to use an error label in the info table to substitute for the absent + slow entry code. +-} + +staticClosureRequired + :: Name + -> StgBinderInfo + -> LambdaFormInfo + -> Bool +staticClosureRequired binder bndr_info + (LFReEntrant top_level _ _ _) -- It's a function + = ASSERT( isTopLevel top_level ) + -- Assumption: it's a top-level, no-free-var binding + not (satCallsOnly bndr_info) + +staticClosureRequired binder other_binder_info other_lf_info = True +-} + +----------------------------------------------------------------------------- +-- Data types for closure information} +----------------------------------------------------------------------------- + + +{- Information about a closure, from the code generator's point of view. + +A ClosureInfo decribes the info pointer of a closure. It has +enough information + a) to construct the info table itself + b) to allocate a closure containing that info pointer (i.e. + it knows the info table label) + +We make a ClosureInfo for + - each let binding (both top level and not) + - each data constructor (for its shared static and + dynamic info tables) +-} + +data ClosureInfo + = ClosureInfo { + closureName :: !Name, -- The thing bound to this closure + closureLFInfo :: !LambdaFormInfo, -- NOTE: not an LFCon (see below) + closureSMRep :: !SMRep, -- representation used by storage mgr + closureSRT :: !C_SRT, -- What SRT applies to this closure + closureType :: !Type, -- Type of closure (ToDo: remove) + closureDescr :: !String -- closure description (for profiling) + } + + -- Constructor closures don't have a unique info table label (they use + -- the constructor's info table), and they don't have an SRT. + | ConInfo { + closureCon :: !DataCon, + closureSMRep :: !SMRep + } + +{- XXX temp imported from old ClosureInfo +-- C_SRT is what StgSyn.SRT gets translated to... +-- we add a label for the table, and expect only the 'offset/length' form + +data C_SRT = NoC_SRT + | C_SRT !CLabel !WordOff !StgHalfWord {-bitmap or escape-} + deriving (Eq) + +instance Outputable C_SRT where + ppr (NoC_SRT) = ptext SLIT("_no_srt_") + ppr (C_SRT label off bitmap) = parens (ppr label <> comma <> ppr off <> comma <> text (show bitmap)) +-} + +needsSRT :: C_SRT -> Bool +needsSRT NoC_SRT = False +needsSRT (C_SRT _ _ _) = True + + +-------------------------------------- +-- Building ClosureInfos +-------------------------------------- + +mkClosureInfo :: Bool -- Is static + -> Id + -> LambdaFormInfo + -> Int -> Int -- Total and pointer words + -> C_SRT + -> String -- String descriptor + -> ClosureInfo +mkClosureInfo is_static id lf_info tot_wds ptr_wds srt_info descr + = ClosureInfo { closureName = name, + closureLFInfo = lf_info, + closureSMRep = sm_rep, + closureSRT = srt_info, + closureType = idType id, + closureDescr = descr } + where + name = idName id + sm_rep = chooseSMRep is_static lf_info tot_wds ptr_wds + +mkConInfo :: Bool -- Is static + -> DataCon + -> Int -> Int -- Total and pointer words + -> ClosureInfo +mkConInfo is_static data_con tot_wds ptr_wds + = ConInfo { closureSMRep = sm_rep, + closureCon = data_con } + where + sm_rep = chooseSMRep is_static (mkConLFInfo data_con) tot_wds ptr_wds + + +-- We need a black-hole closure info to pass to @allocDynClosure@ when we +-- want to allocate the black hole on entry to a CAF. These are the only +-- ways to build an LFBlackHole, maintaining the invariant that it really +-- is a black hole and not something else. + +cafBlackHoleClosureInfo :: ClosureInfo -> ClosureInfo +cafBlackHoleClosureInfo (ClosureInfo { closureName = nm, + closureType = ty }) + = ClosureInfo { closureName = nm, + closureLFInfo = LFBlackHole mkCAFBlackHoleInfoTableLabel, + closureSMRep = BlackHoleRep, + closureSRT = NoC_SRT, + closureType = ty, + closureDescr = "" } +cafBlackHoleClosureInfo _ = panic "cafBlackHoleClosureInfo" + +seCafBlackHoleClosureInfo :: ClosureInfo -> ClosureInfo +seCafBlackHoleClosureInfo (ClosureInfo { closureName = nm, + closureType = ty }) + = ClosureInfo { closureName = nm, + closureLFInfo = LFBlackHole mkSECAFBlackHoleInfoTableLabel, + closureSMRep = BlackHoleRep, + closureSRT = NoC_SRT, + closureType = ty, + closureDescr = "" } +seCafBlackHoleClosureInfo _ = panic "seCafBlackHoleClosureInfo" + +-------------------------------------- +-- Extracting ClosureTypeInfo +-------------------------------------- + +closureTypeInfo :: ClosureInfo -> ClosureTypeInfo +closureTypeInfo cl_info + = case cl_info of + ConInfo { closureCon = con } + -> ConstrInfo (ptrs, nptrs) + (fromIntegral (dataConTagZ con)) + con_name + where + con_name = panic "closureTypeInfo" + -- Was: + -- cstr <- mkByteStringCLit $ dataConIdentity con + -- con_name = makeRelativeRefTo info_lbl cstr + + ClosureInfo { closureName = name, + closureLFInfo = LFReEntrant _ arity _ arg_descr, + closureSRT = srt } + -> FunInfo (ptrs, nptrs) + srt + (fromIntegral arity) + arg_descr + (CmmLabel (mkSlowEntryLabel name (clHasCafRefs cl_info))) + + ClosureInfo { closureLFInfo = LFThunk _ _ _ (SelectorThunk offset) _, + closureSRT = srt } + -> ThunkSelectorInfo (fromIntegral offset) srt + + ClosureInfo { closureLFInfo = LFThunk {}, + closureSRT = srt } + -> ThunkInfo (ptrs, nptrs) srt + + _ -> panic "unexpected lambda form in mkCmmInfo" + where +-- info_lbl = infoTableLabelFromCI cl_info + ptrs = fromIntegral $ closurePtrsSize cl_info + size = fromIntegral $ closureNonHdrSize cl_info + nptrs = size - ptrs + +-------------------------------------- +-- Functions about closure *sizes* +-------------------------------------- + +closureSize :: ClosureInfo -> WordOff +closureSize cl_info = hdr_size + closureNonHdrSize cl_info + where hdr_size | closureIsThunk cl_info = thunkHdrSize + | otherwise = fixedHdrSize + -- All thunks use thunkHdrSize, even if they are non-updatable. + -- this is because we don't have separate closure types for + -- updatable vs. non-updatable thunks, so the GC can't tell the + -- difference. If we ever have significant numbers of non- + -- updatable thunks, it might be worth fixing this. + +closureNonHdrSize :: ClosureInfo -> WordOff +closureNonHdrSize cl_info + = tot_wds + computeSlopSize tot_wds cl_info + where + tot_wds = closureGoodStuffSize cl_info + +closureGoodStuffSize :: ClosureInfo -> WordOff +closureGoodStuffSize cl_info + = let (ptrs, nonptrs) = sizes_from_SMRep (closureSMRep cl_info) + in ptrs + nonptrs + +closurePtrsSize :: ClosureInfo -> WordOff +closurePtrsSize cl_info + = let (ptrs, _) = sizes_from_SMRep (closureSMRep cl_info) + in ptrs + +-- not exported: +sizes_from_SMRep :: SMRep -> (WordOff,WordOff) +sizes_from_SMRep (GenericRep _ ptrs nonptrs _) = (ptrs, nonptrs) +sizes_from_SMRep BlackHoleRep = (0, 0) + +-- Computing slop size. WARNING: this looks dodgy --- it has deep +-- knowledge of what the storage manager does with the various +-- representations... +-- +-- Slop Requirements: every thunk gets an extra padding word in the +-- header, which takes the the updated value. + +slopSize :: ClosureInfo -> WordOff +slopSize cl_info = computeSlopSize payload_size cl_info + where payload_size = closureGoodStuffSize cl_info + +computeSlopSize :: WordOff -> ClosureInfo -> WordOff +computeSlopSize payload_size cl_info + = max 0 (minPayloadSize smrep updatable - payload_size) + where + smrep = closureSMRep cl_info + updatable = closureNeedsUpdSpace cl_info + +closureNeedsUpdSpace :: ClosureInfo -> Bool +-- We leave space for an update if either (a) the closure is updatable +-- or (b) it is a static thunk. This is because a static thunk needs +-- a static link field in a predictable place (after the slop), regardless +-- of whether it is updatable or not. +closureNeedsUpdSpace (ClosureInfo { closureLFInfo = + LFThunk TopLevel _ _ _ _ }) = True +closureNeedsUpdSpace cl_info = closureUpdReqd cl_info + +minPayloadSize :: SMRep -> Bool -> WordOff +minPayloadSize smrep updatable + = case smrep of + BlackHoleRep -> min_upd_size + GenericRep _ _ _ _ | updatable -> min_upd_size + GenericRep True _ _ _ -> 0 -- static + GenericRep False _ _ _ -> mIN_PAYLOAD_SIZE + -- ^^^^^___ dynamic + where + min_upd_size = + ASSERT(mIN_PAYLOAD_SIZE <= sIZEOF_StgSMPThunkHeader) + 0 -- check that we already have enough + -- room for mIN_SIZE_NonUpdHeapObject, + -- due to the extra header word in SMP + +-------------------------------------- +-- Other functions over ClosureInfo +-------------------------------------- + +blackHoleOnEntry :: ClosureInfo -> Bool +-- Static closures are never themselves black-holed. +-- Updatable ones will be overwritten with a CAFList cell, which points to a +-- black hole; +-- Single-entry ones have no fvs to plug, and we trust they don't form part +-- of a loop. + +blackHoleOnEntry ConInfo{} = False +blackHoleOnEntry (ClosureInfo { closureLFInfo = lf_info, closureSMRep = rep }) + | isStaticRep rep + = False -- Never black-hole a static closure + + | otherwise + = case lf_info of + LFReEntrant _ _ _ _ -> False + LFLetNoEscape -> False + LFThunk _ no_fvs updatable _ _ + -> if updatable + then not opt_OmitBlackHoling + else opt_DoTickyProfiling || not no_fvs + -- the former to catch double entry, + -- and the latter to plug space-leaks. KSW/SDM 1999-04. + + _other -> panic "blackHoleOnEntry" -- Should never happen + + +staticClosureNeedsLink :: ClosureInfo -> Bool +-- A static closure needs a link field to aid the GC when traversing +-- the static closure graph. But it only needs such a field if either +-- a) it has an SRT +-- b) it's a constructor with one or more pointer fields +-- In case (b), the constructor's fields themselves play the role +-- of the SRT. +staticClosureNeedsLink (ClosureInfo { closureSRT = srt }) + = needsSRT srt +staticClosureNeedsLink (ConInfo { closureSMRep = sm_rep, closureCon = con }) + = not (isNullaryRepDataCon con) && not_nocaf_constr + where + not_nocaf_constr = + case sm_rep of + GenericRep _ _ _ ConstrNoCaf -> False + _other -> True + +isStaticClosure :: ClosureInfo -> Bool +isStaticClosure cl_info = isStaticRep (closureSMRep cl_info) + +closureUpdReqd :: ClosureInfo -> Bool +closureUpdReqd ClosureInfo{ closureLFInfo = lf_info } = lfUpdatable lf_info +closureUpdReqd ConInfo{} = False + +lfUpdatable :: LambdaFormInfo -> Bool +lfUpdatable (LFThunk _ _ upd _ _) = upd +lfUpdatable (LFBlackHole _) = True + -- Black-hole closures are allocated to receive the results of an + -- alg case with a named default... so they need to be updated. +lfUpdatable _ = False + +closureIsThunk :: ClosureInfo -> Bool +closureIsThunk ClosureInfo{ closureLFInfo = lf_info } = isLFThunk lf_info +closureIsThunk ConInfo{} = False + +closureSingleEntry :: ClosureInfo -> Bool +closureSingleEntry (ClosureInfo { closureLFInfo = LFThunk _ _ upd _ _}) = not upd +closureSingleEntry _ = False + +closureReEntrant :: ClosureInfo -> Bool +closureReEntrant (ClosureInfo { closureLFInfo = LFReEntrant _ _ _ _ }) = True +closureReEntrant _ = False + +isConstrClosure_maybe :: ClosureInfo -> Maybe DataCon +isConstrClosure_maybe (ConInfo { closureCon = data_con }) = Just data_con +isConstrClosure_maybe _ = Nothing + +closureFunInfo :: ClosureInfo -> Maybe (Int, ArgDescr) +closureFunInfo (ClosureInfo { closureLFInfo = lf_info }) = lfFunInfo lf_info +closureFunInfo _ = Nothing + +lfFunInfo :: LambdaFormInfo -> Maybe (Int, ArgDescr) +lfFunInfo (LFReEntrant _ arity _ arg_desc) = Just (arity, arg_desc) +lfFunInfo _ = Nothing + +funTag :: ClosureInfo -> DynTag +funTag (ClosureInfo { closureLFInfo = lf_info }) = lfDynTag lf_info +funTag (ConInfo {}) = panic "funTag" + +isToplevClosure :: ClosureInfo -> Bool +isToplevClosure (ClosureInfo { closureLFInfo = lf_info }) + = case lf_info of + LFReEntrant TopLevel _ _ _ -> True + LFThunk TopLevel _ _ _ _ -> True + _other -> False +isToplevClosure _ = False + +-------------------------------------- +-- Label generation +-------------------------------------- + +infoTableLabelFromCI :: ClosureInfo -> CLabel +infoTableLabelFromCI cl@(ClosureInfo { closureName = name, + closureLFInfo = lf_info }) + = case lf_info of + LFBlackHole info -> info + + LFThunk _ _ upd_flag (SelectorThunk offset) _ -> + mkSelectorInfoLabel upd_flag offset + + LFThunk _ _ upd_flag (ApThunk arity) _ -> + mkApInfoTableLabel upd_flag arity + + LFThunk{} -> mkLocalInfoTableLabel name $ clHasCafRefs cl + + LFReEntrant _ _ _ _ -> mkLocalInfoTableLabel name $ clHasCafRefs cl + + _other -> panic "infoTableLabelFromCI" + +infoTableLabelFromCI cl@(ConInfo { closureCon = con, closureSMRep = rep }) + | isStaticRep rep = mkStaticInfoTableLabel name $ clHasCafRefs cl + | otherwise = mkConInfoTableLabel name $ clHasCafRefs cl + where + name = dataConName con + +-- ClosureInfo for a closure (as opposed to a constructor) is always local +closureLabelFromCI :: ClosureInfo -> CLabel +closureLabelFromCI cl@(ClosureInfo { closureName = nm }) = + mkLocalClosureLabel nm $ clHasCafRefs cl +closureLabelFromCI _ = panic "closureLabelFromCI" + +thunkEntryLabel :: Name -> CafInfo -> StandardFormInfo -> Bool -> CLabel +-- thunkEntryLabel is a local help function, not exported. It's used from both +-- entryLabelFromCI and getCallMethod. +thunkEntryLabel _thunk_id _ (ApThunk arity) upd_flag + = enterApLabel upd_flag arity +thunkEntryLabel _thunk_id _ (SelectorThunk offset) upd_flag + = enterSelectorLabel upd_flag offset +thunkEntryLabel thunk_id c _ _ + = enterIdLabel thunk_id c + +enterApLabel :: Bool -> Arity -> CLabel +enterApLabel is_updatable arity + | tablesNextToCode = mkApInfoTableLabel is_updatable arity + | otherwise = mkApEntryLabel is_updatable arity + +enterSelectorLabel :: Bool -> WordOff -> CLabel +enterSelectorLabel upd_flag offset + | tablesNextToCode = mkSelectorInfoLabel upd_flag offset + | otherwise = mkSelectorEntryLabel upd_flag offset + +enterIdLabel :: Name -> CafInfo -> CLabel +enterIdLabel id c + | tablesNextToCode = mkInfoTableLabel id c + | otherwise = mkEntryLabel id c + +enterLocalIdLabel :: Name -> CafInfo -> CLabel +enterLocalIdLabel id c + | tablesNextToCode = mkLocalInfoTableLabel id c + | otherwise = mkLocalEntryLabel id c + + +-------------------------------------- +-- Profiling +-------------------------------------- + +-- Profiling requires two pieces of information to be determined for +-- each closure's info table --- description and type. + +-- The description is stored directly in the @CClosureInfoTable@ when the +-- info table is built. + +-- The type is determined from the type information stored with the @Id@ +-- in the closure info using @closureTypeDescr@. + +closureValDescr, closureTypeDescr :: ClosureInfo -> String +closureValDescr (ClosureInfo {closureDescr = descr}) + = descr +closureValDescr (ConInfo {closureCon = con}) + = occNameString (getOccName con) + +closureTypeDescr (ClosureInfo { closureType = ty }) + = getTyDescription ty +closureTypeDescr (ConInfo { closureCon = data_con }) + = occNameString (getOccName (dataConTyCon data_con)) + +getTyDescription :: Type -> String +getTyDescription ty + = case (tcSplitSigmaTy ty) of { (_, _, tau_ty) -> + case tau_ty of + TyVarTy _ -> "*" + AppTy fun _ -> getTyDescription fun + FunTy _ res -> '-' : '>' : fun_result res + TyConApp tycon _ -> getOccString tycon + PredTy sty -> getPredTyDescription sty + ForAllTy _ ty -> getTyDescription ty + } + where + fun_result (FunTy _ res) = '>' : fun_result res + fun_result other = getTyDescription other + +getPredTyDescription :: PredType -> String +getPredTyDescription (ClassP cl _) = getOccString cl +getPredTyDescription (IParam ip _) = getOccString (ipNameName ip) +getPredTyDescription (EqPred ty1 _ty2) = getTyDescription ty1 -- Urk? + + +-------------------------------------- +-- SRTs/CAFs +-------------------------------------- + +-- This is horrible, but we need to know whether a closure may have CAFs. +clHasCafRefs :: ClosureInfo -> CafInfo +clHasCafRefs (ClosureInfo {closureSRT = srt}) = + case srt of NoC_SRT -> NoCafRefs + _ -> MayHaveCafRefs +clHasCafRefs (ConInfo {}) = NoCafRefs diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs new file mode 100644 index 0000000000..de1d77ad20 --- /dev/null +++ b/compiler/codeGen/StgCmmCon.hs @@ -0,0 +1,216 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C--: code generation for constructors +-- +-- This module provides the support code for StgCmm to deal with with +-- constructors on the RHSs of let(rec)s. +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmCon ( + cgTopRhsCon, buildDynCon, bindConArgs + ) where + +#include "HsVersions.h" + +import StgSyn +import CoreSyn ( AltCon(..) ) + +import StgCmmMonad +import StgCmmEnv +import StgCmmHeap +import StgCmmUtils +import StgCmmClosure +import StgCmmProf + +import Cmm +import CLabel +import SMRep +import CostCentre +import Constants +import DataCon +import FastString +import Id +import Literal +import PrelInfo +import Outputable +import Util ( lengthIs ) +import Char ( ord ) + + +--------------------------------------------------------------- +-- Top-level constructors +--------------------------------------------------------------- + +cgTopRhsCon :: Id -- Name of thing bound to this RHS + -> DataCon -- Id + -> [StgArg] -- Args + -> FCode (Id, CgIdInfo) +cgTopRhsCon id con args + = do { +#if mingw32_TARGET_OS + -- Windows DLLs have a problem with static cross-DLL refs. + ; this_pkg <- getThisPackage + ; ASSERT( not (isDllConApp this_pkg con args) ) return () +#endif + ; ASSERT( args `lengthIs` dataConRepArity con ) return () + + -- LAY IT OUT + ; let + name = idName id + lf_info = mkConLFInfo con + closure_label = mkClosureLabel name $ idCafInfo id + caffy = any stgArgHasCafRefs args + (closure_info, nv_args_w_offsets) + = layOutStaticConstr con (addArgReps args) + + get_lit (arg, _offset) = do { CmmLit lit <- getArgAmode arg + ; return lit } + + ; payload <- mapM get_lit nv_args_w_offsets + -- NB1: nv_args_w_offsets is sorted into ptrs then non-ptrs + -- NB2: all the amodes should be Lits! + + ; let closure_rep = mkStaticClosureFields + closure_info + dontCareCCS -- Because it's static data + caffy -- Has CAF refs + payload + + -- BUILD THE OBJECT + ; emitDataLits closure_label closure_rep + + -- RETURN + ; return (id, litIdInfo id lf_info (CmmLabel closure_label)) } + + +--------------------------------------------------------------- +-- Lay out and allocate non-top-level constructors +--------------------------------------------------------------- + +buildDynCon :: Id -- Name of the thing to which this constr will + -- be bound + -> CostCentreStack -- Where to grab cost centre from; + -- current CCS if currentOrSubsumedCCS + -> DataCon -- The data constructor + -> [StgArg] -- Its args + -> FCode CgIdInfo -- Return details about how to find it + +{- We used to pass a boolean indicating whether all the +args were of size zero, so we could use a static +construtor; but I concluded that it just isn't worth it. +Now I/O uses unboxed tuples there just aren't any constructors +with all size-zero args. + +The reason for having a separate argument, rather than looking at +the addr modes of the args is that we may be in a "knot", and +premature looking at the args will cause the compiler to black-hole! +-} + + +-------- buildDynCon: Nullary constructors -------------- +-- First we deal with the case of zero-arity constructors. They +-- will probably be unfolded, so we don't expect to see this case much, +-- if at all, but it does no harm, and sets the scene for characters. +-- +-- In the case of zero-arity constructors, or, more accurately, those +-- which have exclusively size-zero (VoidRep) args, we generate no code +-- at all. + +buildDynCon binder _cc con [] + = return (litIdInfo binder (mkConLFInfo con) + (CmmLabel (mkClosureLabel (dataConName con) (idCafInfo binder)))) + +-------- buildDynCon: Charlike and Intlike constructors ----------- +{- The following three paragraphs about @Char@-like and @Int@-like +closures are obsolete, but I don't understand the details well enough +to properly word them, sorry. I've changed the treatment of @Char@s to +be analogous to @Int@s: only a subset is preallocated, because @Char@ +has now 31 bits. Only literals are handled here. -- Qrczak + +Now for @Char@-like closures. We generate an assignment of the +address of the closure to a temporary. It would be possible simply to +generate no code, and record the addressing mode in the environment, +but we'd have to be careful if the argument wasn't a constant --- so +for simplicity we just always asssign to a temporary. + +Last special case: @Int@-like closures. We only special-case the +situation in which the argument is a literal in the range +@mIN_INTLIKE@..@mAX_INTLILKE@. NB: for @Char@-like closures we can +work with any old argument, but for @Int@-like ones the argument has +to be a literal. Reason: @Char@ like closures have an argument type +which is guaranteed in range. + +Because of this, we use can safely return an addressing mode. -} + +buildDynCon binder _cc con [arg] + | maybeIntLikeCon con + , StgLitArg (MachInt val) <- arg + , val <= fromIntegral mAX_INTLIKE -- Comparisons at type Integer! + , val >= fromIntegral mIN_INTLIKE -- ...ditto... + = do { let intlike_lbl = mkRtsGcPtrLabel (sLit "stg_INTLIKE_closure") + val_int = fromIntegral val :: Int + offsetW = (val_int - mIN_INTLIKE) * (fixedHdrSize + 1) + -- INTLIKE closures consist of a header and one word payload + intlike_amode = cmmLabelOffW intlike_lbl offsetW + ; return (litIdInfo binder (mkConLFInfo con) intlike_amode) } + +buildDynCon binder _cc con [arg] + | maybeCharLikeCon con + , StgLitArg (MachChar val) <- arg + , let val_int = ord val :: Int + , val_int <= mAX_CHARLIKE + , val_int >= mIN_CHARLIKE + = do { let charlike_lbl = mkRtsGcPtrLabel (sLit "stg_CHARLIKE_closure") + offsetW = (val_int - mIN_CHARLIKE) * (fixedHdrSize + 1) + -- CHARLIKE closures consist of a header and one word payload + charlike_amode = cmmLabelOffW charlike_lbl offsetW + ; return (litIdInfo binder (mkConLFInfo con) charlike_amode) } + +-------- buildDynCon: the general case ----------- +buildDynCon binder ccs con args + = do { let (cl_info, args_w_offsets) = layOutDynConstr con (addArgReps args) + -- No void args in args_w_offsets + ; tmp <- allocDynClosure cl_info use_cc blame_cc args_w_offsets + ; return (regIdInfo binder lf_info tmp) } + where + lf_info = mkConLFInfo con + + use_cc -- cost-centre to stick in the object + | currentOrSubsumedCCS ccs = curCCS + | otherwise = CmmLit (mkCCostCentreStack ccs) + + blame_cc = use_cc -- cost-centre on which to blame the alloc (same) + + +--------------------------------------------------------------- +-- Binding constructor arguments +--------------------------------------------------------------- + +bindConArgs :: AltCon -> LocalReg -> [Id] -> FCode [LocalReg] +-- bindConArgs is called from cgAlt of a case +-- (bindConArgs con args) augments the environment with bindings for the +-- binders args, assuming that we have just returned from a 'case' which +-- found a con +bindConArgs (DataAlt con) base args + = ASSERT(not (isUnboxedTupleCon con)) + mapM bind_arg args_w_offsets + where + (_, args_w_offsets) = layOutDynConstr con (addIdReps args) + + tag = tagForCon con + + -- The binding below forces the masking out of the tag bits + -- when accessing the constructor field. + bind_arg :: (Id, VirtualHpOffset) -> FCode LocalReg + bind_arg (arg, offset) + = do { emit $ mkTaggedObjectLoad (idToReg arg) base offset tag + ; bindArgToReg arg } + +bindConArgs _other_con _base args + = ASSERT( null args ) return [] + + + diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs new file mode 100644 index 0000000000..c43bf80174 --- /dev/null +++ b/compiler/codeGen/StgCmmEnv.hs @@ -0,0 +1,209 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C-- code generation: the binding environment +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmEnv ( + CgIdInfo, + + cgIdInfoId, cgIdInfoLF, + + litIdInfo, lneIdInfo, regIdInfo, + idInfoToAmode, + + addBindC, addBindsC, + + bindArgsToRegs, bindToReg, rebindToReg, + bindArgToReg, idToReg, + getArgAmode, getNonVoidArgAmodes, + getCgIdInfo, + maybeLetNoEscape, + ) where + +#include "HsVersions.h" + +import StgCmmMonad +import StgCmmUtils +import StgCmmClosure + +import CLabel + +import BlockId +import Cmm +import CmmUtils +import FastString +import PprCmm ( {- instance Outputable -} ) +import Id +import VarEnv +import Maybes +import Name +import StgSyn +import Outputable + + + +------------------------------------- +-- Manipulating CgIdInfo +------------------------------------- + +mkCgIdInfo :: Id -> LambdaFormInfo -> CmmExpr -> CgIdInfo +mkCgIdInfo id lf expr + = CgIdInfo { cg_id = id, cg_loc = CmmLoc expr, + cg_lf = lf, cg_rep = idPrimRep id, + cg_tag = lfDynTag lf } + +lneIdInfo :: Id -> [LocalReg] -> CgIdInfo +lneIdInfo id regs + = CgIdInfo { cg_id = id, cg_loc = LneLoc blk_id regs, + cg_lf = lf, cg_rep = idPrimRep id, + cg_tag = lfDynTag lf } + where + lf = mkLFLetNoEscape + blk_id = mkBlockId (idUnique id) + +litIdInfo :: Id -> LambdaFormInfo -> CmmLit -> CgIdInfo +litIdInfo id lf_info lit = mkCgIdInfo id lf_info (CmmLit lit) + +regIdInfo :: Id -> LambdaFormInfo -> LocalReg -> CgIdInfo +regIdInfo id lf_info reg = mkCgIdInfo id lf_info (CmmReg (CmmLocal reg)) + +idInfoToAmode :: CgIdInfo -> CmmExpr +-- Returns a CmmExpr for the *tagged* pointer +idInfoToAmode (CgIdInfo { cg_loc = CmmLoc e, cg_tag = tag }) + = addDynTag e tag +idInfoToAmode cg_info + = pprPanic "idInfoToAmode" (ppr (cg_id cg_info)) -- LneLoc + +addDynTag :: CmmExpr -> DynTag -> CmmExpr +-- A tag adds a byte offset to the pointer +addDynTag expr tag = cmmOffsetB expr tag + +cgIdInfoId :: CgIdInfo -> Id +cgIdInfoId = cg_id + +cgIdInfoLF :: CgIdInfo -> LambdaFormInfo +cgIdInfoLF = cg_lf + +maybeLetNoEscape :: CgIdInfo -> Maybe (BlockId, [LocalReg]) +maybeLetNoEscape (CgIdInfo { cg_loc = LneLoc blk_id args}) = Just (blk_id, args) +maybeLetNoEscape _other = Nothing + + + +--------------------------------------------------------- +-- The binding environment +-- +-- There are three basic routines, for adding (addBindC), +-- modifying(modifyBindC) and looking up (getCgIdInfo) bindings. +--------------------------------------------------------- + +addBindC :: Id -> CgIdInfo -> FCode () +addBindC name stuff_to_bind = do + binds <- getBinds + setBinds $ extendVarEnv binds name stuff_to_bind + +addBindsC :: [(Id, CgIdInfo)] -> FCode () +addBindsC new_bindings = do + binds <- getBinds + let new_binds = foldl (\ binds (name,info) -> extendVarEnv binds name info) + binds + new_bindings + setBinds new_binds + +getCgIdInfo :: Id -> FCode CgIdInfo +getCgIdInfo id + = do { -- Try local bindings first + ; local_binds <- getBinds + ; case lookupVarEnv local_binds id of { + Just info -> return info ; + Nothing -> do + + { -- Try top-level bindings + static_binds <- getStaticBinds + ; case lookupVarEnv static_binds id of { + Just info -> return info ; + Nothing -> + + -- Should be imported; make up a CgIdInfo for it + let + name = idName id + in + if isExternalName name then do + let ext_lbl = CmmLabel (mkClosureLabel name $ idCafInfo id) + return (litIdInfo id (mkLFImported id) ext_lbl) + else + -- Bug + cgLookupPanic id + }}}} + +cgLookupPanic :: Id -> FCode a +cgLookupPanic id + = do static_binds <- getStaticBinds + local_binds <- getBinds + srt <- getSRTLabel + pprPanic "StgCmmEnv: variable not found" + (vcat [ppr id, + ptext (sLit "static binds for:"), + vcat [ ppr (cg_id info) | info <- varEnvElts static_binds ], + ptext (sLit "local binds for:"), + vcat [ ppr (cg_id info) | info <- varEnvElts local_binds ], + ptext (sLit "SRT label") <+> pprCLabel srt + ]) + + +-------------------- +getArgAmode :: StgArg -> FCode CmmExpr +getArgAmode (StgVarArg var) = do { info <- getCgIdInfo var; return (idInfoToAmode info) } +getArgAmode (StgLitArg lit) = return (CmmLit (mkSimpleLit lit)) +getArgAmode (StgTypeArg _) = panic "getArgAmode: type arg" + +getNonVoidArgAmodes :: [StgArg] -> FCode [CmmExpr] +-- NB: Filters out void args, +-- so the result list may be shorter than the argument list +getNonVoidArgAmodes [] = return [] +getNonVoidArgAmodes (arg:args) + | isVoidRep (argPrimRep arg) = getNonVoidArgAmodes args + | otherwise = do { amode <- getArgAmode arg + ; amodes <- getNonVoidArgAmodes args + ; return ( amode : amodes ) } + + +------------------------------------------------------------------------ +-- Interface functions for binding and re-binding names +------------------------------------------------------------------------ + +bindToReg :: Id -> LambdaFormInfo -> FCode LocalReg +-- Bind an Id to a fresh LocalReg +bindToReg id lf_info + = do { let reg = idToReg id + ; addBindC id (regIdInfo id lf_info reg) + ; return reg } + +rebindToReg :: Id -> FCode LocalReg +-- Like bindToReg, but the Id is already in scope, so +-- get its LF info from the envt +rebindToReg id + = do { info <- getCgIdInfo id + ; bindToReg id (cgIdInfoLF info) } + +bindArgToReg :: Id -> FCode LocalReg +bindArgToReg id = bindToReg id (mkLFArgument id) + +bindArgsToRegs :: [Id] -> FCode [LocalReg] +bindArgsToRegs args = mapM bindArgToReg args + +idToReg :: Id -> LocalReg +-- Make a register from an Id, typically a function argument, +-- free variable, or case binder +-- +-- We re-use the Unique from the Id to make it easier to see what is going on +-- +-- By now the Ids should be uniquely named; else one would worry +-- about accidental collision +idToReg id = LocalReg (idUnique id) + (primRepCmmType (idPrimRep id)) + + diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs new file mode 100644 index 0000000000..74c69b7216 --- /dev/null +++ b/compiler/codeGen/StgCmmExpr.hs @@ -0,0 +1,451 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C-- code generation: expressions +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmExpr ( cgExpr ) where + +#define FAST_STRING_NOT_NEEDED +#include "HsVersions.h" + +import {-# SOURCE #-} StgCmmBind ( cgBind ) + +import StgCmmMonad +import StgCmmHeap +import StgCmmEnv +import StgCmmCon +import StgCmmProf +import StgCmmLayout +import StgCmmPrim +import StgCmmHpc +import StgCmmTicky +import StgCmmUtils +import StgCmmClosure + +import StgSyn + +import MkZipCfgCmm +import BlockId +import Cmm() +import CmmExpr +import CoreSyn +import DataCon +import Id +import TyCon +import CostCentre ( CostCentreStack, currentCCS ) +import Maybes +import Util +import FastString +import Outputable + +------------------------------------------------------------------------ +-- cgExpr: the main function +------------------------------------------------------------------------ + +cgExpr :: StgExpr -> FCode () + +cgExpr (StgApp fun args) = cgIdApp fun args +cgExpr (StgOpApp op args ty) = cgOpApp op args ty +cgExpr (StgConApp con args) = cgConApp con args + +cgExpr (StgSCC cc expr) = do { emitSetCCC cc; cgExpr expr } +cgExpr (StgTick m n expr) = do { emit (mkTickBox m n); cgExpr expr } +cgExpr (StgLit lit) = emitReturn [CmmLit (mkSimpleLit lit)] + +cgExpr (StgLet binds expr) = do { emit (mkComment $ mkFastString "calling cgBind"); cgBind binds; emit (mkComment $ mkFastString "calling cgExpr"); cgExpr expr } +cgExpr (StgLetNoEscape _ _ binds expr) = do { cgLneBinds binds; cgExpr expr } + +cgExpr (StgCase expr _live_vars _save_vars bndr srt alt_type alts) + = cgCase expr bndr srt alt_type alts + +cgExpr (StgLam {}) = panic "cgExpr: StgLam" + +------------------------------------------------------------------------ +-- Let no escape +------------------------------------------------------------------------ + +{- Generating code for a let-no-escape binding, aka join point is very +very similar to whatwe do for a case expression. The duality is +between + let-no-escape x = b + in e +and + case e of ... -> b + +That is, the RHS of 'x' (ie 'b') will execute *later*, just like +the alternative of the case; it needs to be compiled in an environment +in which all volatile bindings are forgotten, and the free vars are +bound only to stable things like stack locations.. The 'e' part will +execute *next*, just like the scrutinee of a case. -} + +------------------------- +cgLneBinds :: StgBinding -> FCode () +cgLneBinds (StgNonRec bndr rhs) + = do { local_cc <- saveCurrentCostCentre + -- See Note [Saving the current cost centre] + ; (bndr,info) <- cgLetNoEscapeRhs local_cc bndr rhs + ; addBindC bndr info } + +cgLneBinds (StgRec pairs) + = do { local_cc <- saveCurrentCostCentre + ; new_bindings <- fixC (\ new_bindings -> do + { addBindsC new_bindings + ; listFCs [ cgLetNoEscapeRhs local_cc b e + | (b,e) <- pairs ] }) + + ; addBindsC new_bindings } + +------------------------- +cgLetNoEscapeRhs + :: Maybe LocalReg -- Saved cost centre + -> Id + -> StgRhs + -> FCode (Id, CgIdInfo) + +cgLetNoEscapeRhs local_cc bndr (StgRhsClosure cc _bi _ _upd srt args body) + = cgLetNoEscapeClosure bndr local_cc cc srt args body +cgLetNoEscapeRhs local_cc bndr (StgRhsCon cc con args) + = cgLetNoEscapeClosure bndr local_cc cc NoSRT [] (StgConApp con args) + -- For a constructor RHS we want to generate a single chunk of + -- code which can be jumped to from many places, which will + -- return the constructor. It's easy; just behave as if it + -- was an StgRhsClosure with a ConApp inside! + +------------------------- +cgLetNoEscapeClosure + :: Id -- binder + -> Maybe LocalReg -- Slot for saved current cost centre + -> CostCentreStack -- XXX: *** NOT USED *** why not? + -> SRT + -> [Id] -- Args (as in \ args -> body) + -> StgExpr -- Body (as in above) + -> FCode (Id, CgIdInfo) + +cgLetNoEscapeClosure bndr cc_slot _unused_cc srt args body + = do { arg_regs <- forkProc $ do + { restoreCurrentCostCentre cc_slot + ; arg_regs <- bindArgsToRegs args + ; c_srt <- getSRTInfo srt + ; altHeapCheck arg_regs c_srt (cgExpr body) + -- Using altHeapCheck just reduces + -- instructions to save on stack + ; return arg_regs } + ; return (bndr, lneIdInfo bndr arg_regs) } + + +------------------------------------------------------------------------ +-- Case expressions +------------------------------------------------------------------------ + +{- Note [Compiling case expressions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It is quite interesting to decide whether to put a heap-check at the +start of each alternative. Of course we certainly have to do so if +the case forces an evaluation, or if there is a primitive op which can +trigger GC. + +A more interesting situation is this (a Plan-B situation) + + !P!; + ...P... + case x# of + 0# -> !Q!; ...Q... + default -> !R!; ...R... + +where !x! indicates a possible heap-check point. The heap checks +in the alternatives *can* be omitted, in which case the topmost +heapcheck will take their worst case into account. + +In favour of omitting !Q!, !R!: + + - *May* save a heap overflow test, + if ...P... allocates anything. + + - We can use relative addressing from a single Hp to + get at all the closures so allocated. + + - No need to save volatile vars etc across heap checks + in !Q!, !R! + +Against omitting !Q!, !R! + + - May put a heap-check into the inner loop. Suppose + the main loop is P -> R -> P -> R... + Q is the loop exit, and only it does allocation. + This only hurts us if P does no allocation. If P allocates, + then there is a heap check in the inner loop anyway. + + - May do more allocation than reqd. This sometimes bites us + badly. For example, nfib (ha!) allocates about 30\% more space if the + worst-casing is done, because many many calls to nfib are leaf calls + which don't need to allocate anything. + + We can un-allocate, but that costs an instruction + +Neither problem hurts us if there is only one alternative. + +Suppose the inner loop is P->R->P->R etc. Then here is +how many heap checks we get in the *inner loop* under various +conditions + + Alooc Heap check in branches (!Q!, !R!)? + P Q R yes no (absorb to !P!) +-------------------------------------- + n n n 0 0 + n y n 0 1 + n . y 1 1 + y . y 2 1 + y . n 1 1 + +Best choices: absorb heap checks from Q and R into !P! iff + a) P itself does some allocation +or + b) P does allocation, or there is exactly one alternative + +We adopt (b) because that is more likely to put the heap check at the +entry to a function, when not many things are live. After a bunch of +single-branch cases, we may have lots of things live + +Hence: two basic plans for + + case e of r { alts } + +------ Plan A: the general case --------- + + ...save current cost centre... + + ...code for e, + with sequel (SetLocals r) + + ...restore current cost centre... + ...code for alts... + ...alts do their own heap checks + +------ Plan B: special case when --------- + (i) e does not allocate or call GC + (ii) either upstream code performs allocation + or there is just one alternative + + Then heap allocation in the (single) case branch + is absorbed by the upstream check. + Very common example: primops on unboxed values + + ...code for e, + with sequel (SetLocals r)... + + ...code for alts... + ...no heap check... +-} + + + +------------------------------------- +data GcPlan + = GcInAlts -- Put a GC check at the start the case alternatives, + [LocalReg] -- which binds these registers + SRT -- using this SRT + | NoGcInAlts -- The scrutinee is a primitive value, or a call to a + -- primitive op which does no GC. Absorb the allocation + -- of the case alternative(s) into the upstream check + +------------------------------------- +cgCase :: StgExpr -> Id -> SRT -> AltType -> [StgAlt] -> FCode () +cgCase scrut bndr srt alt_type alts + = do { up_hp_usg <- getVirtHp -- Upstream heap usage + ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts + alt_regs = map idToReg ret_bndrs + simple_scrut = isSimpleScrut scrut alt_type + gc_plan | not simple_scrut = GcInAlts alt_regs srt + | isSingleton alts = NoGcInAlts + | up_hp_usg > 0 = NoGcInAlts + | otherwise = GcInAlts alt_regs srt + + ; mb_cc <- maybeSaveCostCentre simple_scrut + ; c_srt <- getSRTInfo srt + ; withSequel (AssignTo alt_regs c_srt) + (cgExpr scrut) + ; restoreCurrentCostCentre mb_cc + + ; bindArgsToRegs ret_bndrs + ; cgAlts gc_plan bndr alt_type alts } + +----------------- +maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg) +maybeSaveCostCentre simple_scrut + | simple_scrut = saveCurrentCostCentre + | otherwise = return Nothing + + + +----------------- +isSimpleScrut :: StgExpr -> AltType -> Bool +-- Simple scrutinee, does not allocate +isSimpleScrut (StgOpApp _ _ _) _ = True +isSimpleScrut (StgLit _) _ = True +isSimpleScrut (StgApp _ []) (PrimAlt _) = True +isSimpleScrut _ _ = False + +----------------- +chooseReturnBndrs :: Id -> AltType -> [StgAlt] -> [Id] +-- These are the binders of a case that are assigned +-- by the evaluation of the scrutinee +-- Only non-void ones come back +chooseReturnBndrs bndr (PrimAlt _) _alts + = nonVoidIds [bndr] + +chooseReturnBndrs _bndr (UbxTupAlt _) [(_, ids, _, _)] + = nonVoidIds ids -- 'bndr' is not assigned! + +chooseReturnBndrs bndr (AlgAlt _) _alts + = [bndr] -- Only 'bndr' is assigned + +chooseReturnBndrs bndr PolyAlt _alts + = [bndr] -- Only 'bndr' is assigned + +chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs" + -- UbxTupALt has only one alternative + +nonVoidIds :: [Id] -> [Id] +nonVoidIds ids = [id | id <- ids, not (isVoidRep (idPrimRep id))] + +------------------------------------- +cgAlts :: GcPlan -> Id -> AltType -> [StgAlt] -> FCode () +-- At this point the result of the case are in the binders +cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)] + = maybeAltHeapCheck gc_plan (cgExpr rhs) + +cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)] + = maybeAltHeapCheck gc_plan (cgExpr rhs) + -- Here bndrs are *already* in scope, so don't rebind them + +cgAlts gc_plan bndr (PrimAlt _) alts + = do { tagged_cmms <- cgAltRhss gc_plan bndr alts + + ; let bndr_reg = CmmLocal (idToReg bndr) + (DEFAULT,deflt) = head tagged_cmms + -- PrimAlts always have a DEFAULT case + -- and it always comes first + + tagged_cmms' = [(lit,code) + | (LitAlt lit, code) <- tagged_cmms] + ; emit (mkCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt) } + +cgAlts gc_plan bndr (AlgAlt tycon) alts + = do { tagged_cmms <- cgAltRhss gc_plan bndr alts + + ; let fam_sz = tyConFamilySize tycon + bndr_reg = CmmLocal (idToReg bndr) + mb_deflt = case tagged_cmms of + ((DEFAULT,rhs) : _) -> Just rhs + _other -> Nothing + -- DEFAULT is always first, if present + + branches = [ (dataConTagZ con, cmm) + | (DataAlt con, cmm) <- tagged_cmms ] + + -- Is the constructor tag in the node reg? + ; if isSmallFamily fam_sz + then let -- Yes, bndr_reg has constr. tag in ls bits + tag_expr = cmmConstrTag1 (CmmReg bndr_reg) + branches' = [(tag+1,branch) | (tag,branch) <- branches] + in + emitSwitch tag_expr branches' mb_deflt 1 fam_sz + + else -- No, get tag from info table + let -- Note that ptr _always_ has tag 1 + -- when the family size is big enough + untagged_ptr = cmmRegOffB bndr_reg (-1) + tag_expr = getConstrTag (untagged_ptr) + in + emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) } + +cgAlts _ _ _ _ = panic "cgAlts" + -- UbxTupAlt and PolyAlt have only one alternative + +------------------- +cgAltRhss :: GcPlan -> Id -> [StgAlt] -> FCode [(AltCon, CmmAGraph)] +cgAltRhss gc_plan bndr alts + = forkAlts (map cg_alt alts) + where + base_reg = idToReg bndr + cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph) + cg_alt (con, bndrs, _uses, rhs) + = getCodeR $ + maybeAltHeapCheck gc_plan $ + do { bindConArgs con base_reg bndrs + ; cgExpr rhs + ; return con } + +maybeAltHeapCheck :: GcPlan -> FCode a -> FCode a +maybeAltHeapCheck NoGcInAlts code + = code +maybeAltHeapCheck (GcInAlts regs srt) code + = do { c_srt <- getSRTInfo srt + ; altHeapCheck regs c_srt code } + +----------------------------------------------------------------------------- +-- Tail calls +----------------------------------------------------------------------------- + +cgConApp :: DataCon -> [StgArg] -> FCode () +cgConApp con stg_args + = ASSERT( stg_args `lengthIs` dataConRepArity con ) + do { idinfo <- buildDynCon (dataConWorkId con) currentCCS con stg_args + -- The first "con" says that the name bound to this closure is + -- is "con", which is a bit of a fudge, but it only affects profiling + + ; emitReturn [idInfoToAmode idinfo] } + +cgIdApp :: Id -> [StgArg] -> FCode () +cgIdApp fun_id args + = do { fun_info <- getCgIdInfo fun_id + ; case maybeLetNoEscape fun_info of + Just (blk_id, lne_regs) -> cgLneJump blk_id lne_regs args + Nothing -> cgTailCall fun_id fun_info args } + +cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode () +cgLneJump blk_id lne_regs args -- Join point; discard sequel + = do { cmm_args <- getNonVoidArgAmodes args + ; emit (mkMultiAssign lne_regs cmm_args + <*> mkBranch blk_id) } + +cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode () +cgTailCall fun_id fun_info args + = case (getCallMethod fun_name (idCafInfo fun_id) lf_info (length args)) of + + -- A value in WHNF, so we can just return it. + ReturnIt -> emitReturn [fun] -- ToDo: does ReturnIt guarantee tagged? + + EnterIt -> ASSERT( null args ) -- Discarding arguments + do { [ret,call] <- forkAlts [ + getCode $ emitReturn [fun], -- Is tagged; no need to untag + getCode $ emitCall (entryCode fun) [fun]] -- Not tagged + ; emit (mkCmmIfThenElse (cmmIsTagged fun) ret call) } + + SlowCall -> do -- A slow function call via the RTS apply routines + { tickySlowCall lf_info args + ; slowCall fun args } + + -- A direct function call (possibly with some left-over arguments) + DirectEntry lbl arity -> do + { tickyDirectCall arity args + ; if node_points then + do call <- getCode $ directCall lbl arity args + emit (mkAssign nodeReg fun <*> call) + -- directCall lbl (arity+1) (StgVarArg fun_id : args)) + -- >>= (emit . (mkComment (mkFastString "DirectEntry") <*>)) + else directCall lbl arity args } + + JumpToIt {} -> panic "cgTailCall" -- ??? + + where + fun_name = idName fun_id + fun = idInfoToAmode fun_info + lf_info = cgIdInfoLF fun_info + node_points = nodeMustPointToIt lf_info + + + diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs new file mode 100644 index 0000000000..2d5d79e6ff --- /dev/null +++ b/compiler/codeGen/StgCmmForeign.hs @@ -0,0 +1,316 @@ +{-# OPTIONS -w #-} +-- Lots of missing type sigs etc + +----------------------------------------------------------------------------- +-- +-- Code generation for foreign calls. +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmForeign ( + cgForeignCall, + emitPrimCall, emitCCall, + emitSaveThreadState, -- will be needed by the Cmm parser + emitLoadThreadState, -- ditto + emitCloseNursery, + emitOpenNursery, + ) where + +#include "HsVersions.h" + +import StgSyn +import StgCmmProf +import StgCmmEnv +import StgCmmMonad +import StgCmmUtils +import StgCmmClosure + +import MkZipCfgCmm +import Cmm +import CmmUtils +import Type +import TysPrim +import CLabel +import SMRep +import ForeignCall +import Constants +import StaticFlags +import Maybes +import Outputable + +import Control.Monad + +----------------------------------------------------------------------------- +-- Code generation for Foreign Calls +----------------------------------------------------------------------------- + +cgForeignCall :: [LocalReg] -- r1,r2 where to put the results + -> [ForeignHint] + -> ForeignCall -- the op + -> [StgArg] -- x,y arguments + -> FCode () +-- Emits code for an unsafe foreign call: r1, r2 = foo( x, y, z ) + +cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_args + = do { cmm_args <- getFCallArgs stg_args + ; let (args, arg_hints) = unzip cmm_args + fc = ForeignConvention cconv arg_hints result_hints + (call_args, cmm_target) + = case target of + StaticTarget lbl -> (args, CmmLit (CmmLabel + (mkForeignLabel lbl (call_size args) False))) + DynamicTarget -> case args of fn:rest -> (rest, fn) + call_target = ForeignTarget cmm_target fc + + ; srt <- getSRTInfo (panic "emitForeignCall") -- SLPJ: Not sure what SRT + -- is right here + ; emitForeignCall safety results call_target call_args srt CmmMayReturn } + where + -- in the stdcall calling convention, the symbol needs @size appended + -- to it, where size is the total number of bytes of arguments. We + -- attach this info to the CLabel here, and the CLabel pretty printer + -- will generate the suffix when the label is printed. + call_size args + | StdCallConv <- cconv = Just (sum (map arg_size args)) + | otherwise = Nothing + + -- ToDo: this might not be correct for 64-bit API + arg_size arg = max (widthInBytes $ typeWidth $ cmmExprType arg) wORD_SIZE + +cgForeignCall _ _ (DNCall _) _ + = panic "cgForeignCall: DNCall" + +emitCCall :: [(CmmFormal,ForeignHint)] + -> CmmExpr + -> [(CmmActual,ForeignHint)] + -> FCode () +emitCCall hinted_results fn hinted_args + = emitForeignCall PlayRisky results (ForeignTarget fn fc) args + NoC_SRT -- No SRT b/c we PlayRisky + CmmMayReturn + where + (args, arg_hints) = unzip hinted_args + (results, result_hints) = unzip hinted_results + target = ForeignTarget fn fc + fc = ForeignConvention CCallConv arg_hints result_hints + + +emitPrimCall :: CmmFormal -> CallishMachOp -> CmmActuals -> FCode () +emitPrimCall res op args + = emitForeignCall PlayRisky [res] (PrimTarget op) args NoC_SRT CmmMayReturn + +-- alternative entry point, used by CmmParse +emitForeignCall + :: Safety + -> CmmFormals -- where to put the results + -> MidCallTarget -- the op + -> CmmActuals -- arguments + -> C_SRT -- the SRT of the calls continuation + -> CmmReturnInfo -- This can say "never returns" + -- only RTS procedures do this + -> FCode () +emitForeignCall safety results target args _srt _ret + | not (playSafe safety) = trace "emitForeignCall; ret is undone" $ do + let (caller_save, caller_load) = callerSaveVolatileRegs + emit caller_save + emit (mkUnsafeCall target results args) + emit caller_load + + | otherwise = panic "ToDo: emitForeignCall'" + +{- + | otherwise = do + -- Both 'id' and 'new_base' are KindNonPtr because they're + -- RTS only objects and are not subject to garbage collection + id <- newTemp bWord + new_base <- newTemp (cmmRegType (CmmGlobal BaseReg)) + temp_target <- load_target_into_temp target + let (caller_save, caller_load) = callerSaveVolatileRegs + emitSaveThreadState + emit caller_save + -- The CmmUnsafe arguments are only correct because this part + -- of the code hasn't been moved into the CPS pass yet. + -- Once that happens, this function will just emit a (CmmSafe srt) call, + -- and the CPS will will be the one to convert that + -- to this sequence of three CmmUnsafe calls. + emit (mkCmmCall (CmmCallee suspendThread CCallConv) + [ (id,AddrHint) ] + [ (CmmReg (CmmGlobal BaseReg), AddrHint) ] + CmmUnsafe + ret) + emit (mkCmmCall temp_target results args CmmUnsafe ret) + emit (mkCmmCall (CmmCallee resumeThread CCallConv) + [ (new_base, AddrHint) ] + [ (CmmReg (CmmLocal id), AddrHint) ] + CmmUnsafe + ret ) + -- Assign the result to BaseReg: we + -- might now have a different Capability! + emit (mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base))) + emit caller_load + emitLoadThreadState + +suspendThread = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("suspendThread"))) +resumeThread = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("resumeThread"))) +-} + + +{- +-- THINK ABOUT THIS (used to happen) +-- we might need to load arguments into temporaries before +-- making the call, because certain global registers might +-- overlap with registers that the C calling convention uses +-- for passing arguments. +-- +-- This is a HACK; really it should be done in the back end, but +-- it's easier to generate the temporaries here. +load_args_into_temps = mapM arg_assign_temp + where arg_assign_temp (e,hint) = do + tmp <- maybe_assign_temp e + return (tmp,hint) + +load_target_into_temp (CmmCallee expr conv) = do + tmp <- maybe_assign_temp expr + return (CmmCallee tmp conv) +load_target_into_temp other_target = + return other_target + +maybe_assign_temp e + | hasNoGlobalRegs e = return e + | otherwise = do + -- don't use assignTemp, it uses its own notion of "trivial" + -- expressions, which are wrong here. + -- this is a NonPtr because it only duplicates an existing + reg <- newTemp (cmmExprType e) --TODO FIXME NOW + emit (mkAssign (CmmLocal reg) e) + return (CmmReg (CmmLocal reg)) +-} + +-- ----------------------------------------------------------------------------- +-- Save/restore the thread state in the TSO + +-- This stuff can't be done in suspendThread/resumeThread, because it +-- refers to global registers which aren't available in the C world. + +emitSaveThreadState :: FCode () +emitSaveThreadState = do + -- CurrentTSO->sp = Sp; + emit $ mkStore (cmmOffset stgCurrentTSO tso_SP) stgSp + emitCloseNursery + -- and save the current cost centre stack in the TSO when profiling: + when opt_SccProfilingOn $ + emit (mkStore (cmmOffset stgCurrentTSO tso_CCCS) curCCS) + + -- CurrentNursery->free = Hp+1; +emitCloseNursery :: FCode () +emitCloseNursery = emit $ mkStore nursery_bdescr_free (cmmOffsetW stgHp 1) + +emitLoadThreadState :: FCode () +emitLoadThreadState = do + tso <- newTemp gcWord -- TODO FIXME NOW + emit $ catAGraphs [ + -- tso = CurrentTSO; + mkAssign (CmmLocal tso) stgCurrentTSO, + -- Sp = tso->sp; + mkAssign sp (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_SP) + bWord), + -- SpLim = tso->stack + RESERVED_STACK_WORDS; + mkAssign spLim (cmmOffsetW (cmmOffset (CmmReg (CmmLocal tso)) tso_STACK) + rESERVED_STACK_WORDS) + ] + emitOpenNursery + -- and load the current cost centre stack from the TSO when profiling: + when opt_SccProfilingOn $ + emit (mkStore curCCSAddr + (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) ccsType)) + +emitOpenNursery :: FCode () +emitOpenNursery = emit $ catAGraphs [ + -- Hp = CurrentNursery->free - 1; + mkAssign hp (cmmOffsetW (CmmLoad nursery_bdescr_free bWord) (-1)), + + -- HpLim = CurrentNursery->start + + -- CurrentNursery->blocks*BLOCK_SIZE_W - 1; + mkAssign hpLim + (cmmOffsetExpr + (CmmLoad nursery_bdescr_start bWord) + (cmmOffset + (CmmMachOp mo_wordMul [ + CmmMachOp (MO_SS_Conv W32 wordWidth) + [CmmLoad nursery_bdescr_blocks b32], + CmmLit (mkIntCLit bLOCK_SIZE) + ]) + (-1) + ) + ) + ] + + +nursery_bdescr_free = cmmOffset stgCurrentNursery oFFSET_bdescr_free +nursery_bdescr_start = cmmOffset stgCurrentNursery oFFSET_bdescr_start +nursery_bdescr_blocks = cmmOffset stgCurrentNursery oFFSET_bdescr_blocks + +tso_SP = tsoFieldB oFFSET_StgTSO_sp +tso_STACK = tsoFieldB oFFSET_StgTSO_stack +tso_CCCS = tsoProfFieldB oFFSET_StgTSO_CCCS + +-- The TSO struct has a variable header, and an optional StgTSOProfInfo in +-- the middle. The fields we're interested in are after the StgTSOProfInfo. +tsoFieldB :: ByteOff -> ByteOff +tsoFieldB off + | opt_SccProfilingOn = off + sIZEOF_StgTSOProfInfo + fixedHdrSize * wORD_SIZE + | otherwise = off + fixedHdrSize * wORD_SIZE + +tsoProfFieldB :: ByteOff -> ByteOff +tsoProfFieldB off = off + fixedHdrSize * wORD_SIZE + +stgSp = CmmReg sp +stgHp = CmmReg hp +stgCurrentTSO = CmmReg currentTSO +stgCurrentNursery = CmmReg currentNursery + +sp = CmmGlobal Sp +spLim = CmmGlobal SpLim +hp = CmmGlobal Hp +hpLim = CmmGlobal HpLim +currentTSO = CmmGlobal CurrentTSO +currentNursery = CmmGlobal CurrentNursery + +-- ----------------------------------------------------------------------------- +-- For certain types passed to foreign calls, we adjust the actual +-- value passed to the call. For ByteArray#/Array# we pass the +-- address of the actual array, not the address of the heap object. + +getFCallArgs :: [StgArg] -> FCode [(CmmExpr, ForeignHint)] +-- (a) Drop void args +-- (b) Add foriegn-call shim code +-- It's (b) that makes this differ from getNonVoidArgAmodes + +getFCallArgs args + = do { mb_cmms <- mapM get args + ; return (catMaybes mb_cmms) } + where + get arg | isVoidRep arg_rep + = return Nothing + | otherwise + = do { cmm <- getArgAmode arg + ; return (Just (add_shim arg_ty cmm, hint)) } + where + arg_ty = stgArgType arg + arg_rep = typePrimRep arg_ty + hint = typeForeignHint arg_ty + +add_shim :: Type -> CmmExpr -> CmmExpr +add_shim arg_ty expr + | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon + = cmmOffsetB expr arrPtrsHdrSize + + | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon + = cmmOffsetB expr arrWordsHdrSize + + | otherwise = expr + where + tycon = tyConAppTyCon (repType arg_ty) + -- should be a tycon app, since this is a foreign call diff --git a/compiler/codeGen/StgCmmGran.hs b/compiler/codeGen/StgCmmGran.hs new file mode 100644 index 0000000000..5fad2bfc09 --- /dev/null +++ b/compiler/codeGen/StgCmmGran.hs @@ -0,0 +1,131 @@ +----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow -2006 +-- +-- Code generation relaed to GpH +-- (a) parallel +-- (b) GranSim +-- +----------------------------------------------------------------------------- + +{-# OPTIONS -w #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + +module StgCmmGran ( + staticGranHdr,staticParHdr, + granThunk, granYield, + doGranAllocate + ) where + +-- This entire module consists of no-op stubs at the moment +-- GranSim worked once, but it certainly doesn't any more +-- I've left the calls, though, in case anyone wants to resurrect it + +import StgCmmMonad +import Id +import Cmm + +staticGranHdr :: [CmmLit] +staticGranHdr = [] + +staticParHdr :: [CmmLit] +staticParHdr = [] + +doGranAllocate :: VirtualHpOffset -> FCode () +-- Must be lazy in the amount of allocation +doGranAllocate n = return () + +granFetchAndReschedule :: [(Id,GlobalReg)] -> Bool -> FCode () +granFetchAndReschedule regs node_reqd = return () + +granYield :: [LocalReg] -> Bool -> FCode () +granYield regs node_reqd = return () + +granThunk :: Bool -> FCode () +granThunk node_points = return () + +----------------------------------------------------------------- +{- ------- Everything below here is commented out ------------- +----------------------------------------------------------------- + +-- Parallel header words in a static closure +staticParHdr :: [CmmLit] +-- Parallel header words in a static closure +staticParHdr = [] + +staticGranHdr :: [CmmLit] +-- Gransim header words in a static closure +staticGranHdr = [] + +doGranAllocate :: CmmExpr -> Code +-- macro DO_GRAN_ALLOCATE +doGranAllocate hp + | not opt_GranMacros = nopC + | otherwise = panic "doGranAllocate" + + + +------------------------- +granThunk :: Bool -> FCode () +-- HWL: insert macros for GrAnSim; 2 versions depending on liveness of node +-- (we prefer fetchAndReschedule-style context switches to yield ones) +granThunk node_points + | node_points = granFetchAndReschedule [] node_points + | otherwise = granYield [] node_points + +granFetchAndReschedule :: [(Id,GlobalReg)] -- Live registers + -> Bool -- Node reqd? + -> Code +-- Emit code for simulating a fetch and then reschedule. +granFetchAndReschedule regs node_reqd + | opt_GranMacros && (node `elem` map snd regs || node_reqd) + = do { fetch + ; reschedule liveness node_reqd } + | otherwise + = nopC + where + liveness = mkRegLiveness regs 0 0 + +fetch = panic "granFetch" + -- Was: absC (CMacroStmt GRAN_FETCH []) + --HWL: generate GRAN_FETCH macro for GrAnSim + -- currently GRAN_FETCH and GRAN_FETCH_AND_RESCHEDULE are miai + +reschedule liveness node_reqd = panic "granReschedule" + -- Was: absC (CMacroStmt GRAN_RESCHEDULE [ + -- mkIntCLit (I# (word2Int# liveness_mask)), + -- mkIntCLit (if node_reqd then 1 else 0)]) + + +------------------------- +-- The @GRAN_YIELD@ macro is taken from JSM's code for Concurrent Haskell. It +-- allows to context-switch at places where @node@ is not alive (it uses the +-- @Continue@ rather than the @EnterNodeCode@ function in the RTS). We emit +-- this kind of macro at the beginning of the following kinds of basic bocks: +-- \begin{itemize} +-- \item Slow entry code where node is not alive (see @CgClosure.lhs@). Normally +-- we use @fetchAndReschedule@ at a slow entry code. +-- \item Fast entry code (see @CgClosure.lhs@). +-- \item Alternatives in case expressions (@CLabelledCode@ structures), provided +-- that they are not inlined (see @CgCases.lhs@). These alternatives will +-- be turned into separate functions. + +granYield :: [(Id,GlobalReg)] -- Live registers + -> Bool -- Node reqd? + -> Code + +granYield regs node_reqd + | opt_GranMacros && node_reqd = yield liveness + | otherwise = nopC + where + liveness = mkRegLiveness regs 0 0 + +yield liveness = panic "granYield" + -- Was : absC (CMacroStmt GRAN_YIELD + -- [mkIntCLit (I# (word2Int# liveness_mask))]) + +-} diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs new file mode 100644 index 0000000000..6a8a4354e1 --- /dev/null +++ b/compiler/codeGen/StgCmmHeap.hs @@ -0,0 +1,519 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C--: heap management functions +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmHeap ( + getVirtHp, setVirtHp, setRealHp, + getHpRelOffset, hpRel, + + entryHeapCheck, altHeapCheck, + + layOutDynConstr, layOutStaticConstr, + mkVirtHeapOffsets, mkStaticClosureFields, mkStaticClosure, + + allocDynClosure, emitSetDynHdr + ) where + +#include "HsVersions.h" + +import StgSyn +import CLabel +import StgCmmLayout +import StgCmmUtils +import StgCmmMonad +import StgCmmProf +import StgCmmTicky +import StgCmmGran +import StgCmmClosure +import StgCmmEnv + +import MkZipCfgCmm + +import SMRep +import CmmExpr +import CmmUtils +import DataCon +import TyCon +import CostCentre +import Outputable +import FastString( LitString, mkFastString, sLit ) +import Constants +import Data.List + + +----------------------------------------------------------- +-- Layout of heap objects +----------------------------------------------------------- + +layOutDynConstr, layOutStaticConstr + :: DataCon -> [(PrimRep, a)] + -> (ClosureInfo, [(a, VirtualHpOffset)]) +-- No Void arguments in result + +layOutDynConstr = layOutConstr False +layOutStaticConstr = layOutConstr True + +layOutConstr :: Bool -> DataCon -> [(PrimRep, a)] + -> (ClosureInfo, [(a, VirtualHpOffset)]) +layOutConstr is_static data_con args + = (mkConInfo is_static data_con tot_wds ptr_wds, + things_w_offsets) + where + (tot_wds, -- #ptr_wds + #nonptr_wds + ptr_wds, -- #ptr_wds + things_w_offsets) = mkVirtHeapOffsets False{-not a thunk-} args + + +----------------------------------------------------------- +-- Initialise dynamic heap objects +----------------------------------------------------------- + +allocDynClosure + :: ClosureInfo + -> CmmExpr -- Cost Centre to stick in the object + -> CmmExpr -- Cost Centre to blame for this alloc + -- (usually the same; sometimes "OVERHEAD") + + -> [(StgArg, VirtualHpOffset)] -- Offsets from start of the object + -- ie Info ptr has offset zero. + -- No void args in here + -> FCode LocalReg + +-- allocDynClosure allocates the thing in the heap, +-- and modifies the virtual Hp to account for this. + +-- Note [Return a LocalReg] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- allocDynClosure returns a LocalReg, not a (Hp+8) CmmExpr. +-- Reason: +-- ...allocate object... +-- obj = Hp + 8 +-- y = f(z) +-- ...here obj is still valid, +-- but Hp+8 means something quite different... + + +allocDynClosure cl_info use_cc _blame_cc args_w_offsets + = do { virt_hp <- getVirtHp + + -- SAY WHAT WE ARE ABOUT TO DO + ; tickyDynAlloc cl_info + ; profDynAlloc cl_info use_cc + -- ToDo: This is almost certainly wrong + -- We're ignoring blame_cc. But until we've + -- fixed the boxing hack in chooseDynCostCentres etc, + -- we're worried about making things worse by "fixing" + -- this part to use blame_cc! + + -- FIND THE OFFSET OF THE INFO-PTR WORD + ; let info_offset = virt_hp + 1 + -- info_offset is the VirtualHpOffset of the first + -- word of the new object + -- Remember, virtHp points to last allocated word, + -- ie 1 *before* the info-ptr word of new object. + + info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info)) + + -- ALLOCATE THE OBJECT + ; base <- getHpRelOffset info_offset + ; emit (mkComment $ mkFastString "allocDynClosure") + ; emitSetDynHdr base info_ptr use_cc + ; let (args, offsets) = unzip args_w_offsets + ; cmm_args <- mapM getArgAmode args -- No void args + ; hpStore base cmm_args offsets + + -- BUMP THE VIRTUAL HEAP POINTER + ; setVirtHp (virt_hp + closureSize cl_info) + + -- Assign to a temporary and return + -- Note [Return a LocalReg] + ; hp_rel <- getHpRelOffset info_offset + ; assignTemp hp_rel } + +emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode () +emitSetDynHdr base info_ptr ccs + = hpStore base header [0..] + where + header :: [CmmExpr] + header = [info_ptr] ++ dynProfHdr ccs + -- ToDo: Gransim stuff + -- ToDo: Parallel stuff + -- No ticky header + +hpStore :: CmmExpr -> [CmmExpr] -> [VirtualHpOffset] -> FCode () +-- Store the item (expr,off) in base[off] +hpStore base vals offs + = emit (catAGraphs (zipWith mk_store vals offs)) + where + mk_store val off = mkStore (cmmOffsetW base off) val + + +----------------------------------------------------------- +-- Layout of static closures +----------------------------------------------------------- + +-- Make a static closure, adding on any extra padding needed for CAFs, +-- and adding a static link field if necessary. + +mkStaticClosureFields + :: ClosureInfo + -> CostCentreStack + -> Bool -- Has CAF refs + -> [CmmLit] -- Payload + -> [CmmLit] -- The full closure +mkStaticClosureFields cl_info ccs caf_refs payload + = mkStaticClosure info_lbl ccs payload padding_wds + static_link_field saved_info_field + where + info_lbl = infoTableLabelFromCI cl_info + + -- CAFs must have consistent layout, regardless of whether they + -- are actually updatable or not. The layout of a CAF is: + -- + -- 3 saved_info + -- 2 static_link + -- 1 indirectee + -- 0 info ptr + -- + -- the static_link and saved_info fields must always be in the same + -- place. So we use closureNeedsUpdSpace rather than + -- closureUpdReqd here: + + is_caf = closureNeedsUpdSpace cl_info + + padding_wds + | not is_caf = [] + | otherwise = ASSERT(null payload) [mkIntCLit 0] + + static_link_field + | is_caf || staticClosureNeedsLink cl_info = [static_link_value] + | otherwise = [] + + saved_info_field + | is_caf = [mkIntCLit 0] + | otherwise = [] + + -- for a static constructor which has NoCafRefs, we set the + -- static link field to a non-zero value so the garbage + -- collector will ignore it. + static_link_value + | caf_refs = mkIntCLit 0 + | otherwise = mkIntCLit 1 + + +mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit] + -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit] +mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_field + = [CmmLabel info_lbl] + ++ variable_header_words + ++ payload + ++ padding_wds + ++ static_link_field + ++ saved_info_field + where + variable_header_words + = staticGranHdr + ++ staticParHdr + ++ staticProfHdr ccs + ++ staticTickyHdr + +----------------------------------------------------------- +-- Heap overflow checking +----------------------------------------------------------- + +{- Note [Heap checks] + ~~~~~~~~~~~~~~~~~~ +Heap checks come in various forms. We provide the following entry +points to the runtime system, all of which use the native C-- entry +convention. + + * gc() performs garbage collection and returns + nothing to its caller + + * A series of canned entry points like + r = gc_1p( r ) + where r is a pointer. This performs gc, and + then returns its argument r to its caller. + + * A series of canned entry points like + gcfun_2p( f, x, y ) + where f is a function closure of arity 2 + This performs garbage collection, keeping alive the + three argument ptrs, and then tail-calls f(x,y) + +These are used in the following circumstances + +* entryHeapCheck: Function entry + (a) With a canned GC entry sequence + f( f_clo, x:ptr, y:ptr ) { + Hp = Hp+8 + if Hp > HpLim goto L + ... + L: HpAlloc = 8 + jump gcfun_2p( f_clo, x, y ) } + Note the tail call to the garbage collector; + it should do no register shuffling + + (b) No canned sequence + f( f_clo, x:ptr, y:ptr, ...etc... ) { + T: Hp = Hp+8 + if Hp > HpLim goto L + ... + L: HpAlloc = 8 + call gc() -- Needs an info table + goto T } + +* altHeapCheck: Immediately following an eval + Started as + case f x y of r { (p,q) -> rhs } + (a) With a canned sequence for the results of f + (which is the very common case since + all boxed cases return just one pointer + ... + r = f( x, y ) + K: -- K needs an info table + Hp = Hp+8 + if Hp > HpLim goto L + ...code for rhs... + + L: r = gc_1p( r ) + goto K } + + Here, the info table needed by the call + to gc_1p should be the *same* as the + one for the call to f; the C-- optimiser + spots this sharing opportunity + + (b) No canned sequence for results of f + Note second info table + ... + (r1,r2,r3) = call f( x, y ) + K: + Hp = Hp+8 + if Hp > HpLim goto L + ...code for rhs... + + L: call gc() -- Extra info table here + goto K + +* generalHeapCheck: Anywhere else + e.g. entry to thunk + case branch *not* following eval, + or let-no-escape + Exactly the same as the previous case: + + K: -- K needs an info table + Hp = Hp+8 + if Hp > HpLim goto L + ... + + L: call gc() + goto K +-} + +-------------------------------------------------------------- +-- A heap/stack check at a function or thunk entry point. + +entryHeapCheck :: LocalReg -- Function + -> [LocalReg] -- Args (empty for thunk) + -> C_SRT + -> FCode () + -> FCode () + +entryHeapCheck fun args srt code + = heapCheck gc_call code -- The 'fun' keeps relevant CAFs alive + where + gc_call + | null args = mkJump (CmmReg (CmmGlobal GCEnter1)) [CmmReg (CmmLocal fun)] + | otherwise = case gc_lbl args of + Just lbl -> mkJump (CmmLit (CmmLabel (mkRtsCodeLabel lbl))) + (map (CmmReg . CmmLocal) (fun:args)) + Nothing -> mkCmmCall generic_gc [] [] srt + + gc_lbl :: [LocalReg] -> Maybe LitString + gc_lbl [reg] + | isGcPtrType ty = Just (sLit "stg_gc_unpt_r1") -- "stg_gc_fun_1p" + | isFloatType ty = case width of + W32 -> Just (sLit "stg_gc_f1") -- "stg_gc_fun_f1" + W64 -> Just (sLit "stg_gc_d1") -- "stg_gc_fun_d1" + _other -> Nothing + | otherwise = case width of + W32 -> Just (sLit "stg_gc_unbx_r1") -- "stg_gc_fun_unbx_r1" + W64 -> Just (sLit "stg_gc_l1") -- "stg_gc_fun_unbx_l1" + _other -> Nothing -- Narrow cases + where + ty = localRegType reg + width = typeWidth ty + + gc_lbl regs = gc_lbl_ptrs (map (isGcPtrType . localRegType) regs) + + gc_lbl_ptrs :: [Bool] -> Maybe LitString + -- JD: TEMPORARY -- UNTIL THOSE FUNCTIONS EXIST... + --gc_lbl_ptrs [True,True] = Just (sLit "stg_gc_fun_2p") + --gc_lbl_ptrs [True,True,True] = Just (sLit "stg_gc_fun_3p") + gc_lbl_ptrs _ = Nothing + + +altHeapCheck :: [LocalReg] -> C_SRT -> FCode a -> FCode a +altHeapCheck regs srt code + = heapCheck gc_call code + where + gc_call + | null regs = mkCmmCall generic_gc [] [] srt + + | Just gc_lbl <- rts_label regs -- Canned call + = mkCmmCall (CmmLit (CmmLabel (mkRtsCodeLabel gc_lbl))) + regs + (map (CmmReg . CmmLocal) regs) + srt + | otherwise -- No canned call, and non-empty live vars + = mkCmmCall generic_gc [] [] srt + + rts_label [reg] + | isGcPtrType ty = Just (sLit "stg_gc_unpt_r1") + | isFloatType ty = case width of + W32 -> Just (sLit "stg_gc_f1") + W64 -> Just (sLit "stg_gc_d1") + _other -> Nothing + | otherwise = case width of + W32 -> Just (sLit "stg_gc_unbx_r1") + W64 -> Just (sLit "stg_gc_unbx_l1") + _other -> Nothing -- Narrow cases + where + ty = localRegType reg + width = typeWidth ty + + rts_label _ = Nothing + + +generic_gc :: CmmExpr -- The generic GC procedure; no params, no resuls +generic_gc = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_fun"))) + +------------------------------- +heapCheck :: CmmAGraph -> FCode a -> FCode a +heapCheck do_gc code + = getHeapUsage $ \ hpHw -> + do { emit (do_checks hpHw do_gc) + -- Emit heap checks, but be sure to do it lazily so + -- that the conditionals on hpHw don't cause a black hole + ; tickyAllocHeap hpHw + ; doGranAllocate hpHw + ; setRealHp hpHw + ; code } + +do_checks :: WordOff -- Heap headroom + -> CmmAGraph -- What to do on failure + -> CmmAGraph +do_checks 0 _ + = mkNop +do_checks alloc do_gc + = withFreshLabel "gc" $ \ blk_id -> + mkLabel blk_id Nothing + <*> mkAssign hpReg bump_hp + <*> mkCmmIfThen hp_oflo + (save_alloc + <*> do_gc + <*> mkBranch blk_id) + -- Bump heap pointer, and test for heap exhaustion + -- Note that we don't move the heap pointer unless the + -- stack check succeeds. Otherwise we might end up + -- with slop at the end of the current block, which can + -- confuse the LDV profiler. + where + alloc_lit = CmmLit (mkIntCLit (alloc*wORD_SIZE)) -- Bytes + bump_hp = cmmOffsetExprB (CmmReg hpReg) alloc_lit + + -- Hp overflow if (Hp > HpLim) + -- (Hp has been incremented by now) + -- HpLim points to the LAST WORD of valid allocation space. + hp_oflo = CmmMachOp mo_wordUGt + [CmmReg hpReg, CmmReg (CmmGlobal HpLim)] + + save_alloc = mkAssign (CmmGlobal HpAlloc) alloc_lit + +{- + +{- Unboxed tuple alternatives and let-no-escapes (the two most annoying +constructs to generate code for!) For unboxed tuple returns, there +are an arbitrary number of possibly unboxed return values, some of +which will be in registers, and the others will be on the stack. We +always organise the stack-resident fields into pointers & +non-pointers, and pass the number of each to the heap check code. -} + +unbxTupleHeapCheck + :: [(Id, GlobalReg)] -- Live registers + -> WordOff -- no. of stack slots containing ptrs + -> WordOff -- no. of stack slots containing nonptrs + -> CmmAGraph -- code to insert in the failure path + -> FCode () + -> FCode () + +unbxTupleHeapCheck regs ptrs nptrs fail_code code + -- We can't manage more than 255 pointers/non-pointers + -- in a generic heap check. + | ptrs > 255 || nptrs > 255 = panic "altHeapCheck" + | otherwise + = initHeapUsage $ \ hpHw -> do + { codeOnly $ do { do_checks 0 {- no stack check -} hpHw + full_fail_code rts_label + ; tickyAllocHeap hpHw } + ; setRealHp hpHw + ; code } + where + full_fail_code = fail_code `plusStmts` oneStmt assign_liveness + assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9)) -- Ho ho ho! + (CmmLit (mkWordCLit liveness)) + liveness = mkRegLiveness regs ptrs nptrs + rts_label = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_ut"))) + + +{- Old Gransim comment -- I have no idea whether it still makes sense (SLPJ Sep07) +For GrAnSim the code for doing a heap check and doing a context switch +has been separated. Especially, the HEAP_CHK macro only performs a +heap check. THREAD_CONTEXT_SWITCH should be used for doing a context +switch. GRAN_FETCH_AND_RESCHEDULE must be put at the beginning of +every slow entry code in order to simulate the fetching of +closures. If fetching is necessary (i.e. current closure is not local) +then an automatic context switch is done. -} + + +When failing a check, we save a return address on the stack and +jump to a pre-compiled code fragment that saves the live registers +and returns to the scheduler. + +The return address in most cases will be the beginning of the basic +block in which the check resides, since we need to perform the check +again on re-entry because someone else might have stolen the resource +in the meantime. + +%************************************************************************ +%* * + Generic Heap/Stack Checks - used in the RTS +%* * +%************************************************************************ + +\begin{code} +hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> FCode () +hpChkGen bytes liveness reentry + = do_checks' bytes True assigns stg_gc_gen + where + assigns = mkStmts [ + CmmAssign (CmmGlobal (VanillaReg 9)) liveness, + CmmAssign (CmmGlobal (VanillaReg 10)) reentry + ] + +-- a heap check where R1 points to the closure to enter on return, and +-- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP). +hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> FCode () +hpChkNodePointsAssignSp0 bytes sp0 + = do_checks' bytes True assign stg_gc_enter1 + where assign = oneStmt (CmmStore (CmmReg spReg) sp0) + +stg_gc_gen = CmmLit (CmmLabel (mkRtsCodeLabel (sLit "stg_gc_gen"))) +\end{code} + +-} diff --git a/compiler/codeGen/StgCmmHpc.hs b/compiler/codeGen/StgCmmHpc.hs new file mode 100644 index 0000000000..0205bd0911 --- /dev/null +++ b/compiler/codeGen/StgCmmHpc.hs @@ -0,0 +1,83 @@ +----------------------------------------------------------------------------- +-- +-- Code generation for coverage +-- +-- (c) Galois Connections, Inc. 2006 +-- +----------------------------------------------------------------------------- + +{-# OPTIONS -w #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + +module StgCmmHpc ( initHpc, mkTickBox ) where + +import StgCmmUtils +import StgCmmMonad +import StgCmmForeign +import StgCmmClosure + +import MkZipCfgCmm +import Cmm +import CLabel +import Module +import CmmUtils +import ForeignCall +import FastString +import HscTypes +import Char +import StaticFlags +import PackageConfig + +mkTickBox :: Module -> Int -> CmmAGraph +mkTickBox mod n + = mkStore tick_box (CmmMachOp (MO_Add W64) + [ CmmLoad tick_box b64 + , CmmLit (CmmInt 1 W64) + ]) + where + tick_box = cmmIndex W64 + (CmmLit $ CmmLabel $ mkHpcTicksLabel $ mod) + (fromIntegral n) + +initHpc :: Module -> HpcInfo -> FCode CmmAGraph +-- Emit top-level tables for HPC and return code to initialise +initHpc this_mod (NoHpcInfo {}) + = return mkNop +initHpc this_mod (HpcInfo tickCount hashNo) + = getCode $ whenC opt_Hpc $ + do { emitData ReadOnlyData + [ CmmDataLabel mkHpcModuleNameLabel + , CmmString $ map (fromIntegral . ord) + (full_name_str) + ++ [0] + ] + ; emitData Data $ [ CmmDataLabel (mkHpcTicksLabel this_mod) + ] ++ + [ CmmStaticLit (CmmInt 0 W64) + | _ <- take tickCount [0::Int ..] + ] + + ; id <- newTemp bWord -- TODO FIXME NOW + ; emitCCall + [(id,NoHint)] + (CmmLit $ CmmLabel $ mkForeignLabel mod_alloc Nothing False) + [ (mkLblExpr mkHpcModuleNameLabel,AddrHint) + , (CmmLit $ mkIntCLit tickCount,NoHint) + , (CmmLit $ mkIntCLit hashNo,NoHint) + , (CmmLit $ CmmLabel $ mkHpcTicksLabel $ this_mod,AddrHint) + ] + } + where + mod_alloc = mkFastString "hs_hpc_module" + module_name_str = moduleNameString (Module.moduleName this_mod) + full_name_str = if modulePackageId this_mod == mainPackageId + then module_name_str + else packageIdString (modulePackageId this_mod) ++ "/" ++ + module_name_str + + + diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs new file mode 100644 index 0000000000..f8d39646d6 --- /dev/null +++ b/compiler/codeGen/StgCmmLayout.hs @@ -0,0 +1,618 @@ +----------------------------------------------------------------------------- +-- +-- Building info tables. +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +{-# OPTIONS #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + +module StgCmmLayout ( + mkArgDescr, + emitCall, emitReturn, + + emitClosureCodeAndInfoTable, + + slowCall, directCall, + + mkVirtHeapOffsets, getHpRelOffset, hpRel, + + stdInfoTableSizeB, + entryCode, closureInfoPtr, + getConstrTag, + cmmGetClosureType, + infoTable, infoTableClosureType, + infoTablePtrs, infoTableNonPtrs, + funInfoTable, makeRelativeRefTo + ) where + + +#include "HsVersions.h" + +import StgCmmClosure +import StgCmmEnv +import StgCmmTicky +import StgCmmUtils +import StgCmmMonad + +import MkZipCfgCmm +import SMRep +import CmmUtils +import Cmm +import CLabel +import StgSyn +import Id +import Name +import TyCon ( PrimRep(..) ) +import Unique +import BasicTypes ( Arity ) +import StaticFlags + +import Bitmap +import Data.Bits + +import Maybes +import Constants +import Util +import Data.List +import Outputable +import FastString ( LitString, sLit ) + +------------------------------------------------------------------------ +-- Call and return sequences +------------------------------------------------------------------------ + +emitReturn :: [CmmExpr] -> FCode () +-- Return multiple values to the sequel +-- +-- If the sequel is Return +-- return (x,y) +-- If the sequel is AssignTo [p,q] +-- p=x; q=y; +emitReturn results + = do { adjustHpBackwards + ; sequel <- getSequel; + ; case sequel of + Return _ -> emit (mkReturn results) + AssignTo regs _ -> emit (mkMultiAssign regs results) + } + +emitCall :: CmmExpr -> [CmmExpr] -> FCode () +-- (cgCall fun args) makes a call to the entry-code of 'fun', +-- passing 'args', and returning the results to the current sequel +emitCall fun args + = do { adjustHpBackwards + ; sequel <- getSequel; + ; case sequel of + Return _ -> emit (mkJump fun args) + AssignTo res_regs srt -> emit (mkCmmCall fun res_regs args srt) + } + +adjustHpBackwards :: FCode () +-- This function adjusts and heap pointers just before a tail call or +-- return. At a call or return, the virtual heap pointer may be less +-- than the real Hp, because the latter was advanced to deal with +-- the worst-case branch of the code, and we may be in a better-case +-- branch. In that case, move the real Hp *back* and retract some +-- ticky allocation count. +-- +-- It *does not* deal with high-water-mark adjustment. +-- That's done by functions which allocate heap. +adjustHpBackwards + = do { hp_usg <- getHpUsage + ; let rHp = realHp hp_usg + vHp = virtHp hp_usg + adjust_words = vHp -rHp + ; new_hp <- getHpRelOffset vHp + + ; emit (if adjust_words == 0 + then mkNop + else mkAssign hpReg new_hp) -- Generates nothing when vHp==rHp + + ; tickyAllocHeap adjust_words -- ...ditto + + ; setRealHp vHp + } + + +------------------------------------------------------------------------- +-- Making calls: directCall and slowCall +------------------------------------------------------------------------- + +directCall :: CLabel -> Arity -> [StgArg] -> FCode () +-- (directCall f n args) +-- calls f(arg1, ..., argn), and applies the result to the remaining args +-- The function f has arity n, and there are guaranteed at least n args +-- Both arity and args include void args +directCall lbl arity stg_args + = do { cmm_args <- getNonVoidArgAmodes stg_args + ; direct_call lbl arity cmm_args (argsLReps stg_args) } + +slowCall :: CmmExpr -> [StgArg] -> FCode () +-- (slowCall fun args) applies fun to args, returning the results to Sequel +slowCall fun stg_args + = do { cmm_args <- getNonVoidArgAmodes stg_args + ; slow_call fun cmm_args (argsLReps stg_args) } + +-------------- +direct_call :: CLabel -> Arity -> [CmmExpr] -> [LRep] -> FCode () +-- NB1: (length args) maybe less than (length reps), because +-- the args exclude the void ones +-- NB2: 'arity' refers to the *reps* +direct_call lbl arity args reps + | null rest_args + = ASSERT( arity == length args) + emitCall target args + + | otherwise + = ASSERT( arity == length initial_reps ) + do { pap_id <- newTemp gcWord + ; let srt = pprTrace "Urk! SRT for over-sat call" + (ppr lbl) NoC_SRT + -- XXX: what if rest_args contains static refs? + ; withSequel (AssignTo [pap_id] srt) + (emitCall target args) + ; slow_call (CmmReg (CmmLocal pap_id)) + rest_args rest_reps } + where + target = CmmLit (CmmLabel lbl) + (initial_reps, rest_reps) = splitAt arity reps + arg_arity = count isNonV initial_reps + (_, rest_args) = splitAt arg_arity args + +-------------- +slow_call :: CmmExpr -> [CmmExpr] -> [LRep] -> FCode () +slow_call fun args reps + = direct_call (mkRtsApFastLabel rts_fun) (arity+1) + (fun : args) (P : reps) + where + (rts_fun, arity) = slowCallPattern reps + +-- These cases were found to cover about 99% of all slow calls: +slowCallPattern :: [LRep] -> (LitString, Arity) +-- Returns the generic apply function and arity +slowCallPattern (P: P: P: P: P: P: _) = (sLit "stg_ap_pppppp", 6) +slowCallPattern (P: P: P: P: P: _) = (sLit "stg_ap_ppppp", 5) +slowCallPattern (P: P: P: P: _) = (sLit "stg_ap_pppp", 4) +slowCallPattern (P: P: P: V: _) = (sLit "stg_ap_pppv", 4) +slowCallPattern (P: P: P: _) = (sLit "stg_ap_ppp", 3) +slowCallPattern (P: P: V: _) = (sLit "stg_ap_ppv", 3) +slowCallPattern (P: P: _) = (sLit "stg_ap_pp", 2) +slowCallPattern (P: V: _) = (sLit "stg_ap_pv", 2) +slowCallPattern (P: _) = (sLit "stg_ap_p", 1) +slowCallPattern (V: _) = (sLit "stg_ap_v", 1) +slowCallPattern (N: _) = (sLit "stg_ap_n", 1) +slowCallPattern (F: _) = (sLit "stg_ap_f", 1) +slowCallPattern (D: _) = (sLit "stg_ap_d", 1) +slowCallPattern (L: _) = (sLit "stg_ap_l", 1) +slowCallPattern [] = (sLit "stg_ap_0", 0) + + +------------------------------------------------------------------------- +-- Classifying arguments: LRep +------------------------------------------------------------------------- + +-- LRep is not exported (even abstractly) +-- It's a local helper type for classification + +data LRep = P -- GC Ptr + | N -- One-word non-ptr + | L -- Two-word non-ptr (long) + | V -- Void + | F -- Float + | D -- Double + +toLRep :: PrimRep -> LRep +toLRep VoidRep = V +toLRep PtrRep = P +toLRep IntRep = N +toLRep WordRep = N +toLRep AddrRep = N +toLRep Int64Rep = L +toLRep Word64Rep = L +toLRep FloatRep = F +toLRep DoubleRep = D + +isNonV :: LRep -> Bool +isNonV V = False +isNonV _ = True + +argsLReps :: [StgArg] -> [LRep] +argsLReps = map (toLRep . argPrimRep) + +lRepSizeW :: LRep -> WordOff -- Size in words +lRepSizeW N = 1 +lRepSizeW P = 1 +lRepSizeW F = 1 +lRepSizeW L = wORD64_SIZE `quot` wORD_SIZE +lRepSizeW D = dOUBLE_SIZE `quot` wORD_SIZE +lRepSizeW V = 0 + +------------------------------------------------------------------------- +---- Laying out objects on the heap and stack +------------------------------------------------------------------------- + +-- The heap always grows upwards, so hpRel is easy +hpRel :: VirtualHpOffset -- virtual offset of Hp + -> VirtualHpOffset -- virtual offset of The Thing + -> WordOff -- integer word offset +hpRel hp off = off - hp + +getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr +getHpRelOffset virtual_offset + = do { hp_usg <- getHpUsage + ; return (cmmRegOffW hpReg (hpRel (realHp hp_usg) virtual_offset)) } + +mkVirtHeapOffsets + :: Bool -- True <=> is a thunk + -> [(PrimRep,a)] -- Things to make offsets for + -> (WordOff, -- _Total_ number of words allocated + WordOff, -- Number of words allocated for *pointers* + [(a, VirtualHpOffset)]) + +-- Things with their offsets from start of object in order of +-- increasing offset; BUT THIS MAY BE DIFFERENT TO INPUT ORDER +-- First in list gets lowest offset, which is initial offset + 1. +-- +-- Void arguments are removed, so output list may be shorter than +-- input list +-- +-- mkVirtHeapOffsets always returns boxed things with smaller offsets +-- than the unboxed things + +mkVirtHeapOffsets is_thunk things + = let non_void_things = filterOut (isVoidRep . fst) things + (ptrs, non_ptrs) = partition (isGcPtrRep . fst) non_void_things + (wds_of_ptrs, ptrs_w_offsets) = mapAccumL computeOffset 0 ptrs + (tot_wds, non_ptrs_w_offsets) = mapAccumL computeOffset wds_of_ptrs non_ptrs + in + (tot_wds, wds_of_ptrs, ptrs_w_offsets ++ non_ptrs_w_offsets) + where + hdr_size | is_thunk = thunkHdrSize + | otherwise = fixedHdrSize + + computeOffset wds_so_far (rep, thing) + = (wds_so_far + lRepSizeW (toLRep rep), + (thing, hdr_size + wds_so_far)) + + +------------------------------------------------------------------------- +-- +-- Making argument descriptors +-- +-- An argument descriptor describes the layout of args on the stack, +-- both for * GC (stack-layout) purposes, and +-- * saving/restoring registers when a heap-check fails +-- +-- Void arguments aren't important, therefore (contrast constructSlowCall) +-- +------------------------------------------------------------------------- + +-- bring in ARG_P, ARG_N, etc. +#include "../includes/StgFun.h" + +------------------------- +-- argDescrType :: ArgDescr -> StgHalfWord +-- -- The "argument type" RTS field type +-- argDescrType (ArgSpec n) = n +-- argDescrType (ArgGen liveness) +-- | isBigLiveness liveness = ARG_GEN_BIG +-- | otherwise = ARG_GEN + + +mkArgDescr :: Name -> [Id] -> FCode ArgDescr +mkArgDescr nm args + = case stdPattern arg_reps of + Just spec_id -> return (ArgSpec spec_id) + Nothing -> do { liveness <- mkLiveness nm size bitmap + ; return (ArgGen liveness) } + where + arg_reps = filter isNonV (map (toLRep . idPrimRep) args) + -- Getting rid of voids eases matching of standard patterns + + bitmap = mkBitmap arg_bits + arg_bits = argBits arg_reps + size = length arg_bits + +argBits :: [LRep] -> [Bool] -- True for non-ptr, False for ptr +argBits [] = [] +argBits (P : args) = False : argBits args +argBits (arg : args) = take (lRepSizeW arg) (repeat True) ++ argBits args + +---------------------- +stdPattern :: [LRep] -> Maybe StgHalfWord +stdPattern reps + = case reps of + [] -> Just ARG_NONE -- just void args, probably + [N] -> Just ARG_N + [P] -> Just ARG_N + [F] -> Just ARG_F + [D] -> Just ARG_D + [L] -> Just ARG_L + + [N,N] -> Just ARG_NN + [N,P] -> Just ARG_NP + [P,N] -> Just ARG_PN + [P,P] -> Just ARG_PP + + [N,N,N] -> Just ARG_NNN + [N,N,P] -> Just ARG_NNP + [N,P,N] -> Just ARG_NPN + [N,P,P] -> Just ARG_NPP + [P,N,N] -> Just ARG_PNN + [P,N,P] -> Just ARG_PNP + [P,P,N] -> Just ARG_PPN + [P,P,P] -> Just ARG_PPP + + [P,P,P,P] -> Just ARG_PPPP + [P,P,P,P,P] -> Just ARG_PPPPP + [P,P,P,P,P,P] -> Just ARG_PPPPPP + + _ -> Nothing + +------------------------------------------------------------------------- +-- +-- Liveness info +-- +------------------------------------------------------------------------- + +-- TODO: This along with 'mkArgDescr' should be unified +-- with 'CmmInfo.mkLiveness'. However that would require +-- potentially invasive changes to the 'ClosureInfo' type. +-- For now, 'CmmInfo.mkLiveness' handles only continuations and +-- this one handles liveness everything else. Another distinction +-- between these two is that 'CmmInfo.mkLiveness' information +-- about the stack layout, and this one is information about +-- the heap layout of PAPs. +mkLiveness :: Name -> Int -> Bitmap -> FCode Liveness +mkLiveness name size bits + | size > mAX_SMALL_BITMAP_SIZE -- Bitmap does not fit in one word + = do { let lbl = mkBitmapLabel (getUnique name) + ; emitRODataLits lbl ( mkWordCLit (fromIntegral size) + : map mkWordCLit bits) + ; return (BigLiveness lbl) } + + | otherwise -- Bitmap fits in one word + = let + small_bits = case bits of + [] -> 0 + [b] -> fromIntegral b + _ -> panic "livenessToAddrMode" + in + return (smallLiveness size small_bits) + +smallLiveness :: Int -> StgWord -> Liveness +smallLiveness size small_bits = SmallLiveness bits + where bits = fromIntegral size .|. (small_bits `shiftL` bITMAP_BITS_SHIFT) + +------------------- +-- isBigLiveness :: Liveness -> Bool +-- isBigLiveness (BigLiveness _) = True +-- isBigLiveness (SmallLiveness _) = False + +------------------- +-- mkLivenessCLit :: Liveness -> CmmLit +-- mkLivenessCLit (BigLiveness lbl) = CmmLabel lbl +-- mkLivenessCLit (SmallLiveness bits) = mkWordCLit bits + + +------------------------------------------------------------------------- +-- +-- Bitmap describing register liveness +-- across GC when doing a "generic" heap check +-- (a RET_DYN stack frame). +-- +-- NB. Must agree with these macros (currently in StgMacros.h): +-- GET_NON_PTRS(), GET_PTRS(), GET_LIVENESS(). +------------------------------------------------------------------------- + +{- Not used in new code gen +mkRegLiveness :: [(Id, GlobalReg)] -> Int -> Int -> StgWord +mkRegLiveness regs ptrs nptrs + = (fromIntegral nptrs `shiftL` 16) .|. + (fromIntegral ptrs `shiftL` 24) .|. + all_non_ptrs `xor` reg_bits regs + where + all_non_ptrs = 0xff + + reg_bits [] = 0 + reg_bits ((id, VanillaReg i) : regs) | isGcPtrRep (idPrimRep id) + = (1 `shiftL` (i - 1)) .|. reg_bits regs + reg_bits (_ : regs) + = reg_bits regs +-} + +------------------------------------------------------------------------- +-- +-- Generating the info table and code for a closure +-- +------------------------------------------------------------------------- + +-- Here we make an info table of type 'CmmInfo'. The concrete +-- representation as a list of 'CmmAddr' is handled later +-- in the pipeline by 'cmmToRawCmm'. + +emitClosureCodeAndInfoTable :: ClosureInfo -> CmmFormals + -> CmmAGraph -> FCode () +emitClosureCodeAndInfoTable cl_info args body + = do { info <- mkCmmInfo cl_info + ; emitProc info (infoLblToEntryLbl info_lbl) args body } + where + info_lbl = infoTableLabelFromCI cl_info + +-- Convert from 'ClosureInfo' to 'CmmInfo'. +-- Not used for return points. (The 'smRepClosureTypeInt' call would panic.) +mkCmmInfo :: ClosureInfo -> FCode CmmInfo +mkCmmInfo cl_info + = do { prof <- if opt_SccProfilingOn then + do fd_lit <- mkStringCLit (closureTypeDescr cl_info) + ad_lit <- mkStringCLit (closureValDescr cl_info) + return $ ProfilingInfo fd_lit ad_lit + else return $ ProfilingInfo (mkIntCLit 0) (mkIntCLit 0) + ; return (CmmInfo gc_target Nothing (CmmInfoTable prof cl_type info)) } + where + info = closureTypeInfo cl_info + cl_type = smRepClosureTypeInt (closureSMRep cl_info) + + -- The gc_target is to inform the CPS pass when it inserts a stack check. + -- Since that pass isn't used yet we'll punt for now. + -- When the CPS pass is fully integrated, this should + -- be replaced by the label that any heap check jumped to, + -- so that branch can be shared by both the heap (from codeGen) + -- and stack checks (from the CPS pass). + -- JD: Actually, we've decided to go a different route here: + -- the code generator is now responsible for producing the + -- stack limit check explicitly, so this field is now obsolete. + gc_target = Nothing + +----------------------------------------------------------------------------- +-- +-- Info table offsets +-- +----------------------------------------------------------------------------- + +stdInfoTableSizeW :: WordOff +-- The size of a standard info table varies with profiling/ticky etc, +-- so we can't get it from Constants +-- It must vary in sync with mkStdInfoTable +stdInfoTableSizeW + = size_fixed + size_prof + where + size_fixed = 2 -- layout, type + size_prof | opt_SccProfilingOn = 2 + | otherwise = 0 + +stdInfoTableSizeB :: ByteOff +stdInfoTableSizeB = stdInfoTableSizeW * wORD_SIZE :: ByteOff + +stdSrtBitmapOffset :: ByteOff +-- Byte offset of the SRT bitmap half-word which is +-- in the *higher-addressed* part of the type_lit +stdSrtBitmapOffset = stdInfoTableSizeB - hALF_WORD_SIZE + +stdClosureTypeOffset :: ByteOff +-- Byte offset of the closure type half-word +stdClosureTypeOffset = stdInfoTableSizeB - wORD_SIZE + +stdPtrsOffset, stdNonPtrsOffset :: ByteOff +stdPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE +stdNonPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE + hALF_WORD_SIZE + +------------------------------------------------------------------------- +-- +-- Accessing fields of an info table +-- +------------------------------------------------------------------------- + +closureInfoPtr :: CmmExpr -> CmmExpr +-- Takes a closure pointer and returns the info table pointer +closureInfoPtr e = CmmLoad e bWord + +entryCode :: CmmExpr -> CmmExpr +-- Takes an info pointer (the first word of a closure) +-- and returns its entry code +entryCode e | tablesNextToCode = e + | otherwise = CmmLoad e bWord + +getConstrTag :: CmmExpr -> CmmExpr +-- Takes a closure pointer, and return the *zero-indexed* +-- constructor tag obtained from the info table +-- This lives in the SRT field of the info table +-- (constructors don't need SRTs). +getConstrTag closure_ptr + = CmmMachOp (MO_UU_Conv halfWordWidth wordWidth) [infoTableConstrTag info_table] + where + info_table = infoTable (closureInfoPtr closure_ptr) + +cmmGetClosureType :: CmmExpr -> CmmExpr +-- Takes a closure pointer, and return the closure type +-- obtained from the info table +cmmGetClosureType closure_ptr + = CmmMachOp (MO_UU_Conv halfWordWidth wordWidth) [infoTableClosureType info_table] + where + info_table = infoTable (closureInfoPtr closure_ptr) + +infoTable :: CmmExpr -> CmmExpr +-- Takes an info pointer (the first word of a closure) +-- and returns a pointer to the first word of the standard-form +-- info table, excluding the entry-code word (if present) +infoTable info_ptr + | tablesNextToCode = cmmOffsetB info_ptr (- stdInfoTableSizeB) + | otherwise = cmmOffsetW info_ptr 1 -- Past the entry code pointer + +infoTableConstrTag :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the constr tag +-- field of the info table (same as the srt_bitmap field) +infoTableConstrTag = infoTableSrtBitmap + +infoTableSrtBitmap :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the srt_bitmap +-- field of the info table +infoTableSrtBitmap info_tbl + = CmmLoad (cmmOffsetB info_tbl stdSrtBitmapOffset) bHalfWord + +infoTableClosureType :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the closure type +-- field of the info table. +infoTableClosureType info_tbl + = CmmLoad (cmmOffsetB info_tbl stdClosureTypeOffset) bHalfWord + +infoTablePtrs :: CmmExpr -> CmmExpr +infoTablePtrs info_tbl + = CmmLoad (cmmOffsetB info_tbl stdPtrsOffset) bHalfWord + +infoTableNonPtrs :: CmmExpr -> CmmExpr +infoTableNonPtrs info_tbl + = CmmLoad (cmmOffsetB info_tbl stdNonPtrsOffset) bHalfWord + +funInfoTable :: CmmExpr -> CmmExpr +-- Takes the info pointer of a function, +-- and returns a pointer to the first word of the StgFunInfoExtra struct +-- in the info table. +funInfoTable info_ptr + | tablesNextToCode + = cmmOffsetB info_ptr (- stdInfoTableSizeB - sIZEOF_StgFunInfoExtraRev) + | otherwise + = cmmOffsetW info_ptr (1 + stdInfoTableSizeW) + -- Past the entry code pointer + +------------------------------------------------------------------------- +-- +-- Static reference tables +-- +------------------------------------------------------------------------- + +-- srtLabelAndLength :: C_SRT -> CLabel -> (CmmLit, StgHalfWord) +-- srtLabelAndLength NoC_SRT _ +-- = (zeroCLit, 0) +-- srtLabelAndLength (C_SRT lbl off bitmap) info_lbl +-- = (makeRelativeRefTo info_lbl $ cmmLabelOffW lbl off, bitmap) + +------------------------------------------------------------------------- +-- +-- Position independent code +-- +------------------------------------------------------------------------- +-- In order to support position independent code, we mustn't put absolute +-- references into read-only space. Info tables in the tablesNextToCode +-- case must be in .text, which is read-only, so we doctor the CmmLits +-- to use relative offsets instead. + +-- Note that this is done even when the -fPIC flag is not specified, +-- as we want to keep binary compatibility between PIC and non-PIC. + +makeRelativeRefTo :: CLabel -> CmmLit -> CmmLit + +makeRelativeRefTo info_lbl (CmmLabel lbl) + | tablesNextToCode + = CmmLabelDiffOff lbl info_lbl 0 +makeRelativeRefTo info_lbl (CmmLabelOff lbl off) + | tablesNextToCode + = CmmLabelDiffOff lbl info_lbl off +makeRelativeRefTo _ lit = lit diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs new file mode 100644 index 0000000000..365263941e --- /dev/null +++ b/compiler/codeGen/StgCmmMonad.hs @@ -0,0 +1,601 @@ +----------------------------------------------------------------------------- +-- +-- Monad for Stg to C-- code generation +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmMonad ( + FCode, -- type + + initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs, + returnFC, fixC, nopC, whenC, + newUnique, newUniqSupply, + + emit, emitData, emitProc, emitSimpleProc, + + getCmm, cgStmtsToBlocks, + getCodeR, getCode, getHeapUsage, + + forkClosureBody, forkStatics, forkAlts, forkProc, codeOnly, + + ConTagZ, + + Sequel(..), + withSequel, getSequel, + + setSRTLabel, getSRTLabel, + setTickyCtrLabel, getTickyCtrLabel, + + HeapUsage(..), VirtualHpOffset, initHpUsage, + getHpUsage, setHpUsage, heapHWM, + setVirtHp, getVirtHp, setRealHp, + + getModuleName, + + -- ideally we wouldn't export these, but some other modules access internal state + getState, setState, getInfoDown, getDynFlags, getThisPackage, + + -- more localised access to monad state + CgIdInfo(..), CgLoc(..), + getBinds, setBinds, getStaticBinds, + + -- out of general friendliness, we also export ... + CgInfoDownwards(..), CgState(..) -- non-abstract + ) where + +#include "HsVersions.h" + +import StgCmmClosure +import DynFlags +import MkZipCfgCmm +import BlockId +import Cmm +import CLabel +import TyCon ( PrimRep ) +import SMRep +import Module +import Id +import VarEnv +import OrdList +import Unique +import Util() +import UniqSupply +import FastString(sLit) +import Outputable + +import Control.Monad +import Data.List +import Prelude hiding( sequence ) +import qualified Prelude( sequence ) + +infixr 9 `thenC` -- Right-associative! +infixr 9 `thenFC` + + +-------------------------------------------------------- +-- The FCode monad and its types +-------------------------------------------------------- + +newtype FCode a = FCode (CgInfoDownwards -> CgState -> (a, CgState)) + +instance Monad FCode where + (>>=) = thenFC + return = returnFC + +{-# INLINE thenC #-} +{-# INLINE thenFC #-} +{-# INLINE returnFC #-} + +initC :: DynFlags -> Module -> FCode a -> IO a +initC dflags mod (FCode code) + = do { uniqs <- mkSplitUniqSupply 'c' + ; case code (initCgInfoDown dflags mod) (initCgState uniqs) of + (res, _) -> return res + } + +returnFC :: a -> FCode a +returnFC val = FCode (\_info_down state -> (val, state)) + +thenC :: FCode () -> FCode a -> FCode a +thenC (FCode m) (FCode k) = + FCode (\info_down state -> let (_,new_state) = m info_down state in + k info_down new_state) + +nopC :: FCode () +nopC = return () + +whenC :: Bool -> FCode () -> FCode () +whenC True code = code +whenC False _code = nopC + +listCs :: [FCode ()] -> FCode () +listCs [] = return () +listCs (fc:fcs) = do + fc + listCs fcs + +mapCs :: (a -> FCode ()) -> [a] -> FCode () +mapCs = mapM_ + +thenFC :: FCode a -> (a -> FCode c) -> FCode c +thenFC (FCode m) k = FCode ( + \info_down state -> + let + (m_result, new_state) = m info_down state + (FCode kcode) = k m_result + in + kcode info_down new_state + ) + +listFCs :: [FCode a] -> FCode [a] +listFCs = Prelude.sequence + +mapFCs :: (a -> FCode b) -> [a] -> FCode [b] +mapFCs = mapM + +fixC :: (a -> FCode a) -> FCode a +fixC fcode = FCode ( + \info_down state -> + let + FCode fc = fcode v + result@(v,_) = fc info_down state + -- ^--------^ + in + result + ) + + +-------------------------------------------------------- +-- The code generator environment +-------------------------------------------------------- + +-- This monadery has some information that it only passes +-- *downwards*, as well as some ``state'' which is modified +-- as we go along. + +data CgInfoDownwards -- information only passed *downwards* by the monad + = MkCgInfoDown { + cgd_dflags :: DynFlags, + cgd_mod :: Module, -- Module being compiled + cgd_statics :: CgBindings, -- [Id -> info] : static environment + cgd_srt_lbl :: CLabel, -- Label of the current top-level SRT + cgd_ticky :: CLabel, -- Current destination for ticky counts + cgd_sequel :: Sequel -- What to do at end of basic block + } + +type CgBindings = IdEnv CgIdInfo + +data CgIdInfo + = CgIdInfo + { cg_id :: Id -- Id that this is the info for + -- Can differ from the Id at occurrence sites by + -- virtue of being externalised, for splittable C + , cg_lf :: LambdaFormInfo + , cg_loc :: CgLoc + , cg_rep :: PrimRep -- Cache for (idPrimRep id) + , cg_tag :: {-# UNPACK #-} !DynTag -- Cache for (lfDynTag cg_lf) + } + +data CgLoc + = CmmLoc CmmExpr -- A stable CmmExpr; that is, one not mentioning + -- Hp, so that it remains valid across calls + + | LneLoc BlockId [LocalReg] -- A join point + -- A join point (= let-no-escape) should only + -- be tail-called, and in a saturated way. + -- To tail-call it, assign to these locals, + -- and branch to the block id + +instance Outputable CgIdInfo where + ppr (CgIdInfo { cg_id = id, cg_loc = loc }) + = ppr id <+> ptext (sLit "-->") <+> ppr loc + +instance Outputable CgLoc where + ppr (CmmLoc e) = ptext (sLit "cmm") <+> ppr e + ppr (LneLoc b rs) = ptext (sLit "lne") <+> ppr b <+> ppr rs + + +-- Sequel tells what to do with the result of this expression +data Sequel + = Return Bool -- Return result(s) to continuation found on the stack + -- True <=> the continuation is update code (???) + + | AssignTo + [LocalReg] -- Put result(s) in these regs and fall through + -- NB: no void arguments here + C_SRT -- Here are the statics live in the continuation + + + +initCgInfoDown :: DynFlags -> Module -> CgInfoDownwards +initCgInfoDown dflags mod + = MkCgInfoDown { cgd_dflags = dflags, + cgd_mod = mod, + cgd_statics = emptyVarEnv, + cgd_srt_lbl = error "initC: srt_lbl", + cgd_ticky = mkTopTickyCtrLabel, + cgd_sequel = initSequel } + +initSequel :: Sequel +initSequel = Return False + + +-------------------------------------------------------- +-- The code generator state +-------------------------------------------------------- + +data CgState + = MkCgState { + cgs_stmts :: CmmAGraph, -- Current procedure + + cgs_tops :: OrdList CmmTopZ, + -- Other procedures and data blocks in this compilation unit + -- Both are ordered only so that we can + -- reduce forward references, when it's easy to do so + + cgs_binds :: CgBindings, -- [Id -> info] : *local* bindings environment + -- Bindings for top-level things are given in + -- the info-down part + + cgs_hp_usg :: HeapUsage, + + cgs_uniqs :: UniqSupply } + +data HeapUsage = + HeapUsage { + virtHp :: VirtualHpOffset, -- Virtual offset of highest-allocated word + realHp :: VirtualHpOffset -- realHp: Virtual offset of real heap ptr + } + +type VirtualHpOffset = WordOff + +initCgState :: UniqSupply -> CgState +initCgState uniqs + = MkCgState { cgs_stmts = mkNop, cgs_tops = nilOL, + cgs_binds = emptyVarEnv, + cgs_hp_usg = initHpUsage, + cgs_uniqs = uniqs } + +stateIncUsage :: CgState -> CgState -> CgState +-- stateIncUsage@ e1 e2 incorporates in e1 +-- the heap high water mark found in e2. +stateIncUsage s1 s2@(MkCgState { cgs_hp_usg = hp_usg }) + = s1 { cgs_hp_usg = cgs_hp_usg s1 `maxHpHw` virtHp hp_usg } + `addCodeBlocksFrom` s2 + +addCodeBlocksFrom :: CgState -> CgState -> CgState +-- Add code blocks from the latter to the former +-- (The cgs_stmts will often be empty, but not always; see codeOnly) +s1 `addCodeBlocksFrom` s2 + = s1 { cgs_stmts = cgs_stmts s1 <*> cgs_stmts s2, + cgs_tops = cgs_tops s1 `appOL` cgs_tops s2 } + + +-- The heap high water mark is the larger of virtHp and hwHp. The latter is +-- only records the high water marks of forked-off branches, so to find the +-- heap high water mark you have to take the max of virtHp and hwHp. Remember, +-- virtHp never retreats! +-- +-- Note Jan 04: ok, so why do we only look at the virtual Hp?? + +heapHWM :: HeapUsage -> VirtualHpOffset +heapHWM = virtHp + +initHpUsage :: HeapUsage +initHpUsage = HeapUsage { virtHp = 0, realHp = 0 } + +maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage +hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw } + + +-------------------------------------------------------- +-- Operators for getting and setting the state and "info_down". +-------------------------------------------------------- + +getState :: FCode CgState +getState = FCode $ \_info_down state -> (state,state) + +setState :: CgState -> FCode () +setState state = FCode $ \_info_down _ -> ((),state) + +getHpUsage :: FCode HeapUsage +getHpUsage = do + state <- getState + return $ cgs_hp_usg state + +setHpUsage :: HeapUsage -> FCode () +setHpUsage new_hp_usg = do + state <- getState + setState $ state {cgs_hp_usg = new_hp_usg} + +setVirtHp :: VirtualHpOffset -> FCode () +setVirtHp new_virtHp + = do { hp_usage <- getHpUsage + ; setHpUsage (hp_usage {virtHp = new_virtHp}) } + +getVirtHp :: FCode VirtualHpOffset +getVirtHp + = do { hp_usage <- getHpUsage + ; return (virtHp hp_usage) } + +setRealHp :: VirtualHpOffset -> FCode () +setRealHp new_realHp + = do { hp_usage <- getHpUsage + ; setHpUsage (hp_usage {realHp = new_realHp}) } + +getBinds :: FCode CgBindings +getBinds = do + state <- getState + return $ cgs_binds state + +setBinds :: CgBindings -> FCode () +setBinds new_binds = do + state <- getState + setState $ state {cgs_binds = new_binds} + +getStaticBinds :: FCode CgBindings +getStaticBinds = do + info <- getInfoDown + return (cgd_statics info) + +withState :: FCode a -> CgState -> FCode (a,CgState) +withState (FCode fcode) newstate = FCode $ \info_down state -> + let (retval, state2) = fcode info_down newstate in ((retval,state2), state) + +newUniqSupply :: FCode UniqSupply +newUniqSupply = do + state <- getState + let (us1, us2) = splitUniqSupply (cgs_uniqs state) + setState $ state { cgs_uniqs = us1 } + return us2 + +newUnique :: FCode Unique +newUnique = do + us <- newUniqSupply + return (uniqFromSupply us) + +------------------ +getInfoDown :: FCode CgInfoDownwards +getInfoDown = FCode $ \info_down state -> (info_down,state) + +getDynFlags :: FCode DynFlags +getDynFlags = liftM cgd_dflags getInfoDown + +getThisPackage :: FCode PackageId +getThisPackage = liftM thisPackage getDynFlags + +withInfoDown :: FCode a -> CgInfoDownwards -> FCode a +withInfoDown (FCode fcode) info_down = FCode $ \_ state -> fcode info_down state + +doFCode :: FCode a -> CgInfoDownwards -> CgState -> (a,CgState) +doFCode (FCode fcode) info_down state = fcode info_down state + + +-- ---------------------------------------------------------------------------- +-- Get the current module name + +getModuleName :: FCode Module +getModuleName = do { info <- getInfoDown; return (cgd_mod info) } + +-- ---------------------------------------------------------------------------- +-- Get/set the end-of-block info + +withSequel :: Sequel -> FCode () -> FCode () +withSequel sequel code + = do { info <- getInfoDown + ; withInfoDown code (info {cgd_sequel = sequel }) } + +getSequel :: FCode Sequel +getSequel = do { info <- getInfoDown + ; return (cgd_sequel info) } + +-- ---------------------------------------------------------------------------- +-- Get/set the current SRT label + +-- There is just one SRT for each top level binding; all the nested +-- bindings use sub-sections of this SRT. The label is passed down to +-- the nested bindings via the monad. + +getSRTLabel :: FCode CLabel -- Used only by cgPanic +getSRTLabel = do info <- getInfoDown + return (cgd_srt_lbl info) + +setSRTLabel :: CLabel -> FCode a -> FCode a +setSRTLabel srt_lbl code + = do info <- getInfoDown + withInfoDown code (info { cgd_srt_lbl = srt_lbl}) + +-- ---------------------------------------------------------------------------- +-- Get/set the current ticky counter label + +getTickyCtrLabel :: FCode CLabel +getTickyCtrLabel = do + info <- getInfoDown + return (cgd_ticky info) + +setTickyCtrLabel :: CLabel -> FCode () -> FCode () +setTickyCtrLabel ticky code = do + info <- getInfoDown + withInfoDown code (info {cgd_ticky = ticky}) + + +-------------------------------------------------------- +-- Forking +-------------------------------------------------------- + +forkClosureBody :: FCode () -> FCode () +-- forkClosureBody takes a code, $c$, and compiles it in a +-- fresh environment, except that: +-- - compilation info and statics are passed in unchanged. +-- - local bindings are passed in unchanged +-- (it's up to the enclosed code to re-bind the +-- free variables to a field of the closure) +-- +-- The current state is passed on completely unaltered, except that +-- C-- from the fork is incorporated. + +forkClosureBody body_code + = do { info <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let body_info_down = info { cgd_sequel = initSequel } + fork_state_in = (initCgState us) { cgs_binds = cgs_binds state } + ((),fork_state_out) + = doFCode body_code body_info_down fork_state_in + ; setState $ state `addCodeBlocksFrom` fork_state_out } + +forkStatics :: FCode a -> FCode a +-- @forkStatics@ $fc$ compiles $fc$ in an environment whose *statics* come +-- from the current *local bindings*, but which is otherwise freshly initialised. +-- The Abstract~C returned is attached to the current state, but the +-- bindings and usage information is otherwise unchanged. +forkStatics body_code + = do { info <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let rhs_info_down = info { cgd_statics = cgs_binds state, + cgd_sequel = initSequel } + (result, fork_state_out) = doFCode body_code rhs_info_down + (initCgState us) + ; setState (state `addCodeBlocksFrom` fork_state_out) + ; return result } + +forkProc :: FCode a -> FCode a +-- 'forkProc' takes a code and compiles it in the *current* environment, +-- returning the graph thus constructed. +-- +-- The current environment is passed on completely unchanged to +-- the successor. In particular, any heap usage from the enclosed +-- code is discarded; it should deal with its own heap consumption +forkProc body_code + = do { info_down <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let fork_state_in = (initCgState us) + { cgs_binds = cgs_binds state } + (result, fork_state_out) = doFCode body_code info_down fork_state_in + ; setState $ state `addCodeBlocksFrom` fork_state_out + ; return result } + +codeOnly :: FCode () -> FCode () +-- Emit any code from the inner thing into the outer thing +-- Do not affect anything else in the outer state +-- Used in almost-circular code to prevent false loop dependencies +codeOnly body_code + = do { info_down <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state, + cgs_hp_usg = cgs_hp_usg state } + ((), fork_state_out) = doFCode body_code info_down fork_state_in + ; setState $ state `addCodeBlocksFrom` fork_state_out } + +forkAlts :: [FCode a] -> FCode [a] +-- (forkAlts' bs d) takes fcodes 'bs' for the branches of a 'case', and +-- an fcode for the default case 'd', and compiles each in the current +-- environment. The current environment is passed on unmodified, except +-- that the virtual Hp is moved on to the worst virtual Hp for the branches + +forkAlts branch_fcodes + = do { info_down <- getInfoDown + ; us <- newUniqSupply + ; state <- getState + ; let compile us branch + = (us2, doFCode branch info_down branch_state) + where + (us1,us2) = splitUniqSupply us + branch_state = (initCgState us1) { + cgs_binds = cgs_binds state, + cgs_hp_usg = cgs_hp_usg state } + + (_us, results) = mapAccumL compile us branch_fcodes + (branch_results, branch_out_states) = unzip results + ; setState $ foldl stateIncUsage state branch_out_states + -- NB foldl. state is the *left* argument to stateIncUsage + ; return branch_results } + +-- collect the code emitted by an FCode computation +getCodeR :: FCode a -> FCode (a, CmmAGraph) +getCodeR fcode + = do { state1 <- getState + ; (a, state2) <- withState fcode (state1 { cgs_stmts = mkNop }) + ; setState $ state2 { cgs_stmts = cgs_stmts state1 } + ; return (a, cgs_stmts state2) } + +getCode :: FCode a -> FCode CmmAGraph +getCode fcode = do { (_,stmts) <- getCodeR fcode; return stmts } + +-- 'getHeapUsage' applies a function to the amount of heap that it uses. +-- It initialises the heap usage to zeros, and passes on an unchanged +-- heap usage. +-- +-- It is usually a prelude to performing a GC check, so everything must +-- be in a tidy and consistent state. +-- +-- Note the slightly subtle fixed point behaviour needed here + +getHeapUsage :: (VirtualHpOffset -> FCode a) -> FCode a +getHeapUsage fcode + = do { info_down <- getInfoDown + ; state <- getState + ; let fstate_in = state { cgs_hp_usg = initHpUsage } + (r, fstate_out) = doFCode (fcode hp_hw) info_down fstate_in + hp_hw = heapHWM (cgs_hp_usg fstate_out) -- Loop here! + + ; setState $ fstate_out { cgs_hp_usg = cgs_hp_usg state } + ; return r } + +-- ---------------------------------------------------------------------------- +-- Combinators for emitting code + +emit :: CmmAGraph -> FCode () +emit ag + = do { state <- getState + ; setState $ state { cgs_stmts = cgs_stmts state <*> ag } } + +emitData :: Section -> [CmmStatic] -> FCode () +emitData sect lits + = do { state <- getState + ; setState $ state { cgs_tops = cgs_tops state `snocOL` data_block } } + where + data_block = CmmData sect lits + +emitProc :: CmmInfo -> CLabel -> CmmFormals -> CmmAGraph -> FCode () +emitProc info lbl args blocks + = do { us <- newUniqSupply + ; let (offset, entry) = mkEntry (mkBlockId $ uniqFromSupply us) Native args + blks = initUs_ us $ lgraphOfAGraph offset $ entry <*> blocks + -- ; blks <- cgStmtsToBlocks blocks + ; let proc_block = CmmProc info lbl args blks + ; state <- getState + ; setState $ state { cgs_tops = cgs_tops state `snocOL` proc_block } } + +emitSimpleProc :: CLabel -> CmmAGraph -> FCode () +-- Emit a procedure whose body is the specified code; no info table +emitSimpleProc lbl code + = emitProc (CmmInfo Nothing Nothing CmmNonInfoTable) lbl [] code + +getCmm :: FCode () -> FCode CmmZ +-- Get all the CmmTops (there should be no stmts) +-- Return a single Cmm which may be split from other Cmms by +-- object splitting (at a later stage) +getCmm code + = do { state1 <- getState + ; ((), state2) <- withState code (state1 { cgs_tops = nilOL }) + ; setState $ state2 { cgs_tops = cgs_tops state1 } + ; return (Cmm (fromOL (cgs_tops state2))) } + +-- ---------------------------------------------------------------------------- +-- CgStmts + +-- These functions deal in terms of CgStmts, which is an abstract type +-- representing the code in the current proc. + +-- turn CgStmts into [CmmBasicBlock], for making a new proc. +cgStmtsToBlocks :: CmmAGraph -> FCode CmmGraph +cgStmtsToBlocks stmts + = do { us <- newUniqSupply + ; return (initUs_ us (lgraphOfAGraph 0 stmts)) } + diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs new file mode 100644 index 0000000000..96467fe781 --- /dev/null +++ b/compiler/codeGen/StgCmmPrim.hs @@ -0,0 +1,662 @@ +----------------------------------------------------------------------------- +-- +-- Stg to C--: primitive operations +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmPrim ( + cgOpApp + ) where + +#include "HsVersions.h" + +import StgCmmLayout +import StgCmmForeign +import StgCmmEnv +import StgCmmMonad +import StgCmmUtils + +import MkZipCfgCmm +import StgSyn +import Cmm +import Type ( Type, tyConAppTyCon ) +import TyCon +import CLabel +import CmmUtils +import PrimOp +import SMRep +import Constants +import FastString +import Outputable + +------------------------------------------------------------------------ +-- Primitive operations and foreign calls +------------------------------------------------------------------------ + +{- Note [Foreign call results] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A foreign call always returns an unboxed tuple of results, one +of which is the state token. This seems to happen even for pure +calls. + +Even if we returned a single result for pure calls, it'd still be +right to wrap it in a singleton unboxed tuple, because the result +might be a Haskell closure pointer, we don't want to evaluate it. -} + +---------------------------------- +cgOpApp :: StgOp -- The op + -> [StgArg] -- Arguments + -> Type -- Result type (always an unboxed tuple) + -> FCode () + +-- Foreign calls +cgOpApp (StgFCallOp fcall _) stg_args res_ty + = do { (res_regs, res_hints) <- newUnboxedTupleRegs res_ty + -- Choose result regs r1, r2 + -- Note [Foreign call results] + ; cgForeignCall res_regs res_hints fcall stg_args + -- r1, r2 = foo( x, y ) + ; emitReturn (map (CmmReg . CmmLocal) res_regs) } + -- return (r1, r2) + +-- tagToEnum# is special: we need to pull the constructor +-- out of the table, and perform an appropriate return. + +cgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty + = ASSERT(isEnumerationTyCon tycon) + do { amode <- getArgAmode arg + ; emitReturn [tagToClosure tycon amode] } + where + -- If you're reading this code in the attempt to figure + -- out why the compiler panic'ed here, it is probably because + -- you used tagToEnum# in a non-monomorphic setting, e.g., + -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x# + -- That won't work. + tycon = tyConAppTyCon res_ty + +cgOpApp (StgPrimOp primop) args res_ty + | primOpOutOfLine primop + = do { cmm_args <- getNonVoidArgAmodes args + ; let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop)) + ; emitCall fun cmm_args } + + | ReturnsPrim VoidRep <- result_info + = do cgPrimOp [] primop args + emitReturn [] + + | ReturnsPrim rep <- result_info + = do res <- newTemp (primRepCmmType rep) + cgPrimOp [res] primop args + emitReturn [CmmReg (CmmLocal res)] + + | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon + = do (regs, _hints) <- newUnboxedTupleRegs res_ty + cgPrimOp regs primop args + emitReturn (map (CmmReg . CmmLocal) regs) + + | ReturnsAlg tycon <- result_info + , isEnumerationTyCon tycon + -- c.f. cgExpr (...TagToEnumOp...) + = do tag_reg <- newTemp bWord + cgPrimOp [tag_reg] primop args + emitReturn [tagToClosure tycon + (CmmReg (CmmLocal tag_reg))] + + | otherwise = panic "cgPrimop" + where + result_info = getPrimOpResultInfo primop + +--------------------------------------------------- +cgPrimOp :: [LocalReg] -- where to put the results + -> PrimOp -- the op + -> [StgArg] -- arguments + -> FCode () + +cgPrimOp results op args + = do arg_exprs <- getNonVoidArgAmodes args + emitPrimOp results op arg_exprs + + +------------------------------------------------------------------------ +-- Emitting code for a primop +------------------------------------------------------------------------ + +emitPrimOp :: [LocalReg] -- where to put the results + -> PrimOp -- the op + -> [CmmExpr] -- arguments + -> FCode () + +-- First we handle various awkward cases specially. The remaining +-- easy cases are then handled by translateOp, defined below. + +emitPrimOp [res_r,res_c] IntAddCOp [aa,bb] +{- + With some bit-twiddling, we can define int{Add,Sub}Czh portably in + C, and without needing any comparisons. This may not be the + fastest way to do it - if you have better code, please send it! --SDM + + Return : r = a + b, c = 0 if no overflow, 1 on overflow. + + We currently don't make use of the r value if c is != 0 (i.e. + overflow), we just convert to big integers and try again. This + could be improved by making r and c the correct values for + plugging into a new J#. + + { r = ((I_)(a)) + ((I_)(b)); \ + c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \ + >> (BITS_IN (I_) - 1); \ + } + Wading through the mass of bracketry, it seems to reduce to: + c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1) + +-} + = emit $ catAGraphs [ + mkAssign (CmmLocal res_r) (CmmMachOp mo_wordAdd [aa,bb]), + mkAssign (CmmLocal res_c) $ + CmmMachOp mo_wordUShr [ + CmmMachOp mo_wordAnd [ + CmmMachOp mo_wordNot [CmmMachOp mo_wordXor [aa,bb]], + CmmMachOp mo_wordXor [aa, CmmReg (CmmLocal res_r)] + ], + CmmLit (mkIntCLit (wORD_SIZE_IN_BITS - 1)) + ] + ] + + +emitPrimOp [res_r,res_c] IntSubCOp [aa,bb] +{- Similarly: + #define subIntCzh(r,c,a,b) \ + { r = ((I_)(a)) - ((I_)(b)); \ + c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \ + >> (BITS_IN (I_) - 1); \ + } + + c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1) +-} + = emit $ catAGraphs [ + mkAssign (CmmLocal res_r) (CmmMachOp mo_wordSub [aa,bb]), + mkAssign (CmmLocal res_c) $ + CmmMachOp mo_wordUShr [ + CmmMachOp mo_wordAnd [ + CmmMachOp mo_wordXor [aa,bb], + CmmMachOp mo_wordXor [aa, CmmReg (CmmLocal res_r)] + ], + CmmLit (mkIntCLit (wORD_SIZE_IN_BITS - 1)) + ] + ] + + +emitPrimOp [res] ParOp [arg] + = + -- for now, just implement this in a C function + -- later, we might want to inline it. + emitCCall + [(res,NoHint)] + (CmmLit (CmmLabel (mkRtsCodeLabel (sLit "newSpark")))) + [(CmmReg (CmmGlobal BaseReg), AddrHint), (arg,AddrHint)] + +emitPrimOp [res] ReadMutVarOp [mutv] + = emit (mkAssign (CmmLocal res) (cmmLoadIndexW mutv fixedHdrSize gcWord)) + +emitPrimOp [] WriteMutVarOp [mutv,var] + = do + emit (mkStore (cmmOffsetW mutv fixedHdrSize) var) + emitCCall + [{-no results-}] + (CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) + [(CmmReg (CmmGlobal BaseReg), AddrHint), (mutv,AddrHint)] + +-- #define sizzeofByteArrayzh(r,a) \ +-- r = (((StgArrWords *)(a))->words * sizeof(W_)) +emitPrimOp [res] SizeofByteArrayOp [arg] + = emit $ + mkAssign (CmmLocal res) (CmmMachOp mo_wordMul [ + cmmLoadIndexW arg fixedHdrSize bWord, + CmmLit (mkIntCLit wORD_SIZE) + ]) + +-- #define sizzeofMutableByteArrayzh(r,a) \ +-- r = (((StgArrWords *)(a))->words * sizeof(W_)) +emitPrimOp [res] SizeofMutableByteArrayOp [arg] + = emitPrimOp [res] SizeofByteArrayOp [arg] + + +-- #define touchzh(o) /* nothing */ +emitPrimOp [] TouchOp [_arg] + = nopC + +-- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a) +emitPrimOp [res] ByteArrayContents_Char [arg] + = emit (mkAssign (CmmLocal res) (cmmOffsetB arg arrWordsHdrSize)) + +-- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn) +emitPrimOp [res] StableNameToIntOp [arg] + = emit (mkAssign (CmmLocal res) (cmmLoadIndexW arg fixedHdrSize bWord)) + +-- #define eqStableNamezh(r,sn1,sn2) \ +-- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn)) +emitPrimOp [res] EqStableNameOp [arg1,arg2] + = emit (mkAssign (CmmLocal res) (CmmMachOp mo_wordEq [ + cmmLoadIndexW arg1 fixedHdrSize bWord, + cmmLoadIndexW arg2 fixedHdrSize bWord + ])) + + +emitPrimOp [res] ReallyUnsafePtrEqualityOp [arg1,arg2] + = emit (mkAssign (CmmLocal res) (CmmMachOp mo_wordEq [arg1,arg2])) + +-- #define addrToHValuezh(r,a) r=(P_)a +emitPrimOp [res] AddrToHValueOp [arg] + = emit (mkAssign (CmmLocal res) arg) + +-- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info)) +-- Note: argument may be tagged! +emitPrimOp [res] DataToTagOp [arg] + = emit (mkAssign (CmmLocal res) (getConstrTag (cmmUntag arg))) + +{- Freezing arrays-of-ptrs requires changing an info table, for the + benefit of the generational collector. It needs to scavenge mutable + objects, even if they are in old space. When they become immutable, + they can be removed from this scavenge list. -} + +-- #define unsafeFreezzeArrayzh(r,a) +-- { +-- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info); +-- r = a; +-- } +emitPrimOp [res] UnsafeFreezeArrayOp [arg] + = emit $ catAGraphs + [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)), + mkAssign (CmmLocal res) arg ] + +-- #define unsafeFreezzeByteArrayzh(r,a) r=(a) +emitPrimOp [res] UnsafeFreezeByteArrayOp [arg] + = emit (mkAssign (CmmLocal res) arg) + +-- Reading/writing pointer arrays + +emitPrimOp [r] ReadArrayOp [obj,ix] = doReadPtrArrayOp r obj ix +emitPrimOp [r] IndexArrayOp [obj,ix] = doReadPtrArrayOp r obj ix +emitPrimOp [] WriteArrayOp [obj,ix,v] = doWritePtrArrayOp obj ix v + +-- IndexXXXoffAddr + +emitPrimOp res IndexOffAddrOp_Char args = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexOffAddrOp_WideChar args = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexOffAddrOp_Int args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Word args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Addr args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args +emitPrimOp res IndexOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args +emitPrimOp res IndexOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res IndexOffAddrOp_Int8 args = doIndexOffAddrOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res IndexOffAddrOp_Int16 args = doIndexOffAddrOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res IndexOffAddrOp_Int32 args = doIndexOffAddrOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res IndexOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args +emitPrimOp res IndexOffAddrOp_Word8 args = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexOffAddrOp_Word16 args = doIndexOffAddrOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res IndexOffAddrOp_Word32 args = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args + +-- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr. + +emitPrimOp res ReadOffAddrOp_Char args = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadOffAddrOp_WideChar args = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadOffAddrOp_Int args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Word args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Addr args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Float args = doIndexOffAddrOp Nothing f32 res args +emitPrimOp res ReadOffAddrOp_Double args = doIndexOffAddrOp Nothing f64 res args +emitPrimOp res ReadOffAddrOp_StablePtr args = doIndexOffAddrOp Nothing bWord res args +emitPrimOp res ReadOffAddrOp_Int8 args = doIndexOffAddrOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res ReadOffAddrOp_Int16 args = doIndexOffAddrOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res ReadOffAddrOp_Int32 args = doIndexOffAddrOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res ReadOffAddrOp_Int64 args = doIndexOffAddrOp Nothing b64 res args +emitPrimOp res ReadOffAddrOp_Word8 args = doIndexOffAddrOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadOffAddrOp_Word16 args = doIndexOffAddrOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res ReadOffAddrOp_Word32 args = doIndexOffAddrOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadOffAddrOp_Word64 args = doIndexOffAddrOp Nothing b64 res args + +-- IndexXXXArray + +emitPrimOp res IndexByteArrayOp_Char args = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexByteArrayOp_WideChar args = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexByteArrayOp_Int args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Word args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Addr args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args +emitPrimOp res IndexByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args +emitPrimOp res IndexByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res IndexByteArrayOp_Int8 args = doIndexByteArrayOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res IndexByteArrayOp_Int16 args = doIndexByteArrayOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res IndexByteArrayOp_Int32 args = doIndexByteArrayOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res IndexByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args +emitPrimOp res IndexByteArrayOp_Word8 args = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res IndexByteArrayOp_Word16 args = doIndexByteArrayOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res IndexByteArrayOp_Word32 args = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res IndexByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args + +-- ReadXXXArray, identical to IndexXXXArray. + +emitPrimOp res ReadByteArrayOp_Char args = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadByteArrayOp_WideChar args = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadByteArrayOp_Int args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Word args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Addr args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Float args = doIndexByteArrayOp Nothing f32 res args +emitPrimOp res ReadByteArrayOp_Double args = doIndexByteArrayOp Nothing f64 res args +emitPrimOp res ReadByteArrayOp_StablePtr args = doIndexByteArrayOp Nothing bWord res args +emitPrimOp res ReadByteArrayOp_Int8 args = doIndexByteArrayOp (Just mo_s_8ToWord) b8 res args +emitPrimOp res ReadByteArrayOp_Int16 args = doIndexByteArrayOp (Just mo_s_16ToWord) b16 res args +emitPrimOp res ReadByteArrayOp_Int32 args = doIndexByteArrayOp (Just mo_s_32ToWord) b32 res args +emitPrimOp res ReadByteArrayOp_Int64 args = doIndexByteArrayOp Nothing b64 res args +emitPrimOp res ReadByteArrayOp_Word8 args = doIndexByteArrayOp (Just mo_u_8ToWord) b8 res args +emitPrimOp res ReadByteArrayOp_Word16 args = doIndexByteArrayOp (Just mo_u_16ToWord) b16 res args +emitPrimOp res ReadByteArrayOp_Word32 args = doIndexByteArrayOp (Just mo_u_32ToWord) b32 res args +emitPrimOp res ReadByteArrayOp_Word64 args = doIndexByteArrayOp Nothing b64 res args + +-- WriteXXXoffAddr + +emitPrimOp res WriteOffAddrOp_Char args = doWriteOffAddrOp (Just mo_WordTo8) res args +emitPrimOp res WriteOffAddrOp_WideChar args = doWriteOffAddrOp (Just mo_WordTo32) res args +emitPrimOp res WriteOffAddrOp_Int args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_Word args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_Addr args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_Float args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_Double args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_StablePtr args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_Int8 args = doWriteOffAddrOp (Just mo_WordTo8) res args +emitPrimOp res WriteOffAddrOp_Int16 args = doWriteOffAddrOp (Just mo_WordTo16) res args +emitPrimOp res WriteOffAddrOp_Int32 args = doWriteOffAddrOp (Just mo_WordTo32) res args +emitPrimOp res WriteOffAddrOp_Int64 args = doWriteOffAddrOp Nothing res args +emitPrimOp res WriteOffAddrOp_Word8 args = doWriteOffAddrOp (Just mo_WordTo8) res args +emitPrimOp res WriteOffAddrOp_Word16 args = doWriteOffAddrOp (Just mo_WordTo16) res args +emitPrimOp res WriteOffAddrOp_Word32 args = doWriteOffAddrOp (Just mo_WordTo32) res args +emitPrimOp res WriteOffAddrOp_Word64 args = doWriteOffAddrOp Nothing res args + +-- WriteXXXArray + +emitPrimOp res WriteByteArrayOp_Char args = doWriteByteArrayOp (Just mo_WordTo8) res args +emitPrimOp res WriteByteArrayOp_WideChar args = doWriteByteArrayOp (Just mo_WordTo32) res args +emitPrimOp res WriteByteArrayOp_Int args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_Word args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_Addr args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_Float args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_Double args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_StablePtr args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_Int8 args = doWriteByteArrayOp (Just mo_WordTo8) res args +emitPrimOp res WriteByteArrayOp_Int16 args = doWriteByteArrayOp (Just mo_WordTo16) res args +emitPrimOp res WriteByteArrayOp_Int32 args = doWriteByteArrayOp (Just mo_WordTo32) res args +emitPrimOp res WriteByteArrayOp_Int64 args = doWriteByteArrayOp Nothing res args +emitPrimOp res WriteByteArrayOp_Word8 args = doWriteByteArrayOp (Just mo_WordTo8) res args +emitPrimOp res WriteByteArrayOp_Word16 args = doWriteByteArrayOp (Just mo_WordTo16) res args +emitPrimOp res WriteByteArrayOp_Word32 args = doWriteByteArrayOp (Just mo_WordTo32) res args +emitPrimOp res WriteByteArrayOp_Word64 args = doWriteByteArrayOp Nothing res args + + +-- The rest just translate straightforwardly +emitPrimOp [res] op [arg] + | nopOp op + = emit (mkAssign (CmmLocal res) arg) + + | Just (mop,rep) <- narrowOp op + = emit (mkAssign (CmmLocal res) $ + CmmMachOp (mop rep wordWidth) [CmmMachOp (mop wordWidth rep) [arg]]) + +emitPrimOp [res] op args + | Just prim <- callishOp op + = do emitPrimCall res prim args + + | Just mop <- translateOp op + = let stmt = mkAssign (CmmLocal res) (CmmMachOp mop args) in + emit stmt + +emitPrimOp _ op _ + = pprPanic "emitPrimOp: can't translate PrimOp" (ppr op) + + +-- These PrimOps are NOPs in Cmm + +nopOp :: PrimOp -> Bool +nopOp Int2WordOp = True +nopOp Word2IntOp = True +nopOp Int2AddrOp = True +nopOp Addr2IntOp = True +nopOp ChrOp = True -- Int# and Char# are rep'd the same +nopOp OrdOp = True +nopOp _ = False + +-- These PrimOps turn into double casts + +narrowOp :: PrimOp -> Maybe (Width -> Width -> MachOp, Width) +narrowOp Narrow8IntOp = Just (MO_SS_Conv, W8) +narrowOp Narrow16IntOp = Just (MO_SS_Conv, W16) +narrowOp Narrow32IntOp = Just (MO_SS_Conv, W32) +narrowOp Narrow8WordOp = Just (MO_UU_Conv, W8) +narrowOp Narrow16WordOp = Just (MO_UU_Conv, W16) +narrowOp Narrow32WordOp = Just (MO_UU_Conv, W32) +narrowOp _ = Nothing + +-- Native word signless ops + +translateOp :: PrimOp -> Maybe MachOp +translateOp IntAddOp = Just mo_wordAdd +translateOp IntSubOp = Just mo_wordSub +translateOp WordAddOp = Just mo_wordAdd +translateOp WordSubOp = Just mo_wordSub +translateOp AddrAddOp = Just mo_wordAdd +translateOp AddrSubOp = Just mo_wordSub + +translateOp IntEqOp = Just mo_wordEq +translateOp IntNeOp = Just mo_wordNe +translateOp WordEqOp = Just mo_wordEq +translateOp WordNeOp = Just mo_wordNe +translateOp AddrEqOp = Just mo_wordEq +translateOp AddrNeOp = Just mo_wordNe + +translateOp AndOp = Just mo_wordAnd +translateOp OrOp = Just mo_wordOr +translateOp XorOp = Just mo_wordXor +translateOp NotOp = Just mo_wordNot +translateOp SllOp = Just mo_wordShl +translateOp SrlOp = Just mo_wordUShr + +translateOp AddrRemOp = Just mo_wordURem + +-- Native word signed ops + +translateOp IntMulOp = Just mo_wordMul +translateOp IntMulMayOfloOp = Just (MO_S_MulMayOflo wordWidth) +translateOp IntQuotOp = Just mo_wordSQuot +translateOp IntRemOp = Just mo_wordSRem +translateOp IntNegOp = Just mo_wordSNeg + + +translateOp IntGeOp = Just mo_wordSGe +translateOp IntLeOp = Just mo_wordSLe +translateOp IntGtOp = Just mo_wordSGt +translateOp IntLtOp = Just mo_wordSLt + +translateOp ISllOp = Just mo_wordShl +translateOp ISraOp = Just mo_wordSShr +translateOp ISrlOp = Just mo_wordUShr + +-- Native word unsigned ops + +translateOp WordGeOp = Just mo_wordUGe +translateOp WordLeOp = Just mo_wordULe +translateOp WordGtOp = Just mo_wordUGt +translateOp WordLtOp = Just mo_wordULt + +translateOp WordMulOp = Just mo_wordMul +translateOp WordQuotOp = Just mo_wordUQuot +translateOp WordRemOp = Just mo_wordURem + +translateOp AddrGeOp = Just mo_wordUGe +translateOp AddrLeOp = Just mo_wordULe +translateOp AddrGtOp = Just mo_wordUGt +translateOp AddrLtOp = Just mo_wordULt + +-- Char# ops + +translateOp CharEqOp = Just (MO_Eq wordWidth) +translateOp CharNeOp = Just (MO_Ne wordWidth) +translateOp CharGeOp = Just (MO_U_Ge wordWidth) +translateOp CharLeOp = Just (MO_U_Le wordWidth) +translateOp CharGtOp = Just (MO_U_Gt wordWidth) +translateOp CharLtOp = Just (MO_U_Lt wordWidth) + +-- Double ops + +translateOp DoubleEqOp = Just (MO_F_Eq W64) +translateOp DoubleNeOp = Just (MO_F_Ne W64) +translateOp DoubleGeOp = Just (MO_F_Ge W64) +translateOp DoubleLeOp = Just (MO_F_Le W64) +translateOp DoubleGtOp = Just (MO_F_Gt W64) +translateOp DoubleLtOp = Just (MO_F_Lt W64) + +translateOp DoubleAddOp = Just (MO_F_Add W64) +translateOp DoubleSubOp = Just (MO_F_Sub W64) +translateOp DoubleMulOp = Just (MO_F_Mul W64) +translateOp DoubleDivOp = Just (MO_F_Quot W64) +translateOp DoubleNegOp = Just (MO_F_Neg W64) + +-- Float ops + +translateOp FloatEqOp = Just (MO_F_Eq W32) +translateOp FloatNeOp = Just (MO_F_Ne W32) +translateOp FloatGeOp = Just (MO_F_Ge W32) +translateOp FloatLeOp = Just (MO_F_Le W32) +translateOp FloatGtOp = Just (MO_F_Gt W32) +translateOp FloatLtOp = Just (MO_F_Lt W32) + +translateOp FloatAddOp = Just (MO_F_Add W32) +translateOp FloatSubOp = Just (MO_F_Sub W32) +translateOp FloatMulOp = Just (MO_F_Mul W32) +translateOp FloatDivOp = Just (MO_F_Quot W32) +translateOp FloatNegOp = Just (MO_F_Neg W32) + +-- Conversions + +translateOp Int2DoubleOp = Just (MO_SF_Conv wordWidth W64) +translateOp Double2IntOp = Just (MO_FS_Conv W64 wordWidth) + +translateOp Int2FloatOp = Just (MO_SF_Conv wordWidth W32) +translateOp Float2IntOp = Just (MO_FS_Conv W32 wordWidth) + +translateOp Float2DoubleOp = Just (MO_FF_Conv W32 W64) +translateOp Double2FloatOp = Just (MO_FF_Conv W64 W32) + +-- Word comparisons masquerading as more exotic things. + +translateOp SameMutVarOp = Just mo_wordEq +translateOp SameMVarOp = Just mo_wordEq +translateOp SameMutableArrayOp = Just mo_wordEq +translateOp SameMutableByteArrayOp = Just mo_wordEq +translateOp SameTVarOp = Just mo_wordEq +translateOp EqStablePtrOp = Just mo_wordEq + +translateOp _ = Nothing + +-- These primops are implemented by CallishMachOps, because they sometimes +-- turn into foreign calls depending on the backend. + +callishOp :: PrimOp -> Maybe CallishMachOp +callishOp DoublePowerOp = Just MO_F64_Pwr +callishOp DoubleSinOp = Just MO_F64_Sin +callishOp DoubleCosOp = Just MO_F64_Cos +callishOp DoubleTanOp = Just MO_F64_Tan +callishOp DoubleSinhOp = Just MO_F64_Sinh +callishOp DoubleCoshOp = Just MO_F64_Cosh +callishOp DoubleTanhOp = Just MO_F64_Tanh +callishOp DoubleAsinOp = Just MO_F64_Asin +callishOp DoubleAcosOp = Just MO_F64_Acos +callishOp DoubleAtanOp = Just MO_F64_Atan +callishOp DoubleLogOp = Just MO_F64_Log +callishOp DoubleExpOp = Just MO_F64_Exp +callishOp DoubleSqrtOp = Just MO_F64_Sqrt + +callishOp FloatPowerOp = Just MO_F32_Pwr +callishOp FloatSinOp = Just MO_F32_Sin +callishOp FloatCosOp = Just MO_F32_Cos +callishOp FloatTanOp = Just MO_F32_Tan +callishOp FloatSinhOp = Just MO_F32_Sinh +callishOp FloatCoshOp = Just MO_F32_Cosh +callishOp FloatTanhOp = Just MO_F32_Tanh +callishOp FloatAsinOp = Just MO_F32_Asin +callishOp FloatAcosOp = Just MO_F32_Acos +callishOp FloatAtanOp = Just MO_F32_Atan +callishOp FloatLogOp = Just MO_F32_Log +callishOp FloatExpOp = Just MO_F32_Exp +callishOp FloatSqrtOp = Just MO_F32_Sqrt + +callishOp _ = Nothing + +------------------------------------------------------------------------------ +-- Helpers for translating various minor variants of array indexing. + +doIndexOffAddrOp :: Maybe MachOp -> CmmType -> [LocalReg] -> [CmmExpr] -> FCode () +doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx] + = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx +doIndexOffAddrOp _ _ _ _ + = panic "CgPrimOp: doIndexOffAddrOp" + +doIndexByteArrayOp :: Maybe MachOp -> CmmType -> [LocalReg] -> [CmmExpr] -> FCode () +doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx] + = mkBasicIndexedRead arrWordsHdrSize maybe_post_read_cast rep res addr idx +doIndexByteArrayOp _ _ _ _ + = panic "CgPrimOp: doIndexByteArrayOp" + +doReadPtrArrayOp :: LocalReg -> CmmExpr -> CmmExpr -> FCode () +doReadPtrArrayOp res addr idx + = mkBasicIndexedRead arrPtrsHdrSize Nothing gcWord res addr idx + + +doWriteOffAddrOp :: Maybe MachOp -> [LocalReg] -> [CmmExpr] -> FCode () +doWriteOffAddrOp maybe_pre_write_cast [] [addr,idx,val] + = mkBasicIndexedWrite 0 maybe_pre_write_cast addr idx val +doWriteOffAddrOp _ _ _ + = panic "CgPrimOp: doWriteOffAddrOp" + +doWriteByteArrayOp :: Maybe MachOp -> [LocalReg] -> [CmmExpr] -> FCode () +doWriteByteArrayOp maybe_pre_write_cast [] [addr,idx,val] + = mkBasicIndexedWrite arrWordsHdrSize maybe_pre_write_cast addr idx val +doWriteByteArrayOp _ _ _ + = panic "CgPrimOp: doWriteByteArrayOp" + +doWritePtrArrayOp :: CmmExpr -> CmmExpr -> CmmExpr -> FCode () +doWritePtrArrayOp addr idx val + = do emit (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel))) + mkBasicIndexedWrite arrPtrsHdrSize Nothing addr idx val + +mkBasicIndexedRead :: ByteOff -> Maybe MachOp -> CmmType + -> LocalReg -> CmmExpr -> CmmExpr -> FCode () +mkBasicIndexedRead off Nothing read_rep res base idx + = emit (mkAssign (CmmLocal res) (cmmLoadIndexOffExpr off read_rep base idx)) +mkBasicIndexedRead off (Just cast) read_rep res base idx + = emit (mkAssign (CmmLocal res) (CmmMachOp cast [ + cmmLoadIndexOffExpr off read_rep base idx])) + +mkBasicIndexedWrite :: ByteOff -> Maybe MachOp + -> CmmExpr -> CmmExpr -> CmmExpr -> FCode () +mkBasicIndexedWrite off Nothing base idx val + = emit (mkStore (cmmIndexOffExpr off (typeWidth (cmmExprType val)) base idx) val) +mkBasicIndexedWrite off (Just cast) base idx val + = mkBasicIndexedWrite off Nothing base idx (CmmMachOp cast [val]) + +-- ---------------------------------------------------------------------------- +-- Misc utils + +cmmIndexOffExpr :: ByteOff -> Width -> CmmExpr -> CmmExpr -> CmmExpr +cmmIndexOffExpr off width base idx + = cmmIndexExpr width (cmmOffsetB base off) idx + +cmmLoadIndexOffExpr :: ByteOff -> CmmType -> CmmExpr -> CmmExpr -> CmmExpr +cmmLoadIndexOffExpr off ty base idx + = CmmLoad (cmmIndexOffExpr off (typeWidth ty) base idx) ty + +setInfo :: CmmExpr -> CmmExpr -> CmmAGraph +setInfo closure_ptr info_ptr = mkStore closure_ptr info_ptr + diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs new file mode 100644 index 0000000000..f442295d25 --- /dev/null +++ b/compiler/codeGen/StgCmmProf.hs @@ -0,0 +1,553 @@ +{-# OPTIONS -w #-} +-- Lots of missing type sigs etc + +----------------------------------------------------------------------------- +-- +-- Code generation for profiling +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmProf ( + initCostCentres, ccType, ccsType, + mkCCostCentre, mkCCostCentreStack, + + -- Cost-centre Profiling + dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, + enterCostCentre, enterCostCentrePAP, enterCostCentreThunk, + chooseDynCostCentres, + costCentreFrom, + curCCS, curCCSAddr, + emitSetCCC, emitCCS, + + saveCurrentCostCentre, restoreCurrentCostCentre, + + -- Lag/drag/void stuff + ldvEnter, ldvEnterClosure, ldvRecordCreate + ) where + +#include "HsVersions.h" +#include "MachDeps.h" + -- For WORD_SIZE_IN_BITS only. +#include "../includes/Constants.h" + -- For LDV_CREATE_MASK, LDV_STATE_USE + -- which are StgWords +#include "../includes/DerivedConstants.h" + -- For REP_xxx constants, which are MachReps + +import StgCmmClosure +import StgCmmUtils +import StgCmmMonad +import SMRep + +import MkZipCfgCmm +import Cmm +import TyCon ( PrimRep(..) ) +import CmmUtils +import CLabel + +import Id +import qualified Module +import CostCentre +import StgSyn +import StaticFlags +import FastString +import Constants -- Lots of field offsets +import Outputable + +import Data.Maybe +import Data.Char +import Control.Monad + +----------------------------------------------------------------------------- +-- +-- Cost-centre-stack Profiling +-- +----------------------------------------------------------------------------- + +-- Expression representing the current cost centre stack +ccsType :: CmmType -- Type of a cost-centre stack +ccsType = bWord + +ccType :: CmmType -- Type of a cost centre +ccType = bWord + +curCCS :: CmmExpr +curCCS = CmmLoad curCCSAddr ccsType + +-- Address of current CCS variable, for storing into +curCCSAddr :: CmmExpr +curCCSAddr = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CCCS"))) + +mkCCostCentre :: CostCentre -> CmmLit +mkCCostCentre cc = CmmLabel (mkCCLabel cc) + +mkCCostCentreStack :: CostCentreStack -> CmmLit +mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs) + +costCentreFrom :: CmmExpr -- A closure pointer + -> CmmExpr -- The cost centre from that closure +costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) ccsType + +staticProfHdr :: CostCentreStack -> [CmmLit] +-- The profiling header words in a static closure +-- Was SET_STATIC_PROF_HDR +staticProfHdr ccs = ifProfilingL [mkCCostCentreStack ccs, + staticLdvInit] + +dynProfHdr :: CmmExpr -> [CmmExpr] +-- Profiling header words in a dynamic closure +dynProfHdr ccs = ifProfilingL [ccs, dynLdvInit] + +initUpdFrameProf :: CmmExpr -> FCode () +-- Initialise the profiling field of an update frame +initUpdFrameProf frame_amode + = ifProfiling $ -- frame->header.prof.ccs = CCCS + emit (mkStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS) + -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0) + -- is unnecessary because it is not used anyhow. + +--------------------------------------------------------------------------- +-- Saving and restoring the current cost centre +--------------------------------------------------------------------------- + +{- Note [Saving the current cost centre] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The current cost centre is like a global register. Like other +global registers, it's a caller-saves one. But consider + case (f x) of (p,q) -> rhs +Since 'f' may set the cost centre, we must restore it +before resuming rhs. So we want code like this: + local_cc = CCC -- save + r = f( x ) + CCC = local_cc -- restore +That is, we explicitly "save" the current cost centre in +a LocalReg, local_cc; and restore it after the call. The +C-- infrastructure will arrange to save local_cc across the +call. + +The same goes for join points; + let j x = join-stuff + in blah-blah +We want this kind of code: + local_cc = CCC -- save + blah-blah + J: + CCC = local_cc -- restore +-} + +saveCurrentCostCentre :: FCode (Maybe LocalReg) + -- Returns Nothing if profiling is off +saveCurrentCostCentre + | not opt_SccProfilingOn + = return Nothing + | otherwise + = do { local_cc <- newTemp ccType + ; emit (mkAssign (CmmLocal local_cc) curCCS) + ; return (Just local_cc) } + +restoreCurrentCostCentre :: Maybe LocalReg -> FCode () +restoreCurrentCostCentre Nothing + = return () +restoreCurrentCostCentre (Just local_cc) + = emit (mkStore curCCSAddr (CmmReg (CmmLocal local_cc))) + + +------------------------------------------------------------------------------- +-- Recording allocation in a cost centre +------------------------------------------------------------------------------- + +-- | Record the allocation of a closure. The CmmExpr is the cost +-- centre stack to which to attribute the allocation. +profDynAlloc :: ClosureInfo -> CmmExpr -> FCode () +profDynAlloc cl_info ccs + = ifProfiling $ + profAlloc (CmmLit (mkIntCLit (closureSize cl_info))) ccs + +-- | Record the allocation of a closure (size is given by a CmmExpr) +-- The size must be in words, because the allocation counter in a CCS counts +-- in words. +profAlloc :: CmmExpr -> CmmExpr -> FCode () +profAlloc words ccs + = ifProfiling $ + emit (addToMemE alloc_rep + (cmmOffsetB ccs oFFSET_CostCentreStack_mem_alloc) + (CmmMachOp (MO_UU_Conv wordWidth (typeWidth alloc_rep)) $ + [CmmMachOp mo_wordSub [words, + CmmLit (mkIntCLit profHdrSize)]])) + -- subtract the "profiling overhead", which is the + -- profiling header in a closure. + where + alloc_rep = REP_CostCentreStack_mem_alloc + +-- ---------------------------------------------------------------------- +-- Setting the cost centre in a new closure + +chooseDynCostCentres :: CostCentreStack + -> [Id] -- Args + -> StgExpr -- Body + -> FCode (CmmExpr, CmmExpr) +-- Called when alllcating a closure +-- Tells which cost centre to put in the object, and which +-- to blame the cost of allocation on +chooseDynCostCentres ccs args body = do + -- Cost-centre we record in the object + use_ccs <- emitCCS ccs + + -- Cost-centre on whom we blame the allocation + let blame_ccs + | null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS) + | otherwise = use_ccs + + return (use_ccs, blame_ccs) + + +-- Some CostCentreStacks are a sequence of pushes on top of CCCS. +-- These pushes must be performed before we can refer to the stack in +-- an expression. +emitCCS :: CostCentreStack -> FCode CmmExpr +emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's) + where + (cc's, ccs') = decomposeCCS ccs + + push_em ccs [] = return ccs + push_em ccs (cc:rest) = do + tmp <- newTemp ccsType + pushCostCentre tmp ccs cc + push_em (CmmReg (CmmLocal tmp)) rest + +ccsExpr :: CostCentreStack -> CmmExpr +ccsExpr ccs + | isCurrentCCS ccs = curCCS + | otherwise = CmmLit (mkCCostCentreStack ccs) + + +isBox :: StgExpr -> Bool +-- If it's an utterly trivial RHS, then it must be +-- one introduced by boxHigherOrderArgs for profiling, +-- so we charge it to "OVERHEAD". +-- This looks like a GROSS HACK to me --SDM +isBox (StgApp fun []) = True +isBox other = False + + +-- ----------------------------------------------------------------------- +-- Setting the current cost centre on entry to a closure + +-- For lexically scoped profiling we have to load the cost centre from +-- the closure entered, if the costs are not supposed to be inherited. +-- This is done immediately on entering the fast entry point. + +-- Load current cost centre from closure, if not inherited. +-- Node is guaranteed to point to it, if profiling and not inherited. + +enterCostCentre + :: ClosureInfo + -> CostCentreStack + -> StgExpr -- The RHS of the closure + -> FCode () + +-- We used to have a special case for bindings of form +-- f = g True +-- where g has arity 2. The RHS is a thunk, but we don't +-- need to update it; and we want to subsume costs. +-- We don't have these sort of PAPs any more, so the special +-- case has gone away. + +enterCostCentre closure_info ccs body + = ifProfiling $ + ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs) + enter_cost_centre closure_info ccs body + +enter_cost_centre closure_info ccs body + | isSubsumedCCS ccs + = ASSERT(isToplevClosure closure_info) + ASSERT(re_entrant) + enter_ccs_fsub + + | isDerivedFromCurrentCCS ccs + = do { + if re_entrant && not is_box + then + enter_ccs_fun node_ccs + else + emit (mkStore curCCSAddr node_ccs) + + -- don't forget to bump the scc count. This closure might have been + -- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal + -- pass has turned into simply let x = e in ...x... and attached + -- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that + -- we don't lose the scc counter, bump it in the entry code for x. + -- ToDo: for a multi-push we should really bump the counter for + -- each of the intervening CCSs, not just the top one. + ; when (not (isCurrentCCS ccs)) $ + emit (bumpSccCount curCCS) + } + + | isCafCCS ccs + = ASSERT(isToplevClosure closure_info) + ASSERT(not re_entrant) + do { -- This is just a special case of the isDerivedFromCurrentCCS + -- case above. We could delete this, but it's a micro + -- optimisation and saves a bit of code. + emit (mkStore curCCSAddr enc_ccs) + ; emit (bumpSccCount node_ccs) + } + + | otherwise + = panic "enterCostCentre" + where + enc_ccs = CmmLit (mkCCostCentreStack ccs) + re_entrant = closureReEntrant closure_info + node_ccs = costCentreFrom (cmmOffsetB (CmmReg nodeReg) (-node_tag)) + is_box = isBox body + + -- if this is a function, then node will be tagged; we must subract the tag + node_tag = funTag closure_info + +-- set the current CCS when entering a PAP +enterCostCentrePAP :: CmmExpr -> FCode () +enterCostCentrePAP closure = + ifProfiling $ do + enter_ccs_fun (costCentreFrom closure) + enteringPAP 1 + +enterCostCentreThunk :: CmmExpr -> FCode () +enterCostCentreThunk closure = + ifProfiling $ do + emit $ mkStore curCCSAddr (costCentreFrom closure) + +enter_ccs_fun stack = emitRtsCall (sLit "EnterFunCCS") [(stack,AddrHint)] False + -- ToDo: vols + +enter_ccs_fsub = enteringPAP 0 + +-- When entering a PAP, EnterFunCCS is called by both the PAP entry +-- code and the function entry code; we don't want the function's +-- entry code to also update CCCS in the event that it was called via +-- a PAP, so we set the flag entering_PAP to indicate that we are +-- entering via a PAP. +enteringPAP :: Integer -> FCode () +enteringPAP n + = emit (mkStore (CmmLit (CmmLabel (mkRtsDataLabel (sLit "entering_PAP")))) + (CmmLit (CmmInt n cIntWidth))) + +ifProfiling :: FCode () -> FCode () +ifProfiling code + | opt_SccProfilingOn = code + | otherwise = nopC + +ifProfilingL :: [a] -> [a] +ifProfilingL xs + | opt_SccProfilingOn = xs + | otherwise = [] + + +--------------------------------------------------------------- +-- Initialising Cost Centres & CCSs +--------------------------------------------------------------- + +initCostCentres :: CollectedCCs -> FCode CmmAGraph +-- Emit the declarations, and return code to register them +initCostCentres (local_CCs, ___extern_CCs, singleton_CCSs) + = getCode $ whenC opt_SccProfilingOn $ + do { mapM_ emitCostCentreDecl local_CCs + ; mapM_ emitCostCentreStackDecl singleton_CCSs + ; emit $ catAGraphs $ map mkRegisterCC local_CCs + ; emit $ catAGraphs $ map mkRegisterCCS singleton_CCSs } + + +emitCostCentreDecl :: CostCentre -> FCode () +emitCostCentreDecl cc = do + { label <- mkStringCLit (costCentreUserName cc) + ; modl <- mkStringCLit (Module.moduleNameString + (Module.moduleName (cc_mod cc))) + -- All cost centres will be in the main package, since we + -- don't normally use -auto-all or add SCCs to other packages. + -- Hence don't emit the package name in the module here. + ; let lits = [ zero, -- StgInt ccID, + label, -- char *label, + modl, -- char *module, + zero, -- StgWord time_ticks + zero64, -- StgWord64 mem_alloc + subsumed, -- StgInt is_caf + zero -- struct _CostCentre *link + ] + ; emitDataLits (mkCCLabel cc) lits + } + where + subsumed | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF + | otherwise = mkIntCLit (ord 'B') -- 'B' == is boring + +emitCostCentreStackDecl :: CostCentreStack -> FCode () +emitCostCentreStackDecl ccs + = case maybeSingletonCCS ccs of + Just cc -> emitDataLits (mkCCSLabel ccs) (mk_lits cc) + Nothing -> pprPanic "emitCostCentreStackDecl" (ppr ccs) + where + mk_lits cc = zero : + mkCCostCentre cc : + replicate (sizeof_ccs_words - 2) zero + -- Note: to avoid making any assumptions about how the + -- C compiler (that compiles the RTS, in particular) does + -- layouts of structs containing long-longs, simply + -- pad out the struct with zero words until we hit the + -- size of the overall struct (which we get via DerivedConstants.h) + +zero = mkIntCLit 0 +zero64 = CmmInt 0 W64 + +sizeof_ccs_words :: Int +sizeof_ccs_words + -- round up to the next word. + | ms == 0 = ws + | otherwise = ws + 1 + where + (ws,ms) = SIZEOF_CostCentreStack `divMod` wORD_SIZE + +-- --------------------------------------------------------------------------- +-- Registering CCs and CCSs + +-- (cc)->link = CC_LIST; +-- CC_LIST = (cc); +-- (cc)->ccID = CC_ID++; + +mkRegisterCC :: CostCentre -> CmmAGraph +mkRegisterCC cc + = withTemp cInt $ \tmp -> + catAGraphs [ + mkStore (cmmOffsetB cc_lit oFFSET_CostCentre_link) + (CmmLoad cC_LIST bWord), + mkStore cC_LIST cc_lit, + mkAssign (CmmLocal tmp) (CmmLoad cC_ID cInt), + mkStore (cmmOffsetB cc_lit oFFSET_CostCentre_ccID) (CmmReg (CmmLocal tmp)), + mkStore cC_ID (cmmRegOffB (CmmLocal tmp) 1) + ] + where + cc_lit = CmmLit (CmmLabel (mkCCLabel cc)) + +-- (ccs)->prevStack = CCS_LIST; +-- CCS_LIST = (ccs); +-- (ccs)->ccsID = CCS_ID++; + +mkRegisterCCS :: CostCentreStack -> CmmAGraph +mkRegisterCCS ccs + = withTemp cInt $ \ tmp -> + catAGraphs [ + mkStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_prevStack) + (CmmLoad cCS_LIST bWord), + mkStore cCS_LIST ccs_lit, + mkAssign (CmmLocal tmp) (CmmLoad cCS_ID cInt), + mkStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_ccsID) (CmmReg (CmmLocal tmp)), + mkStore cCS_ID (cmmRegOffB (CmmLocal tmp) 1) + ] + where + ccs_lit = CmmLit (CmmLabel (mkCCSLabel ccs)) + + +cC_LIST = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CC_LIST"))) +cC_ID = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CC_ID"))) + +cCS_LIST = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CCS_LIST"))) +cCS_ID = CmmLit (CmmLabel (mkRtsDataLabel (sLit "CCS_ID"))) + +-- --------------------------------------------------------------------------- +-- Set the current cost centre stack + +emitSetCCC :: CostCentre -> FCode () +emitSetCCC cc + | not opt_SccProfilingOn = nopC + | otherwise = do + tmp <- newTemp ccsType -- TODO FIXME NOW + ASSERT( sccAbleCostCentre cc ) + pushCostCentre tmp curCCS cc + emit (mkStore curCCSAddr (CmmReg (CmmLocal tmp))) + when (isSccCountCostCentre cc) $ + emit (bumpSccCount curCCS) + +pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode () +pushCostCentre result ccs cc + = emitRtsCallWithResult result AddrHint + (sLit "PushCostCentre") [(ccs,AddrHint), + (CmmLit (mkCCostCentre cc), AddrHint)] + False + +bumpSccCount :: CmmExpr -> CmmAGraph +bumpSccCount ccs + = addToMem REP_CostCentreStack_scc_count + (cmmOffsetB ccs oFFSET_CostCentreStack_scc_count) 1 + +----------------------------------------------------------------------------- +-- +-- Lag/drag/void stuff +-- +----------------------------------------------------------------------------- + +-- +-- Initial value for the LDV field in a static closure +-- +staticLdvInit :: CmmLit +staticLdvInit = zeroCLit + +-- +-- Initial value of the LDV field in a dynamic closure +-- +dynLdvInit :: CmmExpr +dynLdvInit = -- (era << LDV_SHIFT) | LDV_STATE_CREATE + CmmMachOp mo_wordOr [ + CmmMachOp mo_wordShl [loadEra, CmmLit (mkIntCLit lDV_SHIFT) ], + CmmLit (mkWordCLit lDV_STATE_CREATE) + ] + +-- +-- Initialise the LDV word of a new closure +-- +ldvRecordCreate :: CmmExpr -> FCode () +ldvRecordCreate closure = emit $ mkStore (ldvWord closure) dynLdvInit + +-- +-- Called when a closure is entered, marks the closure as having been "used". +-- The closure is not an 'inherently used' one. +-- The closure is not IND or IND_OLDGEN because neither is considered for LDV +-- profiling. +-- +ldvEnterClosure :: ClosureInfo -> FCode () +ldvEnterClosure closure_info = ldvEnter (cmmOffsetB (CmmReg nodeReg) (-tag)) + where tag = funTag closure_info + -- don't forget to substract node's tag + +ldvEnter :: CmmExpr -> FCode () +-- Argument is a closure pointer +ldvEnter cl_ptr + = ifProfiling $ + -- if (era > 0) { + -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | + -- era | LDV_STATE_USE } + emit (mkCmmIfThenElse (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit]) + (mkStore ldv_wd new_ldv_wd) + mkNop) + where + -- don't forget to substract node's tag + ldv_wd = ldvWord cl_ptr + new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd bWord) + (CmmLit (mkWordCLit lDV_CREATE_MASK))) + (cmmOrWord loadEra (CmmLit (mkWordCLit lDV_STATE_USE))) + +loadEra :: CmmExpr +loadEra = CmmMachOp (MO_UU_Conv cIntWidth wordWidth) + [CmmLoad (mkLblExpr (mkRtsDataLabel (sLit "era"))) cInt] + +ldvWord :: CmmExpr -> CmmExpr +-- Takes the address of a closure, and returns +-- the address of the LDV word in the closure +ldvWord closure_ptr = cmmOffsetB closure_ptr oFFSET_StgHeader_ldvw + +-- LDV constants, from ghc/includes/Constants.h +lDV_SHIFT = (LDV_SHIFT :: Int) +--lDV_STATE_MASK = (LDV_STATE_MASK :: StgWord) +lDV_CREATE_MASK = (LDV_CREATE_MASK :: StgWord) +--lDV_LAST_MASK = (LDV_LAST_MASK :: StgWord) +lDV_STATE_CREATE = (LDV_STATE_CREATE :: StgWord) +lDV_STATE_USE = (LDV_STATE_USE :: StgWord) + diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs new file mode 100644 index 0000000000..e4bebb447f --- /dev/null +++ b/compiler/codeGen/StgCmmTicky.hs @@ -0,0 +1,397 @@ +{-# OPTIONS -w #-} +-- Lots of missing type sigs etc + +----------------------------------------------------------------------------- +-- +-- Code generation for ticky-ticky profiling +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmTicky ( + emitTickyCounter, + + tickyDynAlloc, + tickyAllocHeap, + tickyAllocPrim, + tickyAllocThunk, + tickyAllocPAP, + + tickySlowCall, tickyDirectCall, + + tickyPushUpdateFrame, + tickyUpdateFrameOmitted, + + tickyEnterDynCon, + tickyEnterStaticCon, + tickyEnterViaNode, + + tickyEnterFun, + tickyEnterThunk, + + tickyUpdateBhCaf, + tickyBlackHole, + tickyUnboxedTupleReturn, tickyVectoredReturn, + tickyReturnOldCon, tickyReturnNewCon, + + tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs, + tickyUnknownCall, tickySlowCallPat, + + staticTickyHdr, + ) where + +#include "HsVersions.h" +#include "../includes/DerivedConstants.h" + -- For REP_xxx constants, which are MachReps + +import StgCmmClosure +import StgCmmUtils +import StgCmmMonad +import SMRep + +import StgSyn +import Cmm +import MkZipCfgCmm +import CmmUtils +import CLabel + +import Name +import Id +import StaticFlags +import BasicTypes +import FastString +import Constants +import Outputable + +-- Turgid imports for showTypeCategory +import PrelNames +import TcType +import TyCon + +import Data.Maybe + +----------------------------------------------------------------------------- +-- +-- Ticky-ticky profiling +-- +----------------------------------------------------------------------------- + +staticTickyHdr :: [CmmLit] +-- krc: not using this right now -- +-- in the new version of ticky-ticky, we +-- don't change the closure layout. +-- leave it defined, though, to avoid breaking +-- other things. +staticTickyHdr = [] + +emitTickyCounter :: ClosureInfo -> [Id] -> FCode () +emitTickyCounter cl_info args + = ifTicky $ + do { mod_name <- getModuleName + ; fun_descr_lit <- mkStringCLit (fun_descr mod_name) + ; arg_descr_lit <- mkStringCLit arg_descr + ; emitDataLits ticky_ctr_label -- Must match layout of StgEntCounter +-- krc: note that all the fields are I32 now; some were I16 before, +-- but the code generator wasn't handling that properly and it led to chaos, +-- panic and disorder. + [ mkIntCLit 0, + mkIntCLit (length args), -- Arity + mkIntCLit 0, -- XXX: we no longer know this! Words passed on stack + fun_descr_lit, + arg_descr_lit, + zeroCLit, -- Entry count + zeroCLit, -- Allocs + zeroCLit -- Link + ] } + where + name = closureName cl_info + ticky_ctr_label = mkRednCountsLabel name $ clHasCafRefs cl_info + arg_descr = map (showTypeCategory . idType) args + fun_descr mod_name = ppr_for_ticky_name mod_name name + +-- When printing the name of a thing in a ticky file, we want to +-- give the module name even for *local* things. We print +-- just "x (M)" rather that "M.x" to distinguish them from the global kind. +ppr_for_ticky_name mod_name name + | isInternalName name = showSDocDebug (ppr name <+> (parens (ppr mod_name))) + | otherwise = showSDocDebug (ppr name) + +-- ----------------------------------------------------------------------------- +-- Ticky stack frames + +tickyPushUpdateFrame = ifTicky $ bumpTickyCounter (sLit "UPDF_PUSHED_ctr") +tickyUpdateFrameOmitted = ifTicky $ bumpTickyCounter (sLit "UPDF_OMITTED_ctr") + +-- ----------------------------------------------------------------------------- +-- Ticky entries + +tickyEnterDynCon = ifTicky $ bumpTickyCounter (sLit "ENT_DYN_CON_ctr") +tickyEnterDynThunk = ifTicky $ bumpTickyCounter (sLit "ENT_DYN_THK_ctr") +tickyEnterStaticCon = ifTicky $ bumpTickyCounter (sLit "ENT_STATIC_CON_ctr") +tickyEnterStaticThunk = ifTicky $ bumpTickyCounter (sLit "ENT_STATIC_THK_ctr") +tickyEnterViaNode = ifTicky $ bumpTickyCounter (sLit "ENT_VIA_NODE_ctr") + +tickyEnterThunk :: ClosureInfo -> FCode () +tickyEnterThunk cl_info + | isStaticClosure cl_info = tickyEnterStaticThunk + | otherwise = tickyEnterDynThunk + +tickyBlackHole :: Bool{-updatable-} -> FCode () +tickyBlackHole updatable + = ifTicky (bumpTickyCounter ctr) + where + ctr | updatable = (sLit "UPD_BH_SINGLE_ENTRY_ctr") + | otherwise = (sLit "UPD_BH_UPDATABLE_ctr") + +tickyUpdateBhCaf cl_info + = ifTicky (bumpTickyCounter ctr) + where + ctr | closureUpdReqd cl_info = (sLit "UPD_CAF_BH_SINGLE_ENTRY_ctr") + | otherwise = (sLit "UPD_CAF_BH_UPDATABLE_ctr") + +tickyEnterFun :: ClosureInfo -> FCode () +tickyEnterFun cl_info + = ifTicky $ + do { bumpTickyCounter ctr + ; fun_ctr_lbl <- getTickyCtrLabel + ; registerTickyCtr fun_ctr_lbl + ; bumpTickyCounter' (cmmLabelOffB fun_ctr_lbl oFFSET_StgEntCounter_entry_count) + } + where + ctr | isStaticClosure cl_info = (sLit "ENT_STATIC_FUN_DIRECT_ctr") + | otherwise = (sLit "ENT_DYN_FUN_DIRECT_ctr") + +registerTickyCtr :: CLabel -> FCode () +-- Register a ticky counter +-- if ( ! f_ct.registeredp ) { +-- f_ct.link = ticky_entry_ctrs; /* hook this one onto the front of the list */ +-- ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */ +-- f_ct.registeredp = 1 } +registerTickyCtr ctr_lbl + = emit (mkCmmIfThen test (catAGraphs register_stmts)) + where + -- krc: code generator doesn't handle Not, so we test for Eq 0 instead + test = CmmMachOp (MO_Eq wordWidth) + [CmmLoad (CmmLit (cmmLabelOffB ctr_lbl + oFFSET_StgEntCounter_registeredp)) bWord, + CmmLit (mkIntCLit 0)] + register_stmts + = [ mkStore (CmmLit (cmmLabelOffB ctr_lbl oFFSET_StgEntCounter_link)) + (CmmLoad ticky_entry_ctrs bWord) + , mkStore ticky_entry_ctrs (mkLblExpr ctr_lbl) + , mkStore (CmmLit (cmmLabelOffB ctr_lbl + oFFSET_StgEntCounter_registeredp)) + (CmmLit (mkIntCLit 1)) ] + ticky_entry_ctrs = mkLblExpr (mkRtsDataLabel (sLit "ticky_entry_ctrs")) + +tickyReturnOldCon, tickyReturnNewCon :: Arity -> FCode () +tickyReturnOldCon arity + = ifTicky $ do { bumpTickyCounter (sLit "RET_OLD_ctr") + ; bumpHistogram (sLit "RET_OLD_hst") arity } +tickyReturnNewCon arity + | not opt_DoTickyProfiling = nopC + | otherwise + = ifTicky $ do { bumpTickyCounter (sLit "RET_NEW_ctr") + ; bumpHistogram (sLit "RET_NEW_hst") arity } + +tickyUnboxedTupleReturn :: Int -> FCode () +tickyUnboxedTupleReturn arity + = ifTicky $ do { bumpTickyCounter (sLit "RET_UNBOXED_TUP_ctr") + ; bumpHistogram (sLit "RET_UNBOXED_TUP_hst") arity } + +tickyVectoredReturn :: Int -> FCode () +tickyVectoredReturn family_size + = ifTicky $ do { bumpTickyCounter (sLit "VEC_RETURN_ctr") + ; bumpHistogram (sLit "RET_VEC_RETURN_hst") family_size } + +-- ----------------------------------------------------------------------------- +-- Ticky calls + +-- Ticks at a *call site*: +tickyDirectCall :: Arity -> [StgArg] -> FCode () +tickyDirectCall arity args + | arity == length args = tickyKnownCallExact + | otherwise = do tickyKnownCallExtraArgs + tickySlowCallPat (map argPrimRep (drop arity args)) + +tickyKnownCallTooFewArgs = ifTicky $ bumpTickyCounter (sLit "KNOWN_CALL_TOO_FEW_ARGS_ctr") +tickyKnownCallExact = ifTicky $ bumpTickyCounter (sLit "KNOWN_CALL_ctr") +tickyKnownCallExtraArgs = ifTicky $ bumpTickyCounter (sLit "KNOWN_CALL_EXTRA_ARGS_ctr") +tickyUnknownCall = ifTicky $ bumpTickyCounter (sLit "UNKNOWN_CALL_ctr") + +-- Tick for the call pattern at slow call site (i.e. in addition to +-- tickyUnknownCall, tickyKnownCallExtraArgs, etc.) +tickySlowCall :: LambdaFormInfo -> [StgArg] -> FCode () +tickySlowCall lf_info [] + = return () +tickySlowCall lf_info args + = do { if (isKnownFun lf_info) + then tickyKnownCallTooFewArgs + else tickyUnknownCall + ; tickySlowCallPat (map argPrimRep args) } + +tickySlowCallPat :: [PrimRep] -> FCode () +tickySlowCallPat args = return () +{- LATER: (introduces recursive module dependency now). + case callPattern args of + (str, True) -> bumpTickyCounter' (mkRtsSlowTickyCtrLabel pat) + (str, False) -> bumpTickyCounter (sLit "TICK_SLOW_CALL_OTHER") + +callPattern :: [CgRep] -> (String,Bool) +callPattern reps + | match == length reps = (chars, True) + | otherwise = (chars, False) + where (_,match) = findMatch reps + chars = map argChar reps + +argChar VoidArg = 'v' +argChar PtrArg = 'p' +argChar NonPtrArg = 'n' +argChar LongArg = 'l' +argChar FloatArg = 'f' +argChar DoubleArg = 'd' +-} + +-- ----------------------------------------------------------------------------- +-- Ticky allocation + +tickyDynAlloc :: ClosureInfo -> FCode () +-- Called when doing a dynamic heap allocation +tickyDynAlloc cl_info + = ifTicky $ + case smRepClosureType (closureSMRep cl_info) of + Just Constr -> tick_alloc_con + Just ConstrNoCaf -> tick_alloc_con + Just Fun -> tick_alloc_fun + Just Thunk -> tick_alloc_thk + Just ThunkSelector -> tick_alloc_thk + -- black hole + Nothing -> return () + where + -- will be needed when we fill in stubs + cl_size = closureSize cl_info + slop_size = slopSize cl_info + + tick_alloc_thk + | closureUpdReqd cl_info = tick_alloc_up_thk + | otherwise = tick_alloc_se_thk + + -- krc: changed from panic to return () + -- just to get something working + tick_alloc_con = return () + tick_alloc_fun = return () + tick_alloc_up_thk = return () + tick_alloc_se_thk = return () + + +tickyAllocPrim :: CmmExpr -> CmmExpr -> CmmExpr -> FCode () +tickyAllocPrim hdr goods slop = ifTicky $ pprTrace "ToDo: tickyAllocPrim" empty (return ()) + +tickyAllocThunk :: CmmExpr -> CmmExpr -> FCode () +tickyAllocThunk goods slop = ifTicky $ pprTrace "ToDo: tickyAllocThunk" empty (return ()) + +tickyAllocPAP :: CmmExpr -> CmmExpr -> FCode () +tickyAllocPAP goods slop = ifTicky $ pprTrace "ToDo: tickyAllocPAP" empty (return ()) + +tickyAllocHeap :: VirtualHpOffset -> FCode () +-- Called when doing a heap check [TICK_ALLOC_HEAP] +-- Must be lazy in the amount of allocation! +tickyAllocHeap hp + = ifTicky $ + do { ticky_ctr <- getTickyCtrLabel + ; emit $ catAGraphs $ + if hp == 0 then [] -- Inside the emitMiddle to avoid control + else [ -- dependency on the argument + -- Bump the allcoation count in the StgEntCounter + addToMem REP_StgEntCounter_allocs + (CmmLit (cmmLabelOffB ticky_ctr + oFFSET_StgEntCounter_allocs)) hp, + -- Bump ALLOC_HEAP_ctr + addToMemLbl cLong (mkRtsDataLabel (sLit "ALLOC_HEAP_ctr")) 1, + -- Bump ALLOC_HEAP_tot + addToMemLbl cLong (mkRtsDataLabel (sLit "ALLOC_HEAP_tot")) hp] } + +-- ----------------------------------------------------------------------------- +-- Ticky utils + +ifTicky :: FCode () -> FCode () +ifTicky code + | opt_DoTickyProfiling = code + | otherwise = nopC + +-- All the ticky-ticky counters are declared "unsigned long" in C +bumpTickyCounter :: LitString -> FCode () +bumpTickyCounter lbl = bumpTickyCounter' (cmmLabelOffB (mkRtsDataLabel lbl) 0) + +bumpTickyCounter' :: CmmLit -> FCode () +-- krc: note that we're incrementing the _entry_count_ field of the ticky counter +bumpTickyCounter' lhs = emit (addToMem cLong (CmmLit lhs) 1) + +bumpHistogram :: LitString -> Int -> FCode () +bumpHistogram lbl n +-- = bumpHistogramE lbl (CmmLit (CmmInt (fromIntegral n) cLongWidth)) + = return () -- TEMP SPJ Apr 07 + +bumpHistogramE :: LitString -> CmmExpr -> FCode () +bumpHistogramE lbl n + = do t <- newTemp cLong + emit (mkAssign (CmmLocal t) n) + emit (mkCmmIfThen (CmmMachOp (MO_U_Le cLongWidth) [CmmReg (CmmLocal t), eight]) + (mkAssign (CmmLocal t) eight)) + emit (addToMem cLong + (cmmIndexExpr cLongWidth + (CmmLit (CmmLabel (mkRtsDataLabel lbl))) + (CmmReg (CmmLocal t))) + 1) + where + eight = CmmLit (CmmInt 8 cLongWidth) + +------------------------------------------------------------------ +-- Showing the "type category" for ticky-ticky profiling + +showTypeCategory :: Type -> Char + {- {C,I,F,D} char, int, float, double + T tuple + S other single-constructor type + {c,i,f,d} unboxed ditto + t *unpacked* tuple + s *unpacked" single-cons... + + v void# + a primitive array + + E enumeration type + + dictionary, unless it's a ... + L List + > function + M other (multi-constructor) data-con type + . other type + - reserved for others to mark as "uninteresting" + -} +showTypeCategory ty + = if isDictTy ty + then '+' + else + case tcSplitTyConApp_maybe ty of + Nothing -> if isJust (tcSplitFunTy_maybe ty) + then '>' + else '.' + + Just (tycon, _) -> + let utc = getUnique tycon in + if utc == charDataConKey then 'C' + else if utc == intDataConKey then 'I' + else if utc == floatDataConKey then 'F' + else if utc == doubleDataConKey then 'D' + else if utc == charPrimTyConKey then 'c' + else if (utc == intPrimTyConKey || utc == wordPrimTyConKey + || utc == addrPrimTyConKey) then 'i' + else if utc == floatPrimTyConKey then 'f' + else if utc == doublePrimTyConKey then 'd' + else if isPrimTyCon tycon {- array, we hope -} then 'A' -- Bogus + else if isEnumerationTyCon tycon then 'E' + else if isTupleTyCon tycon then 'T' + else if isJust (tyConSingleDataCon_maybe tycon) then 'S' + else if utc == listTyConKey then 'L' + else 'M' -- oh, well... diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs new file mode 100644 index 0000000000..6cfca5f05f --- /dev/null +++ b/compiler/codeGen/StgCmmUtils.hs @@ -0,0 +1,902 @@ +----------------------------------------------------------------------------- +-- +-- Code generator utilities; mostly monadic +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module StgCmmUtils ( + cgLit, mkSimpleLit, + emitDataLits, mkDataLits, + emitRODataLits, mkRODataLits, + emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult, + assignTemp, newTemp, withTemp, + + newUnboxedTupleRegs, + + mkMultiAssign, mkCmmSwitch, mkCmmLitSwitch, + emitSwitch, + + tagToClosure, mkTaggedObjectLoad, + + callerSaveVolatileRegs, get_GlobalReg_addr, + + cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord, + cmmUGtWord, + cmmOffsetExprW, cmmOffsetExprB, + cmmRegOffW, cmmRegOffB, + cmmLabelOffW, cmmLabelOffB, + cmmOffsetW, cmmOffsetB, + cmmOffsetLitW, cmmOffsetLitB, + cmmLoadIndexW, + cmmConstrTag, cmmConstrTag1, + + cmmUntag, cmmIsTagged, cmmGetTag, + + addToMem, addToMemE, addToMemLbl, + mkWordCLit, + mkStringCLit, mkByteStringCLit, + packHalfWordsCLit, + blankWord, + + getSRTInfo, clHasCafRefs, srt_escape + ) where + +#include "HsVersions.h" +#include "MachRegs.h" + +import StgCmmMonad +import StgCmmClosure +import BlockId +import Cmm +import CmmExpr +import MkZipCfgCmm +import CLabel +import CmmUtils +import PprCmm ( {- instances -} ) + +import ForeignCall +import IdInfo +import Type +import TyCon +import Constants +import SMRep +import StgSyn ( SRT(..) ) +import Literal +import Digraph +import ListSetOps +import Util +import Unique +import DynFlags +import FastString +import Outputable + +import Data.Char +import Data.Bits +import Data.Word +import Data.Maybe + + +------------------------------------------------------------------------- +-- +-- Literals +-- +------------------------------------------------------------------------- + +cgLit :: Literal -> FCode CmmLit +cgLit (MachStr s) = mkByteStringCLit (bytesFS s) + -- not unpackFS; we want the UTF-8 byte stream. +cgLit other_lit = return (mkSimpleLit other_lit) + +mkSimpleLit :: Literal -> CmmLit +mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordWidth +mkSimpleLit MachNullAddr = zeroCLit +mkSimpleLit (MachInt i) = CmmInt i wordWidth +mkSimpleLit (MachInt64 i) = CmmInt i W64 +mkSimpleLit (MachWord i) = CmmInt i wordWidth +mkSimpleLit (MachWord64 i) = CmmInt i W64 +mkSimpleLit (MachFloat r) = CmmFloat r W32 +mkSimpleLit (MachDouble r) = CmmFloat r W64 +mkSimpleLit (MachLabel fs ms) = CmmLabel (mkForeignLabel fs ms is_dyn) + where + is_dyn = False -- ToDo: fix me +mkSimpleLit other = pprPanic "mkSimpleLit" (ppr other) + +mkLtOp :: Literal -> MachOp +-- On signed literals we must do a signed comparison +mkLtOp (MachInt _) = MO_S_Lt wordWidth +mkLtOp (MachFloat _) = MO_F_Lt W32 +mkLtOp (MachDouble _) = MO_F_Lt W64 +mkLtOp lit = MO_U_Lt (typeWidth (cmmLitType (mkSimpleLit lit))) + -- ToDo: seems terribly indirect! + + +--------------------------------------------------- +-- +-- Cmm data type functions +-- +--------------------------------------------------- + +-- The "B" variants take byte offsets +cmmRegOffB :: CmmReg -> ByteOff -> CmmExpr +cmmRegOffB = cmmRegOff + +cmmOffsetB :: CmmExpr -> ByteOff -> CmmExpr +cmmOffsetB = cmmOffset + +cmmOffsetExprB :: CmmExpr -> CmmExpr -> CmmExpr +cmmOffsetExprB = cmmOffsetExpr + +cmmLabelOffB :: CLabel -> ByteOff -> CmmLit +cmmLabelOffB = cmmLabelOff + +cmmOffsetLitB :: CmmLit -> ByteOff -> CmmLit +cmmOffsetLitB = cmmOffsetLit + +----------------------- +-- The "W" variants take word offsets +cmmOffsetExprW :: CmmExpr -> CmmExpr -> CmmExpr +-- The second arg is a *word* offset; need to change it to bytes +cmmOffsetExprW e (CmmLit (CmmInt n _)) = cmmOffsetW e (fromInteger n) +cmmOffsetExprW e wd_off = cmmIndexExpr wordWidth e wd_off + +cmmOffsetW :: CmmExpr -> WordOff -> CmmExpr +cmmOffsetW e n = cmmOffsetB e (wORD_SIZE * n) + +cmmRegOffW :: CmmReg -> WordOff -> CmmExpr +cmmRegOffW reg wd_off = cmmRegOffB reg (wd_off * wORD_SIZE) + +cmmOffsetLitW :: CmmLit -> WordOff -> CmmLit +cmmOffsetLitW lit wd_off = cmmOffsetLitB lit (wORD_SIZE * wd_off) + +cmmLabelOffW :: CLabel -> WordOff -> CmmLit +cmmLabelOffW lbl wd_off = cmmLabelOffB lbl (wORD_SIZE * wd_off) + +cmmLoadIndexW :: CmmExpr -> Int -> CmmType -> CmmExpr +cmmLoadIndexW base off ty = CmmLoad (cmmOffsetW base off) ty + +----------------------- +cmmULtWord, cmmUGeWord, cmmUGtWord, cmmSubWord, + cmmNeWord, cmmEqWord, cmmOrWord, cmmAndWord + :: CmmExpr -> CmmExpr -> CmmExpr +cmmOrWord e1 e2 = CmmMachOp mo_wordOr [e1, e2] +cmmAndWord e1 e2 = CmmMachOp mo_wordAnd [e1, e2] +cmmNeWord e1 e2 = CmmMachOp mo_wordNe [e1, e2] +cmmEqWord e1 e2 = CmmMachOp mo_wordEq [e1, e2] +cmmULtWord e1 e2 = CmmMachOp mo_wordULt [e1, e2] +cmmUGeWord e1 e2 = CmmMachOp mo_wordUGe [e1, e2] +cmmUGtWord e1 e2 = CmmMachOp mo_wordUGt [e1, e2] +--cmmShlWord e1 e2 = CmmMachOp mo_wordShl [e1, e2] +--cmmUShrWord e1 e2 = CmmMachOp mo_wordUShr [e1, e2] +cmmSubWord e1 e2 = CmmMachOp mo_wordSub [e1, e2] + +cmmNegate :: CmmExpr -> CmmExpr +cmmNegate (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep) +cmmNegate e = CmmMachOp (MO_S_Neg (cmmExprWidth e)) [e] + +blankWord :: CmmStatic +blankWord = CmmUninitialised wORD_SIZE + +-- Tagging -- +-- Tag bits mask +--cmmTagBits = CmmLit (mkIntCLit tAG_BITS) +cmmTagMask, cmmPointerMask :: CmmExpr +cmmTagMask = CmmLit (mkIntCLit tAG_MASK) +cmmPointerMask = CmmLit (mkIntCLit (complement tAG_MASK)) + +-- Used to untag a possibly tagged pointer +-- A static label need not be untagged +cmmUntag, cmmGetTag :: CmmExpr -> CmmExpr +cmmUntag e@(CmmLit (CmmLabel _)) = e +-- Default case +cmmUntag e = (e `cmmAndWord` cmmPointerMask) + +cmmGetTag e = (e `cmmAndWord` cmmTagMask) + +-- Test if a closure pointer is untagged +cmmIsTagged :: CmmExpr -> CmmExpr +cmmIsTagged e = (e `cmmAndWord` cmmTagMask) + `cmmNeWord` CmmLit zeroCLit + +cmmConstrTag, cmmConstrTag1 :: CmmExpr -> CmmExpr +cmmConstrTag e = (e `cmmAndWord` cmmTagMask) `cmmSubWord` (CmmLit (mkIntCLit 1)) +-- Get constructor tag, but one based. +cmmConstrTag1 e = e `cmmAndWord` cmmTagMask + +----------------------- +-- Making literals + +mkWordCLit :: StgWord -> CmmLit +mkWordCLit wd = CmmInt (fromIntegral wd) wordWidth + +packHalfWordsCLit :: (Integral a, Integral b) => a -> b -> CmmLit +-- Make a single word literal in which the lower_half_word is +-- at the lower address, and the upper_half_word is at the +-- higher address +-- ToDo: consider using half-word lits instead +-- but be careful: that's vulnerable when reversed +packHalfWordsCLit lower_half_word upper_half_word +#ifdef WORDS_BIGENDIAN + = mkWordCLit ((fromIntegral lower_half_word `shiftL` hALF_WORD_SIZE_IN_BITS) + .|. fromIntegral upper_half_word) +#else + = mkWordCLit ((fromIntegral lower_half_word) + .|. (fromIntegral upper_half_word `shiftL` hALF_WORD_SIZE_IN_BITS)) +#endif + +-------------------------------------------------------------------------- +-- +-- Incrementing a memory location +-- +-------------------------------------------------------------------------- + +addToMemLbl :: CmmType -> CLabel -> Int -> CmmAGraph +addToMemLbl rep lbl n = addToMem rep (CmmLit (CmmLabel lbl)) n + +addToMem :: CmmType -- rep of the counter + -> CmmExpr -- Address + -> Int -- What to add (a word) + -> CmmAGraph +addToMem rep ptr n = addToMemE rep ptr (CmmLit (CmmInt (toInteger n) (typeWidth rep))) + +addToMemE :: CmmType -- rep of the counter + -> CmmExpr -- Address + -> CmmExpr -- What to add (a word-typed expression) + -> CmmAGraph +addToMemE rep ptr n + = mkStore ptr (CmmMachOp (MO_Add (typeWidth rep)) [CmmLoad ptr rep, n]) + + +------------------------------------------------------------------------- +-- +-- Loading a field from an object, +-- where the object pointer is itself tagged +-- +------------------------------------------------------------------------- + +mkTaggedObjectLoad :: LocalReg -> LocalReg -> WordOff -> DynTag -> CmmAGraph +-- (loadTaggedObjectField reg base off tag) generates assignment +-- reg = bitsK[ base + off - tag ] +-- where K is fixed by 'reg' +mkTaggedObjectLoad reg base offset tag + = mkAssign (CmmLocal reg) + (CmmLoad (cmmOffsetB (CmmReg (CmmLocal base)) + (wORD_SIZE*offset - tag)) + (localRegType reg)) + +------------------------------------------------------------------------- +-- +-- Converting a closure tag to a closure for enumeration types +-- (this is the implementation of tagToEnum#). +-- +------------------------------------------------------------------------- + +tagToClosure :: TyCon -> CmmExpr -> CmmExpr +tagToClosure tycon tag + = CmmLoad (cmmOffsetExprW closure_tbl tag) bWord + where closure_tbl = CmmLit (CmmLabel lbl) + lbl = mkClosureTableLabel (tyConName tycon) NoCafRefs + +------------------------------------------------------------------------- +-- +-- Conditionals and rts calls +-- +------------------------------------------------------------------------- + +emitRtsCall :: LitString -> [(CmmExpr,ForeignHint)] -> Bool -> FCode () +emitRtsCall fun args safe = emitRtsCall' [] fun args Nothing safe + -- The 'Nothing' says "save all global registers" + +emitRtsCallWithVols :: LitString -> [(CmmExpr,ForeignHint)] -> [GlobalReg] -> Bool -> FCode () +emitRtsCallWithVols fun args vols safe + = emitRtsCall' [] fun args (Just vols) safe + +emitRtsCallWithResult :: LocalReg -> ForeignHint -> LitString + -> [(CmmExpr,ForeignHint)] -> Bool -> FCode () +emitRtsCallWithResult res hint fun args safe + = emitRtsCall' [(res,hint)] fun args Nothing safe + +-- Make a call to an RTS C procedure +emitRtsCall' + :: [(LocalReg,ForeignHint)] + -> LitString + -> [(CmmExpr,ForeignHint)] + -> Maybe [GlobalReg] + -> Bool -- True <=> CmmSafe call + -> FCode () +emitRtsCall' res fun args _vols safe + = --error "emitRtsCall'" + do { emit caller_save + ; emit call + ; emit caller_load } + where + call = if safe then + mkCall fun_expr CCallConv res' args' undefined + else + mkUnsafeCall (ForeignTarget fun_expr + (ForeignConvention CCallConv arg_hints res_hints)) res' args' + (args', arg_hints) = unzip args + (res', res_hints) = unzip res + (caller_save, caller_load) = callerSaveVolatileRegs + fun_expr = mkLblExpr (mkRtsCodeLabel fun) + + +----------------------------------------------------------------------------- +-- +-- Caller-Save Registers +-- +----------------------------------------------------------------------------- + +-- Here we generate the sequence of saves/restores required around a +-- foreign call instruction. + +-- TODO: reconcile with includes/Regs.h +-- * Regs.h claims that BaseReg should be saved last and loaded first +-- * This might not have been tickled before since BaseReg is callee save +-- * Regs.h saves SparkHd, ParkT1, SparkBase and SparkLim +callerSaveVolatileRegs :: (CmmAGraph, CmmAGraph) +callerSaveVolatileRegs = (caller_save, caller_load) + where + caller_save = catAGraphs (map callerSaveGlobalReg regs_to_save) + caller_load = catAGraphs (map callerRestoreGlobalReg regs_to_save) + + system_regs = [ Sp,SpLim,Hp,HpLim,CurrentTSO,CurrentNursery + {- ,SparkHd,SparkTl,SparkBase,SparkLim -} + , BaseReg ] + + regs_to_save = filter callerSaves system_regs + + callerSaveGlobalReg reg + = mkStore (get_GlobalReg_addr reg) (CmmReg (CmmGlobal reg)) + + callerRestoreGlobalReg reg + = mkAssign (CmmGlobal reg) + (CmmLoad (get_GlobalReg_addr reg) (globalRegType reg)) + +-- ----------------------------------------------------------------------------- +-- Global registers + +-- We map STG registers onto appropriate CmmExprs. Either they map +-- to real machine registers or stored as offsets from BaseReg. Given +-- a GlobalReg, get_GlobalReg_addr always produces the +-- register table address for it. +-- (See also get_GlobalReg_reg_or_addr in MachRegs) + +get_GlobalReg_addr :: GlobalReg -> CmmExpr +get_GlobalReg_addr BaseReg = regTableOffset 0 +get_GlobalReg_addr mid = get_Regtable_addr_from_offset + (globalRegType mid) (baseRegOffset mid) + +-- Calculate a literal representing an offset into the register table. +-- Used when we don't have an actual BaseReg to offset from. +regTableOffset :: Int -> CmmExpr +regTableOffset n = + CmmLit (CmmLabelOff mkMainCapabilityLabel (oFFSET_Capability_r + n)) + +get_Regtable_addr_from_offset :: CmmType -> Int -> CmmExpr +get_Regtable_addr_from_offset _rep offset = +#ifdef REG_Base + CmmRegOff (CmmGlobal BaseReg) offset +#else + regTableOffset offset +#endif + + +-- | Returns 'True' if this global register is stored in a caller-saves +-- machine register. + +callerSaves :: GlobalReg -> Bool + +#ifdef CALLER_SAVES_Base +callerSaves BaseReg = True +#endif +#ifdef CALLER_SAVES_Sp +callerSaves Sp = True +#endif +#ifdef CALLER_SAVES_SpLim +callerSaves SpLim = True +#endif +#ifdef CALLER_SAVES_Hp +callerSaves Hp = True +#endif +#ifdef CALLER_SAVES_HpLim +callerSaves HpLim = True +#endif +#ifdef CALLER_SAVES_CurrentTSO +callerSaves CurrentTSO = True +#endif +#ifdef CALLER_SAVES_CurrentNursery +callerSaves CurrentNursery = True +#endif +callerSaves _ = False + + +-- ----------------------------------------------------------------------------- +-- Information about global registers + +baseRegOffset :: GlobalReg -> Int + +baseRegOffset Sp = oFFSET_StgRegTable_rSp +baseRegOffset SpLim = oFFSET_StgRegTable_rSpLim +baseRegOffset (LongReg 1) = oFFSET_StgRegTable_rL1 +baseRegOffset Hp = oFFSET_StgRegTable_rHp +baseRegOffset HpLim = oFFSET_StgRegTable_rHpLim +baseRegOffset CurrentTSO = oFFSET_StgRegTable_rCurrentTSO +baseRegOffset CurrentNursery = oFFSET_StgRegTable_rCurrentNursery +baseRegOffset HpAlloc = oFFSET_StgRegTable_rHpAlloc +baseRegOffset GCEnter1 = oFFSET_stgGCEnter1 +baseRegOffset GCFun = oFFSET_stgGCFun +baseRegOffset reg = pprPanic "baseRegOffset:" (ppr reg) + +------------------------------------------------------------------------- +-- +-- Strings generate a top-level data block +-- +------------------------------------------------------------------------- + +emitDataLits :: CLabel -> [CmmLit] -> FCode () +-- Emit a data-segment data block +emitDataLits lbl lits + = emitData Data (CmmDataLabel lbl : map CmmStaticLit lits) + +mkDataLits :: CLabel -> [CmmLit] -> GenCmmTop CmmStatic info stmt +-- Emit a data-segment data block +mkDataLits lbl lits + = CmmData Data (CmmDataLabel lbl : map CmmStaticLit lits) + +emitRODataLits :: CLabel -> [CmmLit] -> FCode () +-- Emit a read-only data block +emitRODataLits lbl lits + = emitData section (CmmDataLabel lbl : map CmmStaticLit lits) + where section | any needsRelocation lits = RelocatableReadOnlyData + | otherwise = ReadOnlyData + needsRelocation (CmmLabel _) = True + needsRelocation (CmmLabelOff _ _) = True + needsRelocation _ = False + +mkRODataLits :: CLabel -> [CmmLit] -> GenCmmTop CmmStatic info stmt +mkRODataLits lbl lits + = CmmData section (CmmDataLabel lbl : map CmmStaticLit lits) + where section | any needsRelocation lits = RelocatableReadOnlyData + | otherwise = ReadOnlyData + needsRelocation (CmmLabel _) = True + needsRelocation (CmmLabelOff _ _) = True + needsRelocation _ = False + +mkStringCLit :: String -> FCode CmmLit +-- Make a global definition for the string, +-- and return its label +mkStringCLit str = mkByteStringCLit (map (fromIntegral . ord) str) + +mkByteStringCLit :: [Word8] -> FCode CmmLit +mkByteStringCLit bytes + = do { uniq <- newUnique + ; let lbl = mkStringLitLabel uniq + ; emitData ReadOnlyData [CmmDataLabel lbl, CmmString bytes] + ; return (CmmLabel lbl) } + +------------------------------------------------------------------------- +-- +-- Assigning expressions to temporaries +-- +------------------------------------------------------------------------- + +assignTemp :: CmmExpr -> FCode LocalReg +-- Make sure the argument is in a local register +assignTemp (CmmReg (CmmLocal reg)) = return reg +assignTemp e = do { uniq <- newUnique + ; let reg = LocalReg uniq (cmmExprType e) + ; emit (mkAssign (CmmLocal reg) e) + ; return reg } + +newTemp :: CmmType -> FCode LocalReg +newTemp rep = do { uniq <- newUnique + ; return (LocalReg uniq rep) } + +newUnboxedTupleRegs :: Type -> FCode ([LocalReg], [ForeignHint]) +-- Choose suitable local regs to use for the components +-- of an unboxed tuple that we are about to return to +-- the Sequel. If the Sequel is a joint point, using the +-- regs it wants will save later assignments. +newUnboxedTupleRegs res_ty + = ASSERT( isUnboxedTupleType res_ty ) + do { sequel <- getSequel + ; regs <- choose_regs sequel + ; ASSERT( regs `equalLength` reps ) + return (regs, map primRepForeignHint reps) } + where + ty_args = tyConAppArgs (repType res_ty) + reps = [ rep + | ty <- ty_args + , let rep = typePrimRep ty + , not (isVoidRep rep) ] + choose_regs (AssignTo regs _) = return regs + choose_regs _other = mapM (newTemp . primRepCmmType) reps + + + +------------------------------------------------------------------------- +-- mkMultiAssign +------------------------------------------------------------------------- + +mkMultiAssign :: [LocalReg] -> [CmmExpr] -> CmmAGraph +-- Emit code to perform the assignments in the +-- input simultaneously, using temporary variables when necessary. + +type Key = Int +type Vrtx = (Key, Stmt) -- Give each vertex a unique number, + -- for fast comparison +type Stmt = (LocalReg, CmmExpr) -- r := e + +-- We use the strongly-connected component algorithm, in which +-- * the vertices are the statements +-- * an edge goes from s1 to s2 iff +-- s1 assigns to something s2 uses +-- that is, if s1 should *follow* s2 in the final order + +mkMultiAssign [] [] = mkNop +mkMultiAssign [reg] [rhs] = mkAssign (CmmLocal reg) rhs +mkMultiAssign regs rhss = ASSERT( equalLength regs rhss ) + unscramble ([1..] `zip` (regs `zip` rhss)) + +unscramble :: [Vrtx] -> CmmAGraph +unscramble vertices + = catAGraphs (map do_component components) + where + edges :: [ (Vrtx, Key, [Key]) ] + edges = [ (vertex, key1, edges_from stmt1) + | vertex@(key1, stmt1) <- vertices ] + + edges_from :: Stmt -> [Key] + edges_from stmt1 = [ key2 | (key2, stmt2) <- vertices, + stmt1 `mustFollow` stmt2 ] + + components :: [SCC Vrtx] + components = stronglyConnCompFromEdgedVertices edges + + -- do_components deal with one strongly-connected component + -- Not cyclic, or singleton? Just do it + do_component :: SCC Vrtx -> CmmAGraph + do_component (AcyclicSCC (_,stmt)) = mk_graph stmt + do_component (CyclicSCC []) = panic "do_component" + do_component (CyclicSCC [(_,stmt)]) = mk_graph stmt + + -- Cyclic? Then go via temporaries. Pick one to + -- break the loop and try again with the rest. + do_component (CyclicSCC ((_,first_stmt) : rest)) + = withUnique $ \u -> + let (to_tmp, from_tmp) = split u first_stmt + in mk_graph to_tmp + <*> unscramble rest + <*> mk_graph from_tmp + + split :: Unique -> Stmt -> (Stmt, Stmt) + split uniq (reg, rhs) + = ((tmp, rhs), (reg, CmmReg (CmmLocal tmp))) + where + rep = cmmExprType rhs + tmp = LocalReg uniq rep + + mk_graph :: Stmt -> CmmAGraph + mk_graph (reg, rhs) = mkAssign (CmmLocal reg) rhs + +mustFollow :: Stmt -> Stmt -> Bool +(reg, _) `mustFollow` (_, rhs) = reg `regUsedIn` rhs + +regUsedIn :: LocalReg -> CmmExpr -> Bool +reg `regUsedIn` CmmLoad e _ = reg `regUsedIn` e +reg `regUsedIn` CmmReg (CmmLocal reg') = reg == reg' +reg `regUsedIn` CmmRegOff (CmmLocal reg') _ = reg == reg' +reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es +_reg `regUsedIn` _other = False -- The CmmGlobal cases + + +------------------------------------------------------------------------- +-- mkSwitch +------------------------------------------------------------------------- + + +emitSwitch :: CmmExpr -- Tag to switch on + -> [(ConTagZ, CmmAGraph)] -- Tagged branches + -> Maybe CmmAGraph -- Default branch (if any) + -> ConTagZ -> ConTagZ -- Min and Max possible values; behaviour + -- outside this range is undefined + -> FCode () +emitSwitch tag_expr branches mb_deflt lo_tag hi_tag + = do { dflags <- getDynFlags + ; emit (mkCmmSwitch (via_C dflags) tag_expr branches mb_deflt lo_tag hi_tag) } + where + via_C dflags | HscC <- hscTarget dflags = True + | otherwise = False + + +mkCmmSwitch :: Bool -- True <=> never generate a conditional tree + -> CmmExpr -- Tag to switch on + -> [(ConTagZ, CmmAGraph)] -- Tagged branches + -> Maybe CmmAGraph -- Default branch (if any) + -> ConTagZ -> ConTagZ -- Min and Max possible values; behaviour + -- outside this range is undefined + -> CmmAGraph + +-- First, two rather common cases in which there is no work to do +mkCmmSwitch _ _ [] (Just code) _ _ = code +mkCmmSwitch _ _ [(_,code)] Nothing _ _ = code + +-- Right, off we go +mkCmmSwitch via_C tag_expr branches mb_deflt lo_tag hi_tag + = withFreshLabel "switch join" $ \ join_lbl -> + label_default join_lbl mb_deflt $ \ mb_deflt -> + label_branches join_lbl branches $ \ branches -> + assignTemp' tag_expr $ \tag_expr' -> + + mk_switch tag_expr' (sortLe le branches) mb_deflt + lo_tag hi_tag via_C + -- Sort the branches before calling mk_switch + <*> mkLabel join_lbl Nothing + + where + (t1,_) `le` (t2,_) = t1 <= t2 + +mk_switch :: CmmExpr -> [(ConTagZ, BlockId)] + -> Maybe BlockId + -> ConTagZ -> ConTagZ -> Bool + -> CmmAGraph + +-- SINGLETON TAG RANGE: no case analysis to do +mk_switch _tag_expr [(tag, lbl)] _ lo_tag hi_tag _via_C + | lo_tag == hi_tag + = ASSERT( tag == lo_tag ) + mkBranch lbl + +-- SINGLETON BRANCH, NO DEFAULT: no case analysis to do +mk_switch _tag_expr [(_tag,lbl)] Nothing _ _ _ + = mkBranch lbl + -- The simplifier might have eliminated a case + -- so we may have e.g. case xs of + -- [] -> e + -- In that situation we can be sure the (:) case + -- can't happen, so no need to test + +-- SINGLETON BRANCH: one equality check to do +mk_switch tag_expr [(tag,lbl)] (Just deflt) _ _ _ + = mkCbranch cond deflt lbl + where + cond = cmmNeWord tag_expr (CmmLit (mkIntCLit tag)) + -- We have lo_tag < hi_tag, but there's only one branch, + -- so there must be a default + +-- ToDo: we might want to check for the two branch case, where one of +-- the branches is the tag 0, because comparing '== 0' is likely to be +-- more efficient than other kinds of comparison. + +-- DENSE TAG RANGE: use a switch statment. +-- +-- We also use a switch uncoditionally when compiling via C, because +-- this will get emitted as a C switch statement and the C compiler +-- should do a good job of optimising it. Also, older GCC versions +-- (2.95 in particular) have problems compiling the complicated +-- if-trees generated by this code, so compiling to a switch every +-- time works around that problem. +-- +mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C + | use_switch -- Use a switch + = let + find_branch :: ConTagZ -> Maybe BlockId + find_branch i = case (assocMaybe branches i) of + Just lbl -> Just lbl + Nothing -> mb_deflt + + -- NB. we have eliminated impossible branches at + -- either end of the range (see below), so the first + -- tag of a real branch is real_lo_tag (not lo_tag). + arms :: [Maybe BlockId] + arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]] + in + mkSwitch (cmmOffset tag_expr (- real_lo_tag)) arms + + -- if we can knock off a bunch of default cases with one if, then do so + | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches + = mkCmmIfThenElse + (cmmULtWord tag_expr (CmmLit (mkIntCLit lowest_branch))) + (mkBranch deflt) + (mk_switch tag_expr branches mb_deflt + lowest_branch hi_tag via_C) + + | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches + = mkCmmIfThenElse + (cmmUGtWord tag_expr (CmmLit (mkIntCLit highest_branch))) + (mk_switch tag_expr branches mb_deflt + lo_tag highest_branch via_C) + (mkBranch deflt) + + | otherwise -- Use an if-tree + = mkCmmIfThenElse + (cmmUGeWord tag_expr (CmmLit (mkIntCLit mid_tag))) + (mk_switch tag_expr hi_branches mb_deflt + mid_tag hi_tag via_C) + (mk_switch tag_expr lo_branches mb_deflt + lo_tag (mid_tag-1) via_C) + -- we test (e >= mid_tag) rather than (e < mid_tag), because + -- the former works better when e is a comparison, and there + -- are two tags 0 & 1 (mid_tag == 1). In this case, the code + -- generator can reduce the condition to e itself without + -- having to reverse the sense of the comparison: comparisons + -- can't always be easily reversed (eg. floating + -- pt. comparisons). + where + use_switch = {- pprTrace "mk_switch" ( + ppr tag_expr <+> text "n_tags:" <+> int n_tags <+> + text "branches:" <+> ppr (map fst branches) <+> + text "n_branches:" <+> int n_branches <+> + text "lo_tag:" <+> int lo_tag <+> + text "hi_tag:" <+> int hi_tag <+> + text "real_lo_tag:" <+> int real_lo_tag <+> + text "real_hi_tag:" <+> int real_hi_tag) $ -} + ASSERT( n_branches > 1 && n_tags > 1 ) + n_tags > 2 && (via_C || (dense && big_enough)) + -- up to 4 branches we use a decision tree, otherwise + -- a switch (== jump table in the NCG). This seems to be + -- optimal, and corresponds with what gcc does. + big_enough = n_branches > 4 + dense = n_branches > (n_tags `div` 2) + n_branches = length branches + + -- ignore default slots at each end of the range if there's + -- no default branch defined. + lowest_branch = fst (head branches) + highest_branch = fst (last branches) + + real_lo_tag + | isNothing mb_deflt = lowest_branch + | otherwise = lo_tag + + real_hi_tag + | isNothing mb_deflt = highest_branch + | otherwise = hi_tag + + n_tags = real_hi_tag - real_lo_tag + 1 + + -- INVARIANT: Provided hi_tag > lo_tag (which is true) + -- lo_tag <= mid_tag < hi_tag + -- lo_branches have tags < mid_tag + -- hi_branches have tags >= mid_tag + + (mid_tag,_) = branches !! (n_branches `div` 2) + -- 2 branches => n_branches `div` 2 = 1 + -- => branches !! 1 give the *second* tag + -- There are always at least 2 branches here + + (lo_branches, hi_branches) = span is_lo branches + is_lo (t,_) = t < mid_tag + +-------------- +mkCmmLitSwitch :: CmmExpr -- Tag to switch on + -> [(Literal, CmmAGraph)] -- Tagged branches + -> CmmAGraph -- Default branch (always) + -> CmmAGraph -- Emit the code +-- Used for general literals, whose size might not be a word, +-- where there is always a default case, and where we don't know +-- the range of values for certain. For simplicity we always generate a tree. +-- +-- ToDo: for integers we could do better here, perhaps by generalising +-- mk_switch and using that. --SDM 15/09/2004 +mkCmmLitSwitch _scrut [] deflt = deflt +mkCmmLitSwitch scrut branches deflt + = assignTemp' scrut $ \ scrut' -> + withFreshLabel "switch join" $ \ join_lbl -> + label_code join_lbl deflt $ \ deflt -> + label_branches join_lbl branches $ \ branches -> + mk_lit_switch scrut' deflt (sortLe le branches) + where + le (t1,_) (t2,_) = t1 <= t2 + +mk_lit_switch :: CmmExpr -> BlockId + -> [(Literal,BlockId)] + -> CmmAGraph +mk_lit_switch scrut deflt [(lit,blk)] + = mkCbranch + (CmmMachOp (MO_Ne rep) [scrut, CmmLit cmm_lit]) + deflt blk + where + cmm_lit = mkSimpleLit lit + rep = typeWidth (cmmLitType cmm_lit) + +mk_lit_switch scrut deflt_blk_id branches + = mkCmmIfThenElse cond + (mk_lit_switch scrut deflt_blk_id lo_branches) + (mk_lit_switch scrut deflt_blk_id hi_branches) + where + n_branches = length branches + (mid_lit,_) = branches !! (n_branches `div` 2) + -- See notes above re mid_tag + + (lo_branches, hi_branches) = span is_lo branches + is_lo (t,_) = t < mid_lit + + cond = CmmMachOp (mkLtOp mid_lit) + [scrut, CmmLit (mkSimpleLit mid_lit)] + + +-------------- +label_default :: BlockId -> Maybe CmmAGraph + -> (Maybe BlockId -> CmmAGraph) + -> CmmAGraph +label_default _ Nothing thing_inside + = thing_inside Nothing +label_default join_lbl (Just code) thing_inside + = label_code join_lbl code $ \ lbl -> + thing_inside (Just lbl) + +-------------- +label_branches :: BlockId -> [(a,CmmAGraph)] + -> ([(a,BlockId)] -> CmmAGraph) + -> CmmAGraph +label_branches _join_lbl [] thing_inside + = thing_inside [] +label_branches join_lbl ((tag,code):branches) thing_inside + = label_code join_lbl code $ \ lbl -> + label_branches join_lbl branches $ \ branches' -> + thing_inside ((tag,lbl):branches') + +-------------- +label_code :: BlockId -> CmmAGraph -> (BlockId -> CmmAGraph) -> CmmAGraph +-- (label_code J code fun) +-- generates +-- [L: code; goto J] fun L +label_code join_lbl code thing_inside + = withFreshLabel "switch" $ \lbl -> + outOfLine (mkLabel lbl Nothing <*> code <*> mkBranch join_lbl) + <*> thing_inside lbl + + +-------------- +assignTemp' :: CmmExpr -> (CmmExpr -> CmmAGraph) -> CmmAGraph +assignTemp' e thing_inside + | isTrivialCmmExpr e = thing_inside e + | otherwise = withTemp (cmmExprType e) $ \ lreg -> + let reg = CmmLocal lreg in + mkAssign reg e <*> thing_inside (CmmReg reg) + +withTemp :: CmmType -> (LocalReg -> CmmAGraph) -> CmmAGraph +withTemp rep thing_inside + = withUnique $ \uniq -> thing_inside (LocalReg uniq rep) + + +------------------------------------------------------------------------- +-- +-- Static Reference Tables +-- +------------------------------------------------------------------------- + +-- There is just one SRT for each top level binding; all the nested +-- bindings use sub-sections of this SRT. The label is passed down to +-- the nested bindings via the monad. + +getSRTInfo :: SRT -> FCode C_SRT +getSRTInfo (SRTEntries {}) = panic "getSRTInfo" + +getSRTInfo (SRT off len bmp) + | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape] + = do { id <- newUnique + ; top_srt <- getSRTLabel + ; let srt_desc_lbl = mkLargeSRTLabel id + ; emitRODataLits srt_desc_lbl + ( cmmLabelOffW top_srt off + : mkWordCLit (fromIntegral len) + : map mkWordCLit bmp) + ; return (C_SRT srt_desc_lbl 0 srt_escape) } + + | otherwise + = do { top_srt <- getSRTLabel + ; return (C_SRT top_srt off (fromIntegral (head bmp))) } + -- The fromIntegral converts to StgHalfWord + +getSRTInfo NoSRT + = -- TODO: Should we panic in this case? + -- Someone obviously thinks there should be an SRT + return NoC_SRT + + +srt_escape :: StgHalfWord +srt_escape = -1 |