diff options
39 files changed, 832 insertions, 175 deletions
diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs index 130dba05f9..d8d6c9bb46 100644 --- a/compiler/cmm/CmmLint.hs +++ b/compiler/cmm/CmmLint.hs @@ -88,7 +88,8 @@ cmmCheckMachOp op args = return (resultRepOfMachOp op) isWordOffsetReg (CmmGlobal Sp) = True -isWordOffsetReg (CmmGlobal Hp) = True +-- No warnings for unaligned arithmetic, which is used to tag dynamic constructor closures. +--isWordOffsetReg (CmmGlobal Hp) = True isWordOffsetReg _ = False isOffsetOp (MO_Add _) = True @@ -98,14 +99,18 @@ isOffsetOp _ = False -- This expression should be an address from which a word can be loaded: -- check for funny-looking sub-word offsets. cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)]) - | isOffsetOp op && i `rem` fromIntegral wORD_SIZE /= 0 + | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0 = cmmLintDubiousWordOffset e cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg]) - | isOffsetOp op && i `rem` fromIntegral wORD_SIZE /= 0 + | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0 = cmmLintDubiousWordOffset e cmmCheckWordAddress _ = return () +-- No warnings for unaligned arithmetic with the node register, +-- which is used to extract fields from tagged constructor closures. +notNodeReg (CmmReg reg) | reg == nodeReg = False +notNodeReg _ = True lintCmmStmt :: CmmStmt -> CmmLint () lintCmmStmt stmt@(CmmAssign reg expr) = do diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index 77d337df93..6032dc255c 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -322,8 +322,9 @@ pprExpr e = case e of -> char '*' <> pprAsPtrReg r CmmLoad (CmmRegOff r off) rep - | isPtrReg r && rep == wordRep + | isPtrReg r && rep == wordRep && (off `rem` wORD_SIZE == 0) -- ToDo: check that the offset is a word multiple? + -- (For tagging to work, I had to avoid unaligned loads. --ARY) -> pprAsPtrReg r <> brackets (ppr (off `shiftR` wordShift)) CmmLoad expr rep -> diff --git a/compiler/codeGen/CgBindery.lhs b/compiler/codeGen/CgBindery.lhs index d5a2c69d60..7447222d45 100644 --- a/compiler/codeGen/CgBindery.lhs +++ b/compiler/codeGen/CgBindery.lhs @@ -11,7 +11,8 @@ module CgBindery ( cgIdInfoId, cgIdInfoArgRep, cgIdInfoLF, - stableIdInfo, heapIdInfo, + stableIdInfo, heapIdInfo, + taggedStableIdInfo, taggedHeapIdInfo, letNoEscapeIdInfo, idInfoToAmode, addBindC, addBindsC, @@ -22,7 +23,7 @@ module CgBindery ( getLiveStackBindings, bindArgsToStack, rebindToStack, - bindNewToNode, bindNewToReg, bindArgsToRegs, + bindNewToNode, bindNewToUntagNode, bindNewToReg, bindArgsToRegs, bindNewToTemp, getArgAmode, getArgAmodes, getCgIdInfo, @@ -38,11 +39,13 @@ import CgStackery import CgUtils import CLabel import ClosureInfo +import Constants import Cmm import PprCmm ( {- instance Outputable -} ) import SMRep import Id +import DataCon import VarEnv import VarSet import Literal @@ -52,6 +55,7 @@ import StgSyn import Unique import UniqSet import Outputable + \end{code} @@ -80,23 +84,44 @@ data CgIdInfo , cg_rep :: CgRep , cg_vol :: VolatileLoc , cg_stb :: StableLoc - , cg_lf :: LambdaFormInfo } + , cg_lf :: LambdaFormInfo + , cg_tag :: {-# UNPACK #-} !Int -- tag to be added in idInfoToAmode + } mkCgIdInfo id vol stb lf = CgIdInfo { cg_id = id, cg_vol = vol, cg_stb = stb, - cg_lf = lf, cg_rep = idCgRep id } + cg_lf = lf, cg_rep = idCgRep id, cg_tag = tag } + where + tag + | Just con <- isDataConWorkId_maybe id, + {- Is this an identifier for a static constructor closure? -} + isNullaryRepDataCon con + {- If yes, is this a nullary constructor? + If yes, we assume that the constructor is evaluated and can + be tagged. + -} + = tagForCon con + + | otherwise + = funTagLFInfo lf voidIdInfo id = CgIdInfo { cg_id = id, cg_vol = NoVolatileLoc , cg_stb = VoidLoc, cg_lf = mkLFArgument id - , cg_rep = VoidArg } + , cg_rep = VoidArg, cg_tag = 0 } -- Used just for VoidRep things data VolatileLoc -- These locations die across a call = NoVolatileLoc | RegLoc CmmReg -- In one of the registers (global or local) | VirHpLoc VirtualHpOffset -- Hp+offset (address of closure) - | VirNodeLoc VirtualHpOffset -- Cts of offset indirect from Node - -- ie *(Node+offset) + | VirNodeLoc ByteOff -- Cts of offset indirect from Node + -- ie *(Node+offset). + -- NB. Byte offset, because we subtract R1's + -- tag from the offset. + +mkTaggedCgIdInfo id vol stb lf con + = CgIdInfo { cg_id = id, cg_vol = vol, cg_stb = stb, + cg_lf = lf, cg_rep = idCgRep id, cg_tag = tagForCon con } \end{code} @StableLoc@ encodes where an Id can be found, used by @@ -121,7 +146,7 @@ data StableLoc \begin{code} instance Outputable CgIdInfo where - ppr (CgIdInfo id rep vol stb lf) + ppr (CgIdInfo id rep vol stb lf _) -- TODO, pretty pring the tag info = ppr id <+> ptext SLIT("-->") <+> vcat [ppr vol, ppr stb] instance Outputable VolatileLoc where @@ -149,19 +174,29 @@ stableIdInfo id amode lf_info = mkCgIdInfo id NoVolatileLoc (StableLoc amode) heapIdInfo id offset lf_info = mkCgIdInfo id (VirHpLoc offset) NoStableLoc lf_info letNoEscapeIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLNE sp) lf_info stackIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLoc sp) lf_info -nodeIdInfo id offset lf_info = mkCgIdInfo id (VirNodeLoc offset) NoStableLoc lf_info +nodeIdInfo id offset lf_info = mkCgIdInfo id (VirNodeLoc (wORD_SIZE*offset)) NoStableLoc lf_info regIdInfo id reg lf_info = mkCgIdInfo id (RegLoc reg) NoStableLoc lf_info +taggedStableIdInfo id amode lf_info con + = mkTaggedCgIdInfo id NoVolatileLoc (StableLoc amode) lf_info con +taggedHeapIdInfo id offset lf_info con + = mkTaggedCgIdInfo id (VirHpLoc offset) NoStableLoc lf_info con +untagNodeIdInfo id offset lf_info tag + = mkCgIdInfo id (VirNodeLoc (wORD_SIZE*offset - tag)) NoStableLoc lf_info + + idInfoToAmode :: CgIdInfo -> FCode CmmExpr idInfoToAmode info = case cg_vol info of { RegLoc reg -> returnFC (CmmReg reg) ; - VirNodeLoc nd_off -> returnFC (CmmLoad (cmmOffsetW (CmmReg nodeReg) nd_off) mach_rep) ; - VirHpLoc hp_off -> getHpRelOffset hp_off ; + VirNodeLoc nd_off -> returnFC (CmmLoad (cmmOffsetB (CmmReg nodeReg) nd_off) + mach_rep) ; + VirHpLoc hp_off -> do { off <- getHpRelOffset hp_off + ; return $! maybeTag off }; NoVolatileLoc -> case cg_stb info of - StableLoc amode -> returnFC amode + StableLoc amode -> returnFC $! maybeTag amode VirStkLoc sp_off -> do { sp_rel <- getSpRelOffset sp_off ; return (CmmLoad sp_rel mach_rep) } @@ -177,6 +212,11 @@ idInfoToAmode info where mach_rep = argMachRep (cg_rep info) + maybeTag amode -- add the tag, if we have one + | tag == 0 = amode + | otherwise = cmmOffsetB amode tag + where tag = cg_tag info + cgIdInfoId :: CgIdInfo -> Id cgIdInfoId = cg_id @@ -389,6 +429,10 @@ bindNewToNode :: Id -> VirtualHpOffset -> LambdaFormInfo -> Code bindNewToNode id offset lf_info = addBindC id (nodeIdInfo id offset lf_info) +bindNewToUntagNode :: Id -> VirtualHpOffset -> LambdaFormInfo -> Int -> Code +bindNewToUntagNode id offset lf_info tag + = addBindC id (untagNodeIdInfo id offset lf_info tag) + -- Create a new temporary whose unique is that in the id, -- bind the id to it, and return the addressing mode for the -- temporary. diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs index fabf434d07..86e13ab383 100644 --- a/compiler/codeGen/CgClosure.lhs +++ b/compiler/codeGen/CgClosure.lhs @@ -177,7 +177,14 @@ cgRhsClosure bndr cc bndr_info fvs upd_flag args body = do -- BUILD ITS INFO TABLE AND CODE ; forkClosureBody (do { -- Bind the fvs - let bind_fv (info, offset) + let + -- A function closure pointer may be tagged, so we + -- must take it into account when accessing the free variables. + mbtag = tagForArity (length args) + bind_fv (info, offset) + | Just tag <- mbtag + = bindNewToUntagNode (cgIdInfoId info) offset (cgIdInfoLF info) tag + | otherwise = bindNewToNode (cgIdInfoId info) offset (cgIdInfoLF info) ; mapCs bind_fv bind_details @@ -236,7 +243,7 @@ NB: Thunks cannot have a primitive type! closureCodeBody binder_info cl_info cc [{- No args i.e. thunk -}] body = do { body_absC <- getCgStmts $ do { tickyEnterThunk cl_info - ; ldvEnter (CmmReg nodeReg) -- NB: Node always points when profiling + ; ldvEnterClosure cl_info -- NB: Node always points when profiling ; thunkWrapper cl_info $ do -- We only enter cc after setting up update so -- that cc of enclosing scope will be recorded @@ -400,8 +407,19 @@ funWrapper :: ClosureInfo -- Closure whose code body this is funWrapper closure_info arg_regs reg_save_code fun_body = do { let node_points = nodeMustPointToIt (closureLFInfo closure_info) + {- + -- Debugging: check that R1 has the correct tag + ; let tag = funTag closure_info + ; whenC (tag /= 0 && node_points) $ do + l <- newLabelC + stmtC (CmmCondBranch (CmmMachOp mo_wordEq [cmmGetTag (CmmReg nodeReg), + CmmLit (mkIntCLit tag)]) l) + stmtC (CmmStore (CmmLit (mkWordCLit 0)) (CmmLit (mkWordCLit 0))) + labelC l + -} + -- Enter for Ldv profiling - ; whenC node_points (ldvEnter (CmmReg nodeReg)) + ; whenC node_points (ldvEnterClosure closure_info) -- GranSim yeild poin ; granYield arg_regs node_points diff --git a/compiler/codeGen/CgCon.lhs b/compiler/codeGen/CgCon.lhs index a2c8578d18..91d7098f3e 100644 --- a/compiler/codeGen/CgCon.lhs +++ b/compiler/codeGen/CgCon.lhs @@ -43,8 +43,10 @@ import Id import Type import PrelInfo import Outputable -import Util import ListSetOps +#ifdef DEBUG +import Util ( lengthIs ) +#endif \end{code} @@ -93,7 +95,7 @@ cgTopRhsCon id con args ; emitDataLits closure_label closure_rep -- RETURN - ; returnFC (id, stableIdInfo id (mkLblExpr closure_label) lf_info) } + ; returnFC (id, taggedStableIdInfo id (mkLblExpr closure_label) lf_info con) } \end{code} %************************************************************************ @@ -134,9 +136,10 @@ at all. \begin{code} buildDynCon binder cc con [] = do this_pkg <- getThisPackage - returnFC (stableIdInfo binder + returnFC (taggedStableIdInfo binder (mkLblExpr (mkClosureLabel this_pkg (dataConName con))) - (mkConLFInfo con)) + (mkConLFInfo con) + con) \end{code} The following three paragraphs about @Char@-like and @Int@-like @@ -170,7 +173,7 @@ buildDynCon binder cc con [arg_amode] offsetW = (val_int - mIN_INTLIKE) * (fixedHdrSize + 1) -- INTLIKE closures consist of a header and one word payload intlike_amode = CmmLit (cmmLabelOffW intlike_lbl offsetW) - ; returnFC (stableIdInfo binder intlike_amode (mkConLFInfo con)) } + ; returnFC (taggedStableIdInfo binder intlike_amode (mkConLFInfo con) con) } buildDynCon binder cc con [arg_amode] | maybeCharLikeCon con @@ -181,7 +184,7 @@ buildDynCon binder cc con [arg_amode] offsetW = (val_int - mIN_CHARLIKE) * (fixedHdrSize + 1) -- CHARLIKE closures consist of a header and one word payload charlike_amode = CmmLit (cmmLabelOffW charlike_lbl offsetW) - ; returnFC (stableIdInfo binder charlike_amode (mkConLFInfo con)) } + ; returnFC (taggedStableIdInfo binder charlike_amode (mkConLFInfo con) con) } \end{code} Now the general case. @@ -194,7 +197,7 @@ buildDynCon binder ccs con args (closure_info, amodes_w_offsets) = layOutDynConstr this_pkg con args ; hp_off <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets - ; returnFC (heapIdInfo binder hp_off lf_info) } + ; returnFC (taggedHeapIdInfo binder hp_off lf_info con) } where lf_info = mkConLFInfo con @@ -223,7 +226,9 @@ bindConArgs :: DataCon -> [Id] -> Code bindConArgs con args = do this_pkg <- getThisPackage let - bind_arg (arg, offset) = bindNewToNode arg offset (mkLFArgument arg) + -- The binding below forces the masking out of the tag bits + -- when accessing the constructor field. + bind_arg (arg, offset) = bindNewToUntagNode arg offset (mkLFArgument arg) (tagForCon con) (_, args_w_offsets) = layOutDynConstr this_pkg con (addIdReps args) -- ASSERT(not (isUnboxedTupleCon con)) return () @@ -386,11 +391,12 @@ cgTyCon tycon -- Put the table after the data constructor decls, because the -- datatype closure table (for enumeration types) -- to (say) PrelBase_$wTrue_closure, which is defined in code_stuff + -- Note that the closure pointers are tagged. ; extra <- if isEnumerationTyCon tycon then do tbl <- getCmm (emitRODataLits (mkLocalClosureTableLabel (tyConName tycon)) - [ CmmLabel (mkLocalClosureLabel (dataConName con)) + [ CmmLabelOff (mkLocalClosureLabel (dataConName con)) (tagForCon con) | con <- tyConDataCons tycon]) return [tbl] else @@ -434,6 +440,9 @@ cgDataCon data_con body_code = do { -- NB: We don't set CC when entering data (WDP 94/06) tickyReturnOldCon (length arg_things) + -- The case continuation code is expecting a tagged pointer + ; stmtC (CmmAssign nodeReg + (tagCons data_con (CmmReg nodeReg))) ; performReturn emitReturnInstr } -- noStmts: Ptr to thing already in Node diff --git a/compiler/codeGen/CgHeapery.lhs b/compiler/codeGen/CgHeapery.lhs index 3bba211aa1..b89452e1de 100644 --- a/compiler/codeGen/CgHeapery.lhs +++ b/compiler/codeGen/CgHeapery.lhs @@ -288,6 +288,9 @@ hpStkCheck cl_info is_fun reg_save_code code = noStmts | otherwise = oneStmt (CmmAssign nodeReg (CmmLit (CmmLabel closure_lbl))) + -- Strictly speaking, we should tag node here. But if + -- node doesn't point to the closure, the code for the closure + -- cannot depend on the value of R1 anyway, so we're safe. closure_lbl = closureLabelFromCI cl_info full_save_code = node_asst `plusStmts` reg_save_code diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs index 4e38485455..e9751fa748 100644 --- a/compiler/codeGen/CgInfoTbls.hs +++ b/compiler/codeGen/CgInfoTbls.hs @@ -15,6 +15,7 @@ module CgInfoTbls ( stdInfoTableSizeB, entryCode, closureInfoPtr, getConstrTag, + cmmGetClosureType, infoTable, infoTableClosureType, infoTablePtrs, infoTableNonPtrs, funInfoTable, makeRelativeRefTo @@ -273,14 +274,24 @@ emitAlgReturnTarget emitAlgReturnTarget name branches mb_deflt fam_sz = do { blks <- getCgStmts $ - emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) - -- NB: tag_expr is zero-based + -- is the constructor tag in the node reg? + if isSmallFamily fam_sz + then do -- yes, node has constr. tag + let tag_expr = cmmConstrTag1 (CmmReg nodeReg) + branches' = [(tag+1,branch)|(tag,branch)<-branches] + emitSwitch tag_expr branches' mb_deflt 1 fam_sz + else do -- no, get tag from info table + let -- Note that ptr _always_ has tag 1 + -- when the family size is big enough + untagged_ptr = cmmRegOffB nodeReg (-1) + tag_expr = getConstrTag (untagged_ptr) + emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) ; lbl <- emitReturnTarget name blks ; return (lbl, Nothing) } -- Nothing: the internal branches in the switch don't have -- global labels, so we can't use them at the 'call site' where - tag_expr = getConstrTag (CmmReg nodeReg) + uniq = getUnique name -------------------------------- emitReturnInstr :: Code @@ -346,6 +357,14 @@ getConstrTag closure_ptr where info_table = infoTable (closureInfoPtr closure_ptr) +cmmGetClosureType :: CmmExpr -> CmmExpr +-- Takes a closure pointer, and return the closure type +-- obtained from the info table +cmmGetClosureType closure_ptr + = CmmMachOp (MO_U_Conv halfWordRep wordRep) [infoTableClosureType info_table] + where + info_table = infoTable (closureInfoPtr closure_ptr) + infoTable :: CmmExpr -> CmmExpr -- Takes an info pointer (the first word of a closure) -- and returns a pointer to the first word of the standard-form diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs index d26d9c6901..e489d73646 100644 --- a/compiler/codeGen/CgPrimOp.hs +++ b/compiler/codeGen/CgPrimOp.hs @@ -183,8 +183,9 @@ emitPrimOp [res] AddrToHValueOp [arg] live = stmtC (CmmAssign (CmmLocal res) arg) -- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info)) +-- Note: argument may be tagged! emitPrimOp [res] DataToTagOp [arg] live - = stmtC (CmmAssign (CmmLocal res) (getConstrTag arg)) + = stmtC (CmmAssign (CmmLocal res) (getConstrTag (cmmUntag arg))) {- Freezing arrays-of-ptrs requires changing an info table, for the benefit of the generational collector. It needs to scavenge mutable diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs index 27ee54c50d..651f0eaa82 100644 --- a/compiler/codeGen/CgProf.hs +++ b/compiler/codeGen/CgProf.hs @@ -20,7 +20,7 @@ module CgProf ( emitSetCCC, emitCCS, -- Lag/drag/void stuff - ldvEnter, ldvRecordCreate + ldvEnter, ldvEnterClosure, ldvRecordCreate ) where #include "HsVersions.h" @@ -242,9 +242,12 @@ enter_cost_centre closure_info ccs body where enc_ccs = CmmLit (mkCCostCentreStack ccs) re_entrant = closureReEntrant closure_info - node_ccs = costCentreFrom (CmmReg nodeReg) + node_ccs = costCentreFrom (cmmOffsetB (CmmReg nodeReg) (-node_tag)) is_box = isBox body + -- if this is a function, then node will be tagged; we must subract the tag + node_tag = funTag closure_info + -- set the current CCS when entering a PAP enterCostCentrePAP :: CmmExpr -> Code enterCostCentrePAP closure = @@ -448,9 +451,14 @@ ldvRecordCreate closure = stmtC $ CmmStore (ldvWord closure) dynLdvInit -- The closure is not IND or IND_OLDGEN because neither is considered for LDV -- profiling. -- +ldvEnterClosure :: ClosureInfo -> Code +ldvEnterClosure closure_info = ldvEnter (cmmOffsetB (CmmReg nodeReg) (-tag)) + where tag = funTag closure_info + -- don't forget to substract node's tag + ldvEnter :: CmmExpr -> Code -- Argument is a closure pointer -ldvEnter cl_ptr +ldvEnter cl_ptr = ifProfiling $ -- if (era > 0) { -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | @@ -458,6 +466,7 @@ ldvEnter cl_ptr emitIf (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit]) (stmtC (CmmStore ldv_wd new_ldv_wd)) where + -- don't forget to substract node's tag ldv_wd = ldvWord cl_ptr new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd wordRep) (CmmLit (mkWordCLit lDV_CREATE_MASK))) diff --git a/compiler/codeGen/CgTailCall.lhs b/compiler/codeGen/CgTailCall.lhs index 22cecb7249..952702674f 100644 --- a/compiler/codeGen/CgTailCall.lhs +++ b/compiler/codeGen/CgTailCall.lhs @@ -27,6 +27,7 @@ import CgUtils import CgTicky import ClosureInfo import SMRep +import MachOp import Cmm import CmmUtils import CLabel @@ -102,7 +103,8 @@ performTailCall fun_info arg_amodes pending_assts | otherwise = do { fun_amode <- idInfoToAmode fun_info - ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode) + ; let assignSt = CmmAssign nodeReg fun_amode + node_asst = oneStmt assignSt opt_node_asst | nodeMustPointToIt lf_info = node_asst | otherwise = noStmts ; EndOfBlockInfo sp _ <- getEndOfBlockInfo @@ -113,8 +115,15 @@ performTailCall fun_info arg_amodes pending_assts -- Node must always point to things we enter EnterIt -> do { emitSimultaneously (node_asst `plusStmts` pending_assts) - ; let target = entryCode (closureInfoPtr (CmmReg nodeReg)) - ; doFinalJump sp False (stmtC (CmmJump target [])) } + ; let target = entryCode (closureInfoPtr (CmmReg nodeReg)) + enterClosure = stmtC (CmmJump target []) + -- If this is a scrutinee + -- let's check if the closure is a constructor + -- so we can directly jump to the alternatives switch + -- statement. + jumpInstr = getEndOfBlockInfo >>= + maybeSwitchOnCons enterClosure + ; doFinalJump sp False jumpInstr } -- A function, but we have zero arguments. It is already in WHNF, -- so we can just return it. @@ -149,6 +158,7 @@ performTailCall fun_info arg_amodes pending_assts ; directCall sp apply_lbl args extra_args (node_asst `plusStmts` pending_assts) + } -- A direct function call (possibly with some left-over arguments) @@ -169,8 +179,58 @@ performTailCall fun_info arg_amodes pending_assts where fun_name = idName (cgIdInfoId fun_info) lf_info = cgIdInfoLF fun_info - - + untag_node = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg)) + -- Test if closure is a constructor + maybeSwitchOnCons enterClosure eob + | EndOfBlockInfo _ (CaseAlts lbl _ _) <- eob + = do { is_constr <- newLabelC + -- Is the pointer tagged? + -- Yes, jump to switch statement + ; stmtC (CmmCondBranch (cmmIsTagged (CmmReg nodeReg)) + is_constr) + -- No, enter the closure. + ; enterClosure + ; labelC is_constr + ; stmtC (CmmJump (CmmLit (CmmLabel lbl)) []) + } +{- + -- This is a scrutinee for a case expression + -- so let's see if we can directly inspect the closure + | EndOfBlockInfo _ (CaseAlts lbl _ _ _) <- eob + = do { no_cons <- newLabelC + -- Both the NCG and gcc optimize away the temp + ; z <- newTemp wordRep + ; stmtC (CmmAssign z tag_expr) + ; let tag = CmmReg z + -- Is the closure a cons? + ; stmtC (CmmCondBranch (cond1 tag) no_cons) + ; stmtC (CmmCondBranch (cond2 tag) no_cons) + -- Yes, jump to switch statement + ; stmtC (CmmJump (CmmLit (CmmLabel lbl)) []) + ; labelC no_cons + -- No, enter the closure. + ; enterClosure + } +-} + -- No case expression involved, enter the closure. + | otherwise + = do { stmtC untag_node + ; enterClosure + } + where + --cond1 tag = cmmULtWord tag lowCons + -- More efficient than the above? + tag_expr = cmmGetClosureType (CmmReg nodeReg) + cond1 tag = cmmEqWord tag (CmmLit (mkIntCLit 0)) + cond2 tag = cmmUGtWord tag highCons + lowCons = CmmLit (mkIntCLit 1) + -- CONSTR + highCons = CmmLit (mkIntCLit 8) + -- CONSTR_NOCAF_STATIC (from ClosureType.h) + + +untagCmmAssign (CmmAssign r cmmExpr) = CmmAssign r (cmmUntag cmmExpr) +untagCmmAssign stmt = stmt directCall sp lbl args extra_args assts = do let diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index c66fc9ebdd..8d3578e1ef 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -22,12 +22,17 @@ module CgUtils ( callerSaveVolatileRegs, get_GlobalReg_addr, cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord, + cmmUGtWord, cmmOffsetExprW, cmmOffsetExprB, cmmRegOffW, cmmRegOffB, cmmLabelOffW, cmmLabelOffB, cmmOffsetW, cmmOffsetB, cmmOffsetLitW, cmmOffsetLitB, cmmLoadIndexW, + cmmConstrTag, cmmConstrTag1, + + tagForCon, tagCons, isSmallFamily, + cmmUntag, cmmIsTagged, cmmGetTag, addToMem, addToMemE, mkWordCLit, @@ -43,6 +48,7 @@ module CgUtils ( import CgMonad import TyCon +import DataCon import Id import Constants import SMRep @@ -61,7 +67,9 @@ import Util import DynFlags import FastString import PackageConfig +#ifdef DEBUG import Outputable +#endif import Data.Char import Data.Bits @@ -164,6 +172,9 @@ cmmEqWord e1 e2 = CmmMachOp mo_wordEq [e1, e2] cmmULtWord e1 e2 = CmmMachOp mo_wordULt [e1, e2] cmmUGeWord e1 e2 = CmmMachOp mo_wordUGe [e1, e2] cmmUGtWord e1 e2 = CmmMachOp mo_wordUGt [e1, e2] +--cmmShlWord e1 e2 = CmmMachOp mo_wordShl [e1, e2] +--cmmUShrWord e1 e2 = CmmMachOp mo_wordUShr [e1, e2] +cmmSubWord e1 e2 = CmmMachOp mo_wordSub [e1, e2] cmmNegate :: CmmExpr -> CmmExpr cmmNegate (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep) @@ -172,6 +183,57 @@ cmmNegate e = CmmMachOp (MO_S_Neg (cmmExprRep e)) [e] blankWord :: CmmStatic blankWord = CmmUninitialised wORD_SIZE +-- Tagging -- +-- Tag bits mask +--cmmTagBits = CmmLit (mkIntCLit tAG_BITS) +cmmTagMask = CmmLit (mkIntCLit tAG_MASK) +cmmPointerMask = CmmLit (mkIntCLit (complement tAG_MASK)) + +-- Used to untag a possibly tagged pointer +-- A static label need not be untagged +cmmUntag e@(CmmLit (CmmLabel _)) = e +-- Default case +cmmUntag e = (e `cmmAndWord` cmmPointerMask) + +cmmGetTag e = (e `cmmAndWord` cmmTagMask) + +-- Test if a closure pointer is untagged +cmmIsTagged e = (e `cmmAndWord` cmmTagMask) + `cmmNeWord` CmmLit zeroCLit + +cmmConstrTag e = (e `cmmAndWord` cmmTagMask) `cmmSubWord` (CmmLit (mkIntCLit 1)) +-- Get constructor tag, but one based. +cmmConstrTag1 e = e `cmmAndWord` cmmTagMask + +{- + The family size of a data type (the number of constructors) + can be either: + * small, if the family size < 2**tag_bits + * big, otherwise. + + Small families can have the constructor tag in the tag + bits. + Big families only use the tag value 1 to represent + evaluatedness. +-} +isSmallFamily fam_size = fam_size <= mAX_PTR_TAG + +tagForCon con = tag + where + con_tag = dataConTagZ con + fam_size = tyConFamilySize (dataConTyCon con) + tag | isSmallFamily fam_size = con_tag + 1 + | otherwise = 1 + +--Tag an expression, to do: refactor, this appears in some other module. +tagCons con expr = cmmOffsetB expr (tagForCon con) + +-- Copied from CgInfoTbls.hs +-- We keep the *zero-indexed* tag in the srt_len field of the info +-- table of a data constructor. +dataConTagZ :: DataCon -> ConTagZ +dataConTagZ con = dataConTag con - fIRST_TAG + ----------------------- -- Making literals diff --git a/compiler/codeGen/ClosureInfo.lhs b/compiler/codeGen/ClosureInfo.lhs index d0d2ed98b2..d537a7b3d9 100644 --- a/compiler/codeGen/ClosureInfo.lhs +++ b/compiler/codeGen/ClosureInfo.lhs @@ -23,7 +23,7 @@ module ClosureInfo ( mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo, mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape, - mkClosureInfo, mkConInfo, + mkClosureInfo, mkConInfo, maybeIsLFCon, closureSize, closureNonHdrSize, closureGoodStuffSize, closurePtrsSize, @@ -35,6 +35,7 @@ module ClosureInfo ( closureNeedsUpdSpace, closureIsThunk, closureSingleEntry, closureReEntrant, isConstrClosure_maybe, closureFunInfo, isStandardFormThunk, isKnownFun, + funTag, funTagLFInfo, tagForArity, enterIdLabel, enterLocalIdLabel, enterReturnPtLabel, @@ -58,6 +59,7 @@ module ClosureInfo ( #include "../includes/MachDeps.h" #include "HsVersions.h" +--import CgUtils import StgSyn import SMRep @@ -277,6 +279,10 @@ might_be_a_function ty mkConLFInfo :: DataCon -> LambdaFormInfo mkConLFInfo con = LFCon con +maybeIsLFCon :: LambdaFormInfo -> Maybe DataCon +maybeIsLFCon (LFCon con) = Just con +maybeIsLFCon _ = Nothing + mkSelectorLFInfo id offset updatable = LFThunk NotTopLevel False updatable (SelectorThunk offset) (might_be_a_function (idType id)) @@ -804,10 +810,32 @@ isConstrClosure_maybe (ConInfo { closureCon = data_con }) = Just data_con isConstrClosure_maybe _ = Nothing closureFunInfo :: ClosureInfo -> Maybe (Int, ArgDescr) -closureFunInfo (ClosureInfo { closureLFInfo = LFReEntrant _ arity _ arg_desc}) - = Just (arity, arg_desc) -closureFunInfo _ - = Nothing +closureFunInfo (ClosureInfo { closureLFInfo = lf_info }) = lfFunInfo lf_info +closureFunInfo _ = Nothing + +lfFunInfo :: LambdaFormInfo -> Maybe (Int, ArgDescr) +lfFunInfo (LFReEntrant _ arity _ arg_desc) = Just (arity, arg_desc) +lfFunInfo _ = Nothing + +funTag :: ClosureInfo -> Int +funTag (ClosureInfo { closureLFInfo = lf_info }) = funTagLFInfo lf_info +funTag _ = 0 + +-- maybe this should do constructor tags too? +funTagLFInfo :: LambdaFormInfo -> Int +funTagLFInfo lf + -- A function is tagged with its arity + | Just (arity,_) <- lfFunInfo lf, + Just tag <- tagForArity arity + = tag + + -- other closures (and unknown ones) are not tagged + | otherwise + = 0 + +tagForArity :: Int -> Maybe Int +tagForArity i | i <= mAX_PTR_TAG = Just i + | otherwise = Nothing \end{code} \begin{code} diff --git a/compiler/main/Constants.lhs b/compiler/main/Constants.lhs index 4f13af8828..2e0c4d4095 100644 --- a/compiler/main/Constants.lhs +++ b/compiler/main/Constants.lhs @@ -6,6 +6,8 @@ \begin{code} module Constants (module Constants) where +import Data.Bits (shiftL) + -- This magical #include brings in all the everybody-knows-these magic -- constants unfortunately, we need to be *explicit* about which one -- we want; if we just hope a -I... will get the right one, we could @@ -108,6 +110,14 @@ wORD_SIZE = (SIZEOF_HSWORD :: Int) wORD_SIZE_IN_BITS = wORD_SIZE * 8 :: Int \end{code} +Amount of pointer bits used for semi-tagging constructor closures + +\begin{code} +tAG_BITS = (TAG_BITS :: Int) +tAG_MASK = ((1 `shiftL` tAG_BITS) - 1) :: Int +mAX_PTR_TAG = tAG_MASK :: Int +\end{code} + Size of a C int, in bytes. May be smaller than wORD_SIZE. \begin{code} diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs index 2c07016a4f..cc940749f9 100644 --- a/compiler/nativeGen/MachCodeGen.hs +++ b/compiler/nativeGen/MachCodeGen.hs @@ -2216,6 +2216,18 @@ condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do -- return (CondCode False cond code) +-- anything vs zero, using a mask +-- TODO: Add some sanity checking!!!! +condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk)) + | (CmmLit (CmmInt mask pk2)) <- o2 + = do + (x_reg, x_code) <- getSomeReg x + let + code = x_code `snocOL` + TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg) + -- + return (CondCode False cond code) + -- anything vs zero condIntCode cond x (CmmLit (CmmInt 0 pk)) = do (x_reg, x_code) <- getSomeReg x diff --git a/includes/Closures.h b/includes/Closures.h index 64582ba6b5..df53ceedd3 100644 --- a/includes/Closures.h +++ b/includes/Closures.h @@ -306,7 +306,8 @@ typedef struct { */ typedef struct { const struct _StgInfoTable* info; - StgWord size; + StgHalfWord size; + StgHalfWord tag; StgClosure * fun; StgClosure * payload[FLEXIBLE_ARRAY]; } StgRetFun; diff --git a/includes/Cmm.h b/includes/Cmm.h index b23a37be04..cecf92640b 100644 --- a/includes/Cmm.h +++ b/includes/Cmm.h @@ -91,12 +91,34 @@ #if SIZEOF_VOID_P == 4 #define W_ bits32 +/* Maybe it's better to include MachDeps.h */ +#define TAG_BITS 2 #elif SIZEOF_VOID_P == 8 #define W_ bits64 +/* Maybe it's better to include MachDeps.h */ +#define TAG_BITS 3 #else #error Unknown word size #endif +/* + * The RTS must UNTAG a pointer before dereferencing it. + * The use of UNTAG follows the following rules of thumb: + * + * - Any pointer might be tagged. + * - Except the pointers that are passed in R1 to RTS functions. + * - R1 is also untagged when entering constructor code. + * + * TODO: + * + * - Remove redundancies of tagging and untagging in code generation. + * - Optimize getTag or dataToTag# ? + * + */ +#define TAG_MASK ((1 << TAG_BITS) - 1) +#define UNTAG(p) (p & ~TAG_MASK) +#define GETTAG(p) (p & TAG_MASK) + #if SIZEOF_INT == 4 #define CInt bits32 #elif SIZEOF_INT == 8 @@ -228,11 +250,23 @@ ToDo: range should end in N_CLOSURE_TYPES-1, not N_CLOSURE_TYPES, but switch doesn't allow us to use exprs there yet. + + If R1 points to a tagged object it points either to + * A constructor. + * A function with arity <= TAG_MASK. + In both cases the right thing to do is to return. + Note: it is rather lucky that we can use the tag bits to do this + for both objects. Maybe it points to a brittle design? + + Indirections can contain tagged pointers, so their tag is checked. -------------------------------------------------------------------------- */ #define ENTER() \ again: \ W_ info; \ + if (GETTAG(R1) != 0) { \ + jump %ENTRY_CODE(Sp(0)); \ + } \ info = %INFO_PTR(R1); \ switch [INVALID_OBJECT .. N_CLOSURE_TYPES] \ (TO_W_( %INFO_TYPE(%STD_INFO(info)) )) { \ @@ -247,14 +281,13 @@ goto again; \ } \ case \ - BCO, \ FUN, \ FUN_1_0, \ FUN_0_1, \ FUN_2_0, \ FUN_1_1, \ - FUN_0_2, \ - FUN_STATIC, \ + FUN_STATIC, \ + BCO, \ PAP: \ { \ jump %ENTRY_CODE(Sp(0)); \ @@ -265,6 +298,10 @@ } \ } +// The FUN cases almost never happen: a pointer to a non-static FUN +// should always be tagged. This unfortunately isn't true for the +// interpreter right now, which leaves untagged FUNs on the stack. + /* ----------------------------------------------------------------------------- Constants. -------------------------------------------------------------------------- */ @@ -375,7 +412,7 @@ (TO_W_(%INFO_TYPE(%STD_INFO(p))) != INVALID_OBJECT) && \ (TO_W_(%INFO_TYPE(%STD_INFO(p))) < N_CLOSURE_TYPES)) -#define LOOKS_LIKE_CLOSURE_PTR(p) (LOOKS_LIKE_INFO_PTR(GET_INFO(p))) +#define LOOKS_LIKE_CLOSURE_PTR(p) (LOOKS_LIKE_INFO_PTR(GET_INFO(UNTAG(p)))) /* * The layout of the StgFunInfoExtra part of an info table changes diff --git a/includes/InfoTables.h b/includes/InfoTables.h index a8e76b05b3..bbffea6468 100644 --- a/includes/InfoTables.h +++ b/includes/InfoTables.h @@ -164,7 +164,7 @@ typedef struct { extern StgWord16 closure_flags[]; -#define closureFlags(c) (closure_flags[get_itbl(c)->type]) +#define closureFlags(c) (closure_flags[get_itbl(UNTAG_CLOSURE(c))->type]) #define closure_HNF(c) ( closureFlags(c) & _HNF) #define closure_BITMAP(c) ( closureFlags(c) & _BTM) diff --git a/includes/MachDeps.h b/includes/MachDeps.h index abe4405d5e..7b71f7c378 100644 --- a/includes/MachDeps.h +++ b/includes/MachDeps.h @@ -105,4 +105,14 @@ #endif #endif +#ifndef TAG_BITS +#if SIZEOF_HSWORD == 4 +#define TAG_BITS 2 +#else +#define TAG_BITS 3 +#endif +#endif + +#define TAG_MASK ((1 << TAG_BITS) - 1) + #endif /* MACHDEPS_H */ diff --git a/includes/Rts.h b/includes/Rts.h index d009618442..eba8146fd2 100644 --- a/includes/Rts.h +++ b/includes/Rts.h @@ -107,6 +107,29 @@ extern void _assertFail (const char *, unsigned int); #define FMT_Int64 "lld" #endif +/* + * Macros for untagging and retagging closure pointers + * For more information look at the comments in Cmm.h + */ + +static inline StgWord +GET_CLOSURE_TAG(StgClosure * p) +{ + return (StgWord)p & TAG_MASK; +} + +static inline StgClosure * +UNTAG_CLOSURE(StgClosure * p) +{ + return (StgClosure*)((StgWord)p & ~TAG_MASK); +} + +static inline StgClosure * +TAG_CLOSURE(StgWord tag,StgClosure * p) +{ + return (StgClosure*)((StgWord)p | tag); +} + /* ----------------------------------------------------------------------------- Include everything STG-ish -------------------------------------------------------------------------- */ @@ -207,6 +230,23 @@ extern void stg_exit(int n) GNU_ATTRIBUTE(__noreturn__); /* declarations for runtime flags/values */ #define MAX_RTS_ARGS 32 +#ifdef DEBUG +#define TICK_VAR(arity) \ + extern StgInt SLOW_CALLS_##arity; \ + extern StgInt RIGHT_ARITY_##arity; \ + extern StgInt TAGGED_PTR_##arity; + +#define TICK_VAR_INI(arity) \ + StgInt SLOW_CALLS_##arity = 1; \ + StgInt RIGHT_ARITY_##arity = 1; \ + StgInt TAGGED_PTR_##arity = 0; + +extern StgInt TOTAL_CALLS; + +TICK_VAR(1) +TICK_VAR(2) +#endif + /* ----------------------------------------------------------------------------- Assertions and Debuggery -------------------------------------------------------------------------- */ diff --git a/includes/Storage.h b/includes/Storage.h index 604e49e043..92a856c963 100644 --- a/includes/Storage.h +++ b/includes/Storage.h @@ -303,7 +303,7 @@ void dirty_MUT_VAR(StgRegTable *reg, StgClosure *p); ((StgInfoTable *)(INFO_PTR_TO_STRUCT(p)))->type < N_CLOSURE_TYPES) #define LOOKS_LIKE_CLOSURE_PTR(p) \ - (LOOKS_LIKE_INFO_PTR(((StgClosure *)(p))->header.info)) + (LOOKS_LIKE_INFO_PTR((UNTAG_CLOSURE((StgClosure *)(p)))->header.info)) /* ----------------------------------------------------------------------------- Macros for calculating how big a closure will be (used during allocation) diff --git a/includes/mkDerivedConstants.c b/includes/mkDerivedConstants.c index 2fe99b6ba5..aa3c6730f8 100644 --- a/includes/mkDerivedConstants.c +++ b/includes/mkDerivedConstants.c @@ -403,6 +403,10 @@ main(int argc, char *argv[]) struct_field(StgLargeBitmap, size); field_offset(StgLargeBitmap, bitmap); + struct_field(StgRetFun, size); + struct_field(StgRetFun, tag); + struct_field(StgRetFun, fun); + struct_size(snEntry); struct_field(snEntry,sn_obj); struct_field(snEntry,addr); diff --git a/rts/Apply.cmm b/rts/Apply.cmm index e0ca03944c..cf8a108006 100644 --- a/rts/Apply.cmm +++ b/rts/Apply.cmm @@ -90,8 +90,6 @@ stg_PAP_apply // Enter PAP cost centre ENTER_CCS_PAP_CL(pap); - R1 = StgPAP_fun(pap); - // Reload the stack W_ i; W_ p; @@ -105,14 +103,30 @@ for: goto for; } + R1 = StgPAP_fun(pap); + +/* DEBUGGING CODE, ensures that arity 1 and 2 functions are entered tagged + if (TO_W_(StgFunInfoExtra_arity(%FUN_INFO(%INFO_PTR(UNTAG(R1))))) == 1 ) { + if (GETTAG(R1)!=1) { + W_[0]=1; + } + } + + if (TO_W_(StgFunInfoExtra_arity(%FUN_INFO(%INFO_PTR(UNTAG(R1))))) == 2 ) { + if (GETTAG(R1)!=2) { + W_[0]=1; + } + } +*/ + // Off we go! TICK_ENT_VIA_NODE(); #ifdef NO_ARG_REGS - jump %GET_ENTRY(R1); + jump %GET_ENTRY(UNTAG(R1)); #else W_ info; - info = %GET_FUN_INFO(R1); + info = %GET_FUN_INFO(UNTAG(R1)); W_ type; type = TO_W_(StgFunInfoExtra_fun_type(info)); if (type == ARG_GEN) { @@ -167,8 +181,6 @@ INFO_TABLE(stg_AP,/*special layout*/0,0,AP,"AP","AP") // Enter PAP cost centre ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL - R1 = StgAP_fun(ap); - // Reload the stack W_ i; W_ p; @@ -182,14 +194,16 @@ for: goto for; } + R1 = StgAP_fun(ap); + // Off we go! TICK_ENT_VIA_NODE(); #ifdef NO_ARG_REGS - jump %GET_ENTRY(R1); + jump %GET_ENTRY(UNTAG(R1)); #else W_ info; - info = %GET_FUN_INFO(R1); + info = %GET_FUN_INFO(UNTAG(R1)); W_ type; type = TO_W_(StgFunInfoExtra_fun_type(info)); if (type == ARG_GEN) { @@ -246,8 +260,6 @@ INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK") // Enter PAP cost centre ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL - R1 = StgAP_STACK_fun(ap); - // Reload the stack W_ i; W_ p; @@ -264,5 +276,7 @@ for: // Off we go! TICK_ENT_VIA_NODE(); + R1 = StgAP_STACK_fun(ap); + ENTER(); } diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm index f40fbf5519..3c66e7806f 100644 --- a/rts/HeapStackCheck.cmm +++ b/rts/HeapStackCheck.cmm @@ -551,6 +551,8 @@ INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, "ptr" W_ unused ) +---------------------+ | f_closure | +---------------------+ + | tag | + +- - - - - - - - - - -+ | size | +---------------------+ | stg_gc_fun_info | @@ -567,8 +569,11 @@ __stg_gc_fun W_ size; W_ info; W_ type; + W_ tag; + W_ ret_fun; - info = %GET_FUN_INFO(R1); + tag = GETTAG(R1); + info = %GET_FUN_INFO(UNTAG(R1)); // cache the size type = TO_W_(StgFunInfoExtra_fun_type(info)); @@ -579,7 +584,7 @@ __stg_gc_fun #ifdef TABLES_NEXT_TO_CODE // bitmap field holds an offset size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) - + %GET_ENTRY(R1) /* ### */ ); + + %GET_ENTRY(UNTAG(R1)) /* ### */ ); #else size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) ); #endif @@ -591,9 +596,11 @@ __stg_gc_fun #ifdef NO_ARG_REGS // we don't have to save any registers away Sp_adj(-3); - Sp(2) = R1; - Sp(1) = size; Sp(0) = stg_gc_fun_info; + ret_fun = Sp; + StgRetFun_size(ret_fun) = HALF_W_(size); + StgRetFun_tag(ret_fun) = HALF_W_(tag); + StgRetFun_fun(ret_fun) = R1; GC_GENERIC #else W_ type; @@ -602,9 +609,11 @@ __stg_gc_fun if (type == ARG_GEN || type == ARG_GEN_BIG) { // regs already saved by the heap check code Sp_adj(-3); - Sp(2) = R1; - Sp(1) = size; Sp(0) = stg_gc_fun_info; + ret_fun = Sp; + StgRetFun_size(ret_fun) = HALF_W_(size); + StgRetFun_tag(ret_fun) = HALF_W_(tag); + StgRetFun_fun(ret_fun) = R1; // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)");); GC_GENERIC } else { @@ -624,17 +633,22 @@ __stg_gc_fun INFO_TABLE_RET( stg_gc_fun, RET_FUN ) { - R1 = Sp(2); + // Grab the fun, but remember to add in the tag. The GC doesn't + // guarantee to retain the tag on the pointer, so we have to do + // it manually, because the function entry code assumes it. + W_ ret_fun; + ret_fun = Sp; + R1 = StgRetFun_fun(ret_fun) | TO_W_(StgRetFun_tag(ret_fun)); Sp_adj(3); #ifdef NO_ARG_REGS // Minor optimisation: there are no argument registers to load up, // so we can just jump straight to the function's entry point. - jump %GET_ENTRY(R1); + jump %GET_ENTRY(UNTAG(R1)); #else W_ info; W_ type; - info = %GET_FUN_INFO(R1); + info = %GET_FUN_INFO(UNTAG(R1)); type = TO_W_(StgFunInfoExtra_fun_type(info)); if (type == ARG_GEN || type == ARG_GEN_BIG) { jump StgFunInfoExtra_slow_apply(info); diff --git a/rts/Interpreter.c b/rts/Interpreter.c index 6663445995..527ebde0d0 100644 --- a/rts/Interpreter.c +++ b/rts/Interpreter.c @@ -224,7 +224,7 @@ interpretBCO (Capability* cap) // +---------------+ // else if (Sp[0] == (W_)&stg_apply_interp_info) { - obj = (StgClosure *)Sp[1]; + obj = UNTAG_CLOSURE((StgClosure *)Sp[1]); Sp += 2; goto run_BCO_fun; } @@ -244,6 +244,7 @@ eval: obj = (StgClosure*)Sp[0]; Sp++; eval_obj: + obj = UNTAG_CLOSURE(obj); INTERP_TICK(it_total_evals); IF_DEBUG(interpreter, @@ -327,7 +328,7 @@ eval_obj: Sp[i] = (W_)ap->payload[i]; } - obj = (StgClosure*)ap->fun; + obj = UNTAG_CLOSURE((StgClosure*)ap->fun); ASSERT(get_itbl(obj)->type == BCO); goto run_BCO_fun; } @@ -531,7 +532,7 @@ do_apply: pap = (StgPAP *)obj; // we only cope with PAPs whose function is a BCO - if (get_itbl(pap->fun)->type != BCO) { + if (get_itbl(UNTAG_CLOSURE(pap->fun))->type != BCO) { goto defer_apply_to_sched; } @@ -556,7 +557,7 @@ do_apply: for (i = 0; i < pap->n_args; i++) { Sp[i] = (W_)pap->payload[i]; } - obj = pap->fun; + obj = UNTAG_CLOSURE(pap->fun); goto run_BCO_fun; } else if (arity == n) { @@ -564,7 +565,7 @@ do_apply: for (i = 0; i < pap->n_args; i++) { Sp[i] = (W_)pap->payload[i]; } - obj = pap->fun; + obj = UNTAG_CLOSURE(pap->fun); goto run_BCO_fun; } else /* arity > n */ { diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index 7c75fca0e8..cb8626e5dd 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -1869,7 +1869,7 @@ unpackClosurezh_fast // TODO: Consider the absence of ptrs or nonptrs as a special case ? W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr; - info = %GET_STD_INFO(R1); + info = %GET_STD_INFO(UNTAG(R1)); // Some closures have non-standard layout, so we omit those here. W_ type; @@ -1899,6 +1899,9 @@ out: ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast); + W_ clos; + clos = UNTAG(R1); + ptrs_arr = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1); nptrs_arr = Hp - nptrs_arr_sz + WDS(1); @@ -1907,7 +1910,7 @@ out: p = 0; for: if(p < ptrs) { - W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(R1,p); + W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p); p = p + 1; goto for; } @@ -1917,7 +1920,7 @@ for: p = 0; for2: if(p < nptrs) { - W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(R1, p+ptrs); + W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs); p = p + 1; goto for2; } diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c index 9f29acae19..2613b9e4bc 100644 --- a/rts/RetainerProfile.c +++ b/rts/RetainerProfile.c @@ -1486,7 +1486,9 @@ retainStack( StgClosure *c, retainer c_child_r, * ------------------------------------------------------------------------- */ static INLINE StgPtr -retain_PAP_payload (StgClosure *pap, retainer c_child_r, StgClosure *fun, +retain_PAP_payload (StgClosure *pap, /* NOT tagged */ + retainer c_child_r, /* NOT tagged */ + StgClosure *fun, /* tagged */ StgClosure** payload, StgWord n_args) { StgPtr p; @@ -1494,6 +1496,7 @@ retain_PAP_payload (StgClosure *pap, retainer c_child_r, StgClosure *fun, StgFunInfoTable *fun_info; retainClosure(fun, pap, c_child_r); + fun = UNTAG_CLOSURE(fun); fun_info = get_fun_itbl(fun); ASSERT(fun_info->i.type != PAP); @@ -1542,9 +1545,9 @@ retain_PAP_payload (StgClosure *pap, retainer c_child_r, StgClosure *fun, static void retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 ) { - // c = Current closure - // cp = Current closure's Parent - // r = current closures' most recent Retainer + // c = Current closure (possibly tagged) + // cp = Current closure's Parent (NOT tagged) + // r = current closures' most recent Retainer (NOT tagged) // c_child_r = current closure's children's most recent retainer // first_child = first child of c StgClosure *c, *cp, *first_child; @@ -1582,6 +1585,8 @@ loop: //debugBelch("inner_loop"); inner_loop: + c = UNTAG_CLOSURE(c); + // c = current closure under consideration, // cp = current closure's parent, // r = current closure's most recent retainer @@ -1794,16 +1799,19 @@ inner_loop: static void retainRoot( StgClosure **tl ) { + StgClosure *c; + // We no longer assume that only TSOs and WEAKs are roots; any closure can // be a root. ASSERT(isEmptyRetainerStack()); currentStackBoundary = stackTop; - if (*tl != &stg_END_TSO_QUEUE_closure && isRetainer(*tl)) { - retainClosure(*tl, *tl, getRetainerFrom(*tl)); + c = UNTAG_CLOSURE(*tl); + if (c != &stg_END_TSO_QUEUE_closure && isRetainer(c)) { + retainClosure(c, c, getRetainerFrom(c)); } else { - retainClosure(*tl, *tl, CCS_SYSTEM); + retainClosure(c, c, CCS_SYSTEM); } // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl))); diff --git a/rts/RtsAPI.c b/rts/RtsAPI.c index 69fac8d474..716b4a2f2b 100644 --- a/rts/RtsAPI.c +++ b/rts/RtsAPI.c @@ -22,6 +22,10 @@ /* ---------------------------------------------------------------------------- Building Haskell objects from C datatypes. + + TODO: Currently this code does not tag created pointers, + however it is not unsafe (the contructor code will do it) + just inefficient. ------------------------------------------------------------------------- */ HaskellObj rts_mkChar (Capability *cap, HsChar c) @@ -221,7 +225,7 @@ rts_getChar (HaskellObj p) // See comment above: // ASSERT(p->header.info == Czh_con_info || // p->header.info == Czh_static_info); - return (StgChar)(StgWord)(p->payload[0]); + return (StgChar)(StgWord)(UNTAG_CLOSURE(p)->payload[0]); } HsInt @@ -230,7 +234,7 @@ rts_getInt (HaskellObj p) // See comment above: // ASSERT(p->header.info == Izh_con_info || // p->header.info == Izh_static_info); - return (HsInt)(p->payload[0]); + return (HsInt)(UNTAG_CLOSURE(p)->payload[0]); } HsInt8 @@ -239,7 +243,7 @@ rts_getInt8 (HaskellObj p) // See comment above: // ASSERT(p->header.info == I8zh_con_info || // p->header.info == I8zh_static_info); - return (HsInt8)(HsInt)(p->payload[0]); + return (HsInt8)(HsInt)(UNTAG_CLOSURE(p)->payload[0]); } HsInt16 @@ -248,7 +252,7 @@ rts_getInt16 (HaskellObj p) // See comment above: // ASSERT(p->header.info == I16zh_con_info || // p->header.info == I16zh_static_info); - return (HsInt16)(HsInt)(p->payload[0]); + return (HsInt16)(HsInt)(UNTAG_CLOSURE(p)->payload[0]); } HsInt32 @@ -257,7 +261,7 @@ rts_getInt32 (HaskellObj p) // See comment above: // ASSERT(p->header.info == I32zh_con_info || // p->header.info == I32zh_static_info); - return (HsInt32)(HsInt)(p->payload[0]); + return (HsInt32)(HsInt)(UNTAG_CLOSURE(p)->payload[0]); } HsInt64 @@ -267,7 +271,7 @@ rts_getInt64 (HaskellObj p) // See comment above: // ASSERT(p->header.info == I64zh_con_info || // p->header.info == I64zh_static_info); - tmp = (HsInt64*)&(p->payload[0]); + tmp = (HsInt64*)&(UNTAG_CLOSURE(p)->payload[0]); return *tmp; } HsWord @@ -276,7 +280,7 @@ rts_getWord (HaskellObj p) // See comment above: // ASSERT(p->header.info == Wzh_con_info || // p->header.info == Wzh_static_info); - return (HsWord)(p->payload[0]); + return (HsWord)(UNTAG_CLOSURE(p)->payload[0]); } HsWord8 @@ -285,7 +289,7 @@ rts_getWord8 (HaskellObj p) // See comment above: // ASSERT(p->header.info == W8zh_con_info || // p->header.info == W8zh_static_info); - return (HsWord8)(HsWord)(p->payload[0]); + return (HsWord8)(HsWord)(UNTAG_CLOSURE(p)->payload[0]); } HsWord16 @@ -294,7 +298,7 @@ rts_getWord16 (HaskellObj p) // See comment above: // ASSERT(p->header.info == W16zh_con_info || // p->header.info == W16zh_static_info); - return (HsWord16)(HsWord)(p->payload[0]); + return (HsWord16)(HsWord)(UNTAG_CLOSURE(p)->payload[0]); } HsWord32 @@ -303,7 +307,7 @@ rts_getWord32 (HaskellObj p) // See comment above: // ASSERT(p->header.info == W32zh_con_info || // p->header.info == W32zh_static_info); - return (HsWord32)(HsWord)(p->payload[0]); + return (HsWord32)(HsWord)(UNTAG_CLOSURE(p)->payload[0]); } @@ -314,7 +318,7 @@ rts_getWord64 (HaskellObj p) // See comment above: // ASSERT(p->header.info == W64zh_con_info || // p->header.info == W64zh_static_info); - tmp = (HsWord64*)&(p->payload[0]); + tmp = (HsWord64*)&(UNTAG_CLOSURE(p)->payload[0]); return *tmp; } @@ -324,7 +328,7 @@ rts_getFloat (HaskellObj p) // See comment above: // ASSERT(p->header.info == Fzh_con_info || // p->header.info == Fzh_static_info); - return (float)(PK_FLT((P_)p->payload)); + return (float)(PK_FLT((P_)UNTAG_CLOSURE(p)->payload)); } HsDouble @@ -333,7 +337,7 @@ rts_getDouble (HaskellObj p) // See comment above: // ASSERT(p->header.info == Dzh_con_info || // p->header.info == Dzh_static_info); - return (double)(PK_DBL((P_)p->payload)); + return (double)(PK_DBL((P_)UNTAG_CLOSURE(p)->payload)); } HsStablePtr @@ -342,7 +346,7 @@ rts_getStablePtr (HaskellObj p) // See comment above: // ASSERT(p->header.info == StablePtr_con_info || // p->header.info == StablePtr_static_info); - return (StgStablePtr)(p->payload[0]); + return (StgStablePtr)(UNTAG_CLOSURE(p)->payload[0]); } HsPtr @@ -351,7 +355,7 @@ rts_getPtr (HaskellObj p) // See comment above: // ASSERT(p->header.info == Ptr_con_info || // p->header.info == Ptr_static_info); - return (Capability *)(p->payload[0]); + return (Capability *)(UNTAG_CLOSURE(p)->payload[0]); } HsFunPtr @@ -360,7 +364,7 @@ rts_getFunPtr (HaskellObj p) // See comment above: // ASSERT(p->header.info == FunPtr_con_info || // p->header.info == FunPtr_static_info); - return (void *)(p->payload[0]); + return (void *)(UNTAG_CLOSURE(p)->payload[0]); } HsBool @@ -368,7 +372,7 @@ rts_getBool (HaskellObj p) { StgInfoTable *info; - info = get_itbl((StgClosure *)p); + info = get_itbl((StgClosure *)UNTAG_CLOSURE(p)); if (info->srt_bitmap == 0) { // srt_bitmap is the constructor tag return 0; } else { diff --git a/rts/Sanity.c b/rts/Sanity.c index 7de8ec7d0a..a2ddff87d6 100644 --- a/rts/Sanity.c +++ b/rts/Sanity.c @@ -80,13 +80,16 @@ checkLargeBitmap( StgPtr payload, StgLargeBitmap* large_bitmap, nat size ) static void checkClosureShallow( StgClosure* p ) { - ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); + StgClosure *q; + + q = UNTAG_CLOSURE(p); + ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); /* Is it a static closure? */ - if (!HEAP_ALLOCED(p)) { - ASSERT(closure_STATIC(p)); + if (!HEAP_ALLOCED(q)) { + ASSERT(closure_STATIC(q)); } else { - ASSERT(!closure_STATIC(p)); + ASSERT(!closure_STATIC(q)); } } @@ -162,7 +165,7 @@ checkStackFrame( StgPtr c ) StgRetFun *ret_fun; ret_fun = (StgRetFun *)c; - fun_info = get_fun_itbl(ret_fun->fun); + fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun)); size = ret_fun->size; switch (fun_info->f.fun_type) { case ARG_GEN: @@ -206,6 +209,7 @@ checkPAP (StgClosure *fun, StgClosure** payload, StgWord n_args) StgClosure *p; StgFunInfoTable *fun_info; + fun = UNTAG_CLOSURE(fun); ASSERT(LOOKS_LIKE_CLOSURE_PTR(fun)); fun_info = get_fun_itbl(fun); @@ -241,6 +245,7 @@ checkClosure( StgClosure* p ) ASSERT(LOOKS_LIKE_INFO_PTR(p->header.info)); + p = UNTAG_CLOSURE(p); /* Is it a static closure (i.e. in the data segment)? */ if (!HEAP_ALLOCED(p)) { ASSERT(closure_STATIC(p)); @@ -815,7 +820,7 @@ checkStaticObjects ( StgClosure* static_objects ) switch (info->type) { case IND_STATIC: { - StgClosure *indirectee = ((StgIndStatic *)p)->indirectee; + StgClosure *indirectee = UNTAG_CLOSURE(((StgIndStatic *)p)->indirectee); ASSERT(LOOKS_LIKE_CLOSURE_PTR(indirectee)); ASSERT(LOOKS_LIKE_INFO_PTR(indirectee->header.info)); diff --git a/rts/Sparks.c b/rts/Sparks.c index ca60e1338c..0ff4ee4cce 100644 --- a/rts/Sparks.c +++ b/rts/Sparks.c @@ -200,6 +200,12 @@ newSpark (StgRegTable *reg, StgClosure *p) { StgSparkPool *pool = &(reg->rSparks); + /* I am not sure whether this is the right thing to do. + * Maybe it is better to exploit the tag information + * instead of throwing it away? + */ + p = UNTAG_CLOSURE(p); + ASSERT_SPARK_POOL_INVARIANTS(pool); if (closure_SHOULD_SPARK(p)) { diff --git a/rts/Stable.c b/rts/Stable.c index e5e8dfbdd0..0ed18bcec2 100644 --- a/rts/Stable.c +++ b/rts/Stable.c @@ -177,6 +177,9 @@ exitStablePtrTable(void) /* * get at the real stuff...remove indirections. + * It untags pointers before dereferencing and + * retags the real stuff with its tag (if there + * is any) when returning. * * ToDo: move to a better home. */ @@ -184,16 +187,18 @@ static StgClosure* removeIndirections(StgClosure* p) { - StgClosure* q = p; + StgWord tag = GET_CLOSURE_TAG(p); + StgClosure* q = UNTAG_CLOSURE(p); while (get_itbl(q)->type == IND || get_itbl(q)->type == IND_STATIC || get_itbl(q)->type == IND_OLDGEN || get_itbl(q)->type == IND_PERM || get_itbl(q)->type == IND_OLDGEN_PERM ) { - q = ((StgInd *)q)->indirectee; + tag = GET_CLOSURE_TAG(q); + q = UNTAG_CLOSURE(((StgInd *)q)->indirectee); } - return q; + return TAG_CLOSURE(tag,q); } static StgWord diff --git a/rts/Stats.c b/rts/Stats.c index 9342118ade..f18e26fbd5 100644 --- a/rts/Stats.c +++ b/rts/Stats.c @@ -441,6 +441,52 @@ stat_endHeapCensus(void) were left unused when the heap-check failed. -------------------------------------------------------------------------- */ +#ifdef DEBUG +#define TICK_VAR(arity) \ + extern StgInt SLOW_CALLS_##arity; \ + extern StgInt RIGHT_ARITY_##arity; \ + extern StgInt TAGGED_PTR_##arity; + +#define TICK_VAR_INI(arity) \ + StgInt SLOW_CALLS_##arity = 1; \ + StgInt RIGHT_ARITY_##arity = 1; \ + StgInt TAGGED_PTR_##arity = 0; + +extern StgInt TOTAL_CALLS; + +TICK_VAR(1) +TICK_VAR(2) + +TICK_VAR_INI(1) +TICK_VAR_INI(2) + +StgInt TOTAL_CALLS=1; +#endif + +/* Report the value of a counter */ +#define REPORT(counter) \ + { \ + ullong_format_string(counter,temp,rtsTrue/*commas*/); \ + statsPrintf(" (" #counter ") : %s\n",temp); \ + } + +/* Report the value of a counter as a percentage of another counter */ +#define REPORT_PCT(counter,countertot) \ + statsPrintf(" (" #counter ") %% of (" #countertot ") : %.1f%%\n", \ + counter*100.0/countertot) + +#define TICK_PRINT(arity) \ + REPORT(SLOW_CALLS_##arity); \ + REPORT_PCT(RIGHT_ARITY_##arity,SLOW_CALLS_##arity); \ + REPORT_PCT(TAGGED_PTR_##arity,RIGHT_ARITY_##arity); \ + REPORT(RIGHT_ARITY_##arity); \ + REPORT(TAGGED_PTR_##arity) + +#define TICK_PRINT_TOT(arity) \ + statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \ + SLOW_CALLS_##arity * 100.0/TOTAL_CALLS) + + void stat_exit(int alloc) { @@ -557,6 +603,15 @@ stat_exit(int alloc) TICK_TO_DBL(time - GC_tot_time - PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime) * 100 / TICK_TO_DBL(etime)); + + /* + TICK_PRINT(1); + TICK_PRINT(2); + REPORT(TOTAL_CALLS); + TICK_PRINT_TOT(1); + TICK_PRINT_TOT(2); + */ + #if USE_PAPI /* PAPI reporting, should put somewhere else? * Note that the cycles are counted _after_ the initialization of the RTS -- AR */ diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm index e092e3fdc0..58cbaf9d56 100644 --- a/rts/StgMiscClosures.cmm +++ b/rts/StgMiscClosures.cmm @@ -167,7 +167,7 @@ INFO_TABLE_FUN( stg_BCO, 4, 0, BCO, "BCO", "BCO", ARG_BCO ) INFO_TABLE(stg_IND,1,0,IND,"IND","IND") { TICK_ENT_DYN_IND(); /* tick */ - R1 = StgInd_indirectee(R1); + R1 = UNTAG(StgInd_indirectee(R1)); TICK_ENT_VIA_NODE(); jump %GET_ENTRY(R1); } @@ -183,7 +183,7 @@ INFO_TABLE(stg_IND_direct,1,0,IND,"IND","IND") INFO_TABLE(stg_IND_STATIC,1,0,IND_STATIC,"IND_STATIC","IND_STATIC") { TICK_ENT_STATIC_IND(); /* tick */ - R1 = StgInd_indirectee(R1); + R1 = UNTAG(StgInd_indirectee(R1)); TICK_ENT_VIA_NODE(); jump %GET_ENTRY(R1); } @@ -220,7 +220,7 @@ INFO_TABLE(stg_IND_PERM,1,0,IND_PERM,"IND_PERM","IND_PERM") StgHeader_info(R1) = stg_IND_info; #endif /* TICKY_TICKY */ - R1 = StgInd_indirectee(R1); + R1 = UNTAG(StgInd_indirectee(R1)); #if defined(TICKY_TICKY) && !defined(PROFILING) TICK_ENT_VIA_NODE(); @@ -233,7 +233,7 @@ INFO_TABLE(stg_IND_PERM,1,0,IND_PERM,"IND_PERM","IND_PERM") INFO_TABLE(stg_IND_OLDGEN,1,0,IND_OLDGEN,"IND_OLDGEN","IND_OLDGEN") { TICK_ENT_STATIC_IND(); /* tick */ - R1 = StgInd_indirectee(R1); + R1 = UNTAG(StgInd_indirectee(R1)); TICK_ENT_VIA_NODE(); jump %GET_ENTRY(R1); } @@ -262,7 +262,7 @@ INFO_TABLE(stg_IND_OLDGEN_PERM,1,0,IND_OLDGEN_PERM,"IND_OLDGEN_PERM","IND_OLDGEN StgHeader_info(R1) = stg_IND_OLDGEN_info; #endif /* TICKY_TICKY */ - R1 = StgInd_indirectee(R1); + R1 = UNTAG(StgInd_indirectee(R1)); TICK_ENT_VIA_NODE(); jump %GET_ENTRY(R1); diff --git a/rts/StgStartup.cmm b/rts/StgStartup.cmm index 5b0f7e2a5f..b5a5cdcb2f 100644 --- a/rts/StgStartup.cmm +++ b/rts/StgStartup.cmm @@ -142,6 +142,7 @@ stg_threadFinished forceIO takes care of this, performing the IO action and entering the results that comes back. + ------------------------------------------------------------------------- */ INFO_TABLE_RET( stg_forceIO, RET_SMALL) diff --git a/rts/StgStdThunks.cmm b/rts/StgStdThunks.cmm index db9c254233..20ceb6aaba 100644 --- a/rts/StgStdThunks.cmm +++ b/rts/StgStdThunks.cmm @@ -39,10 +39,23 @@ #define RET_PARAMS #endif +/* + * TODO: On return, we can use a more efficient + * untagging (we know the constructor tag). + * + * When entering stg_sel_#_upd, we know R1 points to its closure, + * so it's untagged. + * The payload might be a thunk or a constructor, + * so we enter it. + * + * When returning, we know for sure it is a constructor, + * so we untag it before accessing the field. + * + */ #define SELECTOR_CODE_UPD(offset) \ INFO_TABLE_RET(stg_sel_ret_##offset##_upd, RET_SMALL, RET_PARAMS) \ { \ - R1 = StgClosure_payload(R1,offset); \ + R1 = StgClosure_payload(UNTAG(R1),offset); \ GET_SAVED_CCCS; \ Sp = Sp + SIZEOF_StgHeader; \ ENTER(); \ @@ -58,8 +71,11 @@ ENTER_CCS_THUNK(R1); \ SAVE_CCCS(WITHUPD_FRAME_SIZE); \ W_[Sp-WITHUPD_FRAME_SIZE] = stg_sel_ret_##offset##_upd_info; \ - R1 = StgThunk_payload(R1,0); \ Sp = Sp - WITHUPD_FRAME_SIZE; \ + R1 = StgThunk_payload(R1,0); \ + if (GETTAG(R1) != 0) { \ + jump RET_LBL(stg_sel_ret_##offset##_upd); \ + } \ jump %GET_ENTRY(R1); \ } /* NOTE: no need to ENTER() here, we know the closure cannot evaluate to a function, @@ -85,10 +101,10 @@ SELECTOR_CODE_UPD(15) #define SELECTOR_CODE_NOUPD(offset) \ INFO_TABLE_RET(stg_sel_ret_##offset##_noupd, RET_SMALL, RET_PARAMS) \ { \ - R1 = StgClosure_payload(R1,offset); \ + R1 = StgClosure_payload(UNTAG(R1),offset); \ GET_SAVED_CCCS; \ Sp = Sp + SIZEOF_StgHeader; \ - jump %GET_ENTRY(R1); \ + ENTER(); \ } \ \ INFO_TABLE_SELECTOR(stg_sel_##offset##_noupd, offset, THUNK_SELECTOR, "stg_sel_noupd", "stg_sel_noupd")\ @@ -101,8 +117,11 @@ SELECTOR_CODE_UPD(15) ENTER_CCS_THUNK(R1); \ SAVE_CCCS(NOUPD_FRAME_SIZE); \ W_[Sp-NOUPD_FRAME_SIZE] = stg_sel_ret_##offset##_noupd_info; \ - R1 = StgThunk_payload(R1,0); \ Sp = Sp - NOUPD_FRAME_SIZE; \ + R1 = StgThunk_payload(R1,0); \ + if (GETTAG(R1) != 0) { \ + jump RET_LBL(stg_sel_ret_##offset##_noupd); \ + } \ jump %GET_ENTRY(R1); \ } diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c index feebef87aa..e8d154059b 100644 --- a/rts/sm/Compact.c +++ b/rts/sm/Compact.c @@ -55,23 +55,32 @@ STATIC_INLINE void thread (StgClosure **p) { - StgPtr q = *(StgPtr *)p; + StgClosure *q0 = *p; + StgPtr q = (StgPtr)UNTAG_CLOSURE(q0); + nat tag = GET_CLOSURE_TAG(q0); bdescr *bd; // It doesn't look like a closure at the moment, because the info // ptr is possibly threaded: // ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); + + // We need one tag value here, because we a non-zero tag to + // indicate "not an info pointer". So we add one to the existing + // tag. If this would overflow the tag bits, we throw away the + // original tag (which is safe but pessimistic; tags are optional). + if (tag == TAG_MASK) tag = 0; - if (HEAP_ALLOCED(q)) { + if (HEAP_ALLOCED(q)) + { bd = Bdescr(q); // a handy way to discover whether the ptr is into the // compacted area of the old gen, is that the EVACUATED flag // is zero (it's non-zero for all the other areas of live // memory). - if ((bd->flags & BF_EVACUATED) == 0) { - + if ((bd->flags & BF_EVACUATED) == 0) + { *(StgPtr)p = (StgWord)*q; - *q = (StgWord)p + 1; // set the low bit + *q = (StgWord)p + tag + 1; // set the low bit } } } @@ -84,11 +93,15 @@ STATIC_INLINE void unthread( StgPtr p, StgPtr free ) { StgWord q = *p, r; + nat tag; + StgPtr q1; - while ((q & 1) != 0) { - q -= 1; // unset the low bit again - r = *((StgPtr)q); - *((StgPtr)q) = (StgWord)free; + while (GET_CLOSURE_TAG((StgClosure *)q) != 0) { + q -= 1; // restore the original tag + tag = GET_CLOSURE_TAG((StgClosure *)q); + q1 = (StgPtr)UNTAG_CLOSURE((StgClosure *)q); + r = *q1; + *q1 = (StgWord)free + tag; q = r; } *p = q; @@ -97,10 +110,10 @@ unthread( StgPtr p, StgPtr free ) STATIC_INLINE StgInfoTable * get_threaded_info( StgPtr p ) { - StgPtr q = (P_)GET_INFO((StgClosure *)p); + StgPtr q = (P_)GET_INFO(UNTAG_CLOSURE((StgClosure *)p)); - while (((StgWord)q & 1) != 0) { - q = (P_)*((StgPtr)((StgWord)q-1)); + while (GET_CLOSURE_TAG((StgClosure *)q) != 0) { + q = (P_)*((StgPtr)((StgWord)(UNTAG_CLOSURE((StgClosure *)q)))); } ASSERT(LOOKS_LIKE_INFO_PTR(q)); diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index dda5659675..d437e3f786 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -39,7 +39,7 @@ upd_evacuee(StgClosure *p, StgClosure *dest) STATIC_INLINE StgClosure * -copy(StgClosure *src, nat size, step *stp) +copy_tag(StgClosure *src, nat size, step *stp,StgWord tag) { StgPtr to, from; nat i; @@ -75,6 +75,10 @@ copy(StgClosure *src, nat size, step *stp) for (i = 0; i < size; i++) { // unroll for small i to[i] = from[i]; } + + /* retag pointer before updating EVACUATE closure and returning */ + to = (StgPtr)TAG_CLOSURE(tag,(StgClosure*)to); + upd_evacuee((StgClosure *)from,(StgClosure *)to); #ifdef PROFILING @@ -89,7 +93,7 @@ copy(StgClosure *src, nat size, step *stp) // that will not be scavenged. Used for object that have no pointer // fields. STATIC_INLINE StgClosure * -copy_noscav(StgClosure *src, nat size, step *stp) +copy_noscav_tag(StgClosure *src, nat size, step *stp, StgWord tag) { StgPtr to, from; nat i; @@ -125,6 +129,10 @@ copy_noscav(StgClosure *src, nat size, step *stp) for (i = 0; i < size; i++) { // unroll for small i to[i] = from[i]; } + + /* retag pointer before updating EVACUATE closure and returning */ + to = (StgPtr)TAG_CLOSURE(tag,(StgClosure*)to); + upd_evacuee((StgClosure *)from,(StgClosure *)to); #ifdef PROFILING @@ -184,6 +192,19 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp) } +/* Copy wrappers that don't tag the closure after copying */ +STATIC_INLINE StgClosure * +copy(StgClosure *src, nat size, step *stp) +{ + return copy_tag(src,size,stp,0); +} + +STATIC_INLINE StgClosure * +copy_noscav(StgClosure *src, nat size, step *stp) +{ + return copy_noscav_tag(src,size,stp,0); +} + /* ----------------------------------------------------------------------------- Evacuate a large object @@ -295,13 +316,18 @@ evacuate(StgClosure *q) bdescr *bd = NULL; step *stp; const StgInfoTable *info; + StgWord tag; loop: + /* The tag and the pointer are split, to be merged after evacing */ + tag = GET_CLOSURE_TAG(q); + q = UNTAG_CLOSURE(q); + ASSERT(LOOKS_LIKE_CLOSURE_PTR(q)); if (!HEAP_ALLOCED(q)) { - if (!major_gc) return q; + if (!major_gc) return TAG_CLOSURE(tag,q); info = get_itbl(q); switch (info->type) { @@ -338,14 +364,16 @@ loop: if (*STATIC_LINK(info,(StgClosure *)q) == NULL) { *STATIC_LINK(info,(StgClosure *)q) = static_objects; static_objects = (StgClosure *)q; + /* I am assuming that static_objects pointers are not + * written to other objects, and thus, no need to retag. */ } - return q; + return TAG_CLOSURE(tag,q); case CONSTR_NOCAF_STATIC: /* no need to put these on the static linked list, they don't need * to be scavenged. */ - return q; + return TAG_CLOSURE(tag,q); default: barf("evacuate(static): strange closure type %d", (int)(info->type)); @@ -365,7 +393,7 @@ loop: failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } - return q; + return TAG_CLOSURE(tag,q); } if ((bd->flags & (BF_LARGE | BF_COMPACTED | BF_EVACUATED)) != 0) { @@ -380,7 +408,7 @@ loop: failed_to_evac = rtsTrue; TICK_GC_FAILED_PROMOTION(); } - return q; + return TAG_CLOSURE(tag,q); } /* evacuate large objects by re-linking them onto a different list. @@ -393,7 +421,7 @@ loop: goto loop; } evacuate_large((P_)q); - return q; + return TAG_CLOSURE(tag,q); } /* If the object is in a step that we're compacting, then we @@ -408,7 +436,7 @@ loop: } push_mark_stack((P_)q); } - return q; + return TAG_CLOSURE(tag,q); } } @@ -429,20 +457,24 @@ loop: if (q->header.info == Czh_con_info && // unsigned, so always true: (StgChar)w >= MIN_CHARLIKE && (StgChar)w <= MAX_CHARLIKE) { - return (StgClosure *)CHARLIKE_CLOSURE((StgChar)w); + return TAG_CLOSURE(tag, + (StgClosure *)CHARLIKE_CLOSURE((StgChar)w) + ); } if (q->header.info == Izh_con_info && (StgInt)w >= MIN_INTLIKE && (StgInt)w <= MAX_INTLIKE) { - return (StgClosure *)INTLIKE_CLOSURE((StgInt)w); + return TAG_CLOSURE(tag, + (StgClosure *)INTLIKE_CLOSURE((StgInt)w) + ); } // else - return copy_noscav(q,sizeofW(StgHeader)+1,stp); + return copy_noscav_tag(q,sizeofW(StgHeader)+1,stp,tag); } case FUN_0_1: case FUN_1_0: case CONSTR_1_0: - return copy(q,sizeofW(StgHeader)+1,stp); + return copy_tag(q,sizeofW(StgHeader)+1,stp,tag); case THUNK_1_0: case THUNK_0_1: @@ -462,27 +494,27 @@ loop: case FUN_1_1: case FUN_2_0: + case FUN_0_2: case CONSTR_1_1: case CONSTR_2_0: - case FUN_0_2: - return copy(q,sizeofW(StgHeader)+2,stp); + return copy_tag(q,sizeofW(StgHeader)+2,stp,tag); case CONSTR_0_2: - return copy_noscav(q,sizeofW(StgHeader)+2,stp); + return copy_noscav_tag(q,sizeofW(StgHeader)+2,stp,tag); case THUNK: return copy(q,thunk_sizeW_fromITBL(info),stp); case FUN: - case CONSTR: case IND_PERM: case IND_OLDGEN_PERM: case WEAK: case STABLE_NAME: - return copy(q,sizeW_fromITBL(info),stp); + case CONSTR: + return copy_tag(q,sizeW_fromITBL(info),stp,tag); case BCO: - return copy(q,bco_sizeW((StgBCO *)q),stp); + return copy(q,bco_sizeW((StgBCO *)q),stp); case CAF_BLACKHOLE: case SE_CAF_BLACKHOLE: @@ -739,7 +771,9 @@ eval_thunk_selector( nat field, StgSelector * p ) const StgInfoTable *info_ptr; StgClosure *selectee; - selectee = p->selectee; + // The selectee might be a constructor closure, + // so we untag the pointer. + selectee = UNTAG_CLOSURE(p->selectee); // Save the real info pointer (NOTE: not the same as get_itbl()). info_ptr = p->header.info; @@ -814,7 +848,7 @@ selector_loop: { StgClosure *q; q = selectee->payload[field]; - if (is_to_space(q)) { + if (is_to_space(UNTAG_CLOSURE(q))) { goto bale_out; } else { return q; @@ -826,7 +860,8 @@ selector_loop: case IND_OLDGEN: case IND_OLDGEN_PERM: case IND_STATIC: - selectee = ((StgInd *)selectee)->indirectee; + // Again, we might need to untag a constructor. + selectee = UNTAG_CLOSURE( ((StgInd *)selectee)->indirectee ); goto selector_loop; case EVACUATED: @@ -880,7 +915,8 @@ selector_loop: // indirection. LDV_RECORD_CREATE(selectee); - selectee = val; + // Of course this pointer might be tagged + selectee = UNTAG_CLOSURE(val); goto selector_loop; } } diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 1fee394139..216d3cbe44 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -1031,6 +1031,7 @@ GarbageCollect ( rtsBool force_major_gc ) closure if it is alive, or NULL otherwise. NOTE: Use it before compaction only! + It untags and (if needed) retags pointers to closures. -------------------------------------------------------------------------- */ @@ -1039,8 +1040,12 @@ isAlive(StgClosure *p) { const StgInfoTable *info; bdescr *bd; + StgWord tag; while (1) { + /* The tag and the pointer are split, to be merged later when needed. */ + tag = GET_CLOSURE_TAG(p); + p = UNTAG_CLOSURE(p); ASSERT(LOOKS_LIKE_CLOSURE_PTR(p)); info = get_itbl(p); @@ -1052,18 +1057,18 @@ isAlive(StgClosure *p) // for static closures with an empty SRT or CONSTR_STATIC_NOCAFs. // if (!HEAP_ALLOCED(p)) { - return p; + return TAG_CLOSURE(tag,p); } // ignore closures in generations that we're not collecting. bd = Bdescr((P_)p); if (bd->gen_no > N) { - return p; + return TAG_CLOSURE(tag,p); } // if it's a pointer into to-space, then we're done if (bd->flags & BF_EVACUATED) { - return p; + return TAG_CLOSURE(tag,p); } // large objects use the evacuated flag @@ -1073,7 +1078,7 @@ isAlive(StgClosure *p) // check the mark bit for compacted steps if ((bd->flags & BF_COMPACTED) && is_marked((P_)p,bd)) { - return p; + return TAG_CLOSURE(tag,p); } switch (info->type) { diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c index 0de029edd5..f211401b05 100644 --- a/rts/sm/Scav.c +++ b/rts/sm/Scav.c @@ -200,7 +200,7 @@ scavenge_PAP_payload (StgClosure *fun, StgClosure **payload, StgWord size) StgWord bitmap; StgFunInfoTable *fun_info; - fun_info = get_fun_itbl(fun); + fun_info = get_fun_itbl(UNTAG_CLOSURE(fun)); ASSERT(fun_info->i.type != PAP); p = (StgPtr)payload; @@ -1720,7 +1720,7 @@ scavenge_stack(StgPtr p, StgPtr stack_end) StgFunInfoTable *fun_info; ret_fun->fun = evacuate(ret_fun->fun); - fun_info = get_fun_itbl(ret_fun->fun); + fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun)); p = scavenge_arg_block(fun_info, ret_fun->payload); goto follow_srt; } diff --git a/utils/genapply/GenApply.hs b/utils/genapply/GenApply.hs index b7cc6dd53c..c42ccb181a 100644 --- a/utils/genapply/GenApply.hs +++ b/utils/genapply/GenApply.hs @@ -1,10 +1,12 @@ -{-# OPTIONS -cpp #-} +{-# OPTIONS -cpp -fglasgow-exts #-} module Main(main) where #include "../../includes/ghcconfig.h" #include "../../includes/MachRegs.h" #include "../../includes/Constants.h" +-- Needed for TAG_BITS +#include "../../includes/MachDeps.h" import Text.PrettyPrint import Data.Word @@ -165,10 +167,16 @@ mkApplyFastName args mkApplyInfoName args = mkApplyName args <> text "_info" +mb_tag_node arity | Just tag <- tagForArity arity = mkTagStmt tag <> semi + | otherwise = empty + +mkTagStmt tag = text ("R1 = R1 + "++ show tag) + genMkPAP regstatus macro jump ticker disamb no_load_regs -- don't load argumnet regs before jumping args_in_regs -- arguments are already in regs is_pap args all_args_size fun_info_label + is_fun_case = smaller_arity_cases $$ exact_arity_case $$ larger_arity_case @@ -214,7 +222,8 @@ genMkPAP regstatus macro jump ticker disamb if is_pap then text "R2 = " <> mkApplyInfoName this_call_args <> semi - else empty, + else empty, + if is_fun_case then mb_tag_node arity else empty, text "jump " <> text jump <> semi ]) $$ text "}" @@ -294,9 +303,10 @@ genMkPAP regstatus macro jump ticker disamb -- text "TICK_SLOW_CALL_" <> text ticker <> text "_CORRECT();", reg_doc, text "Sp_adj(" <> int sp' <> text ");", - if is_pap - then text "R2 = " <> fun_info_label <> semi - else empty, + if is_pap + then text "R2 = " <> fun_info_label <> semi + else empty, + if is_fun_case then mb_tag_node n_args else empty, text "jump " <> text jump <> semi ]) @@ -319,6 +329,15 @@ genMkPAP regstatus macro jump ticker disamb nest 4 (vcat [ -- text "TICK_SLOW_CALL_" <> text ticker <> text "_TOO_FEW();", save_regs, + -- Before building the PAP, tag the function closure pointer + if is_fun_case then + vcat [ + text "if (arity < " <> int tAG_BITS_MAX <> text ") {", + text " R1 = R1 + arity" <> semi, + text "}" + ] + else empty + , text macro <> char '(' <> int n_args <> comma <> int all_args_size <> text "," <> fun_info_label <> @@ -332,6 +351,66 @@ genMkPAP regstatus macro jump ticker disamb = assignRegs regstatus stk_args_slow_offset args -- BUILD_PAP assumes args start at offset 1 +-- -------------------------------------- +-- Examine tag bits of function pointer and enter it +-- directly if needed. +-- TODO: remove the redundant case in the original code. +enterFastPath regstatus no_load_regs args_in_regs args + | Just tag <- tagForArity (length args) + = enterFastPathHelper tag regstatus no_load_regs args_in_regs args +enterFastPath _ _ _ _ = empty + +-- Copied from Constants.lhs & CgUtils.hs, i'd rather have this imported: +-- (arity,tag) +tAG_BITS = (TAG_BITS :: Int) +tAG_BITS_MAX = ((1 `shiftL` tAG_BITS) :: Int) + +tagForArity :: Int -> Maybe Int +tagForArity i | i < tAG_BITS_MAX = Just i + | otherwise = Nothing + +enterFastPathHelper tag regstatus no_load_regs args_in_regs args = + vcat [text "if (GETTAG(R1)==" <> int tag <> text ") {", + reg_doc, + text " Sp_adj(" <> int sp' <> text ");", + -- enter, but adjust offset with tag + text " jump " <> text "%GET_ENTRY(R1-" <> int tag <> text ");", + text "}" + ] + -- I don't totally understand this code, I copied it from + -- exact_arity_case + -- TODO: refactor + where + -- offset of arguments on the stack at slow apply calls. + stk_args_slow_offset = 1 + + stk_args_offset + | args_in_regs = 0 + | otherwise = stk_args_slow_offset + + (reg_doc, sp') + | no_load_regs || args_in_regs = (empty, stk_args_offset) + | otherwise = loadRegArgs regstatus stk_args_offset args + +tickForArity arity + | True + = empty + | Just tag <- tagForArity arity + = vcat [ + text "W_[TOTAL_CALLS] = W_[TOTAL_CALLS] + 1;", + text "W_[SLOW_CALLS_" <> int arity <> text "] = W_[SLOW_CALLS_" <> int arity <> text "] + 1;", + text "if (TO_W_(StgFunInfoExtra_arity(%FUN_INFO(%INFO_PTR(UNTAG(R1))))) == " <> int arity <> text " ) {", + text " W_[RIGHT_ARITY_" <> int arity <> text "] = W_[RIGHT_ARITY_" <> int arity <> text "] + 1;", + text " if (GETTAG(R1)==" <> int tag <> text ") {", + text " W_[TAGGED_PTR_" <> int arity <> text "] = W_[TAGGED_PTR_" <> int arity <> text "] + 1;", + text " } else {", + -- force a halt when not tagged! +-- text " W_[0]=0;", + text " }", + text "}" + ] +tickForArity _ = text "W_[TOTAL_CALLS] = W_[TOTAL_CALLS] + 1;" + -- ----------------------------------------------------------------------------- -- generate an apply function @@ -388,6 +467,7 @@ genApply regstatus args = -- print " [IND_OLDGEN_PERM] &&ind_lbl" -- print " };" + tickForArity (length args), text "", text "IF_DEBUG(apply,foreign \"C\" debugBelch(\"" <> fun_ret_label <> text "... \"); foreign \"C\" printClosure(R1 \"ptr\"));", @@ -411,6 +491,12 @@ genApply regstatus args = vcat (do_assert args 1), text "again:", + + -- if pointer is tagged enter it fast! + enterFastPath regstatus False False args, + + -- Functions can be tagged, so we untag them! + text "R1 = UNTAG(R1);", text "info = %INFO_PTR(R1);", -- if fast == 1: @@ -428,7 +514,7 @@ genApply regstatus args = text "ASSERT(arity > 0);", genMkPAP regstatus "BUILD_PAP" "ENTRY_LBL(stg_BCO)" "FUN" "BCO" True{-stack apply-} False{-args on stack-} False{-not a PAP-} - args all_args_size fun_info_label + args all_args_size fun_info_label {- tag stmt -}False ]), text "}", @@ -445,9 +531,9 @@ genApply regstatus args = nest 4 (vcat [ text "arity = TO_W_(StgFunInfoExtra_arity(%FUN_INFO(info)));", text "ASSERT(arity > 0);", - genMkPAP regstatus "BUILD_PAP" "%GET_ENTRY(R1)" "FUN" "FUN" + genMkPAP regstatus "BUILD_PAP" "%GET_ENTRY(UNTAG(R1))" "FUN" "FUN" False{-reg apply-} False{-args on stack-} False{-not a PAP-} - args all_args_size fun_info_label + args all_args_size fun_info_label {- tag stmt -}True ]), text "}", @@ -461,7 +547,7 @@ genApply regstatus args = text "ASSERT(arity > 0);", genMkPAP regstatus "NEW_PAP" "stg_PAP_apply" "PAP" "PAP" True{-stack apply-} False{-args on stack-} True{-is a PAP-} - args all_args_size fun_info_label + args all_args_size fun_info_label {- tag stmt -}False ]), text "}", @@ -506,6 +592,7 @@ genApply regstatus args = text " IND_OLDGEN_PERM: {", nest 4 (vcat [ text "R1 = StgInd_indirectee(R1);", + -- An indirection node might contain a tagged pointer text "goto again;" ]), text "}", @@ -541,6 +628,14 @@ genApplyFast regstatus args = nest 4 (vcat [ text "W_ info;", text "W_ arity;", + + tickForArity (length args), + + -- if pointer is tagged enter it fast! + enterFastPath regstatus False True args, + + -- Functions can be tagged, so we untag them! + text "R1 = UNTAG(R1);", text "info = %GET_STD_INFO(R1);", text "switch [INVALID_OBJECT .. N_CLOSURE_TYPES] (TO_W_(%INFO_TYPE(info))) {", nest 4 (vcat [ @@ -554,9 +649,9 @@ genApplyFast regstatus args = nest 4 (vcat [ text "arity = TO_W_(StgFunInfoExtra_arity(%GET_FUN_INFO(R1)));", text "ASSERT(arity > 0);", - genMkPAP regstatus "BUILD_PAP" "%GET_ENTRY(R1)" "FUN" "FUN" + genMkPAP regstatus "BUILD_PAP" "%GET_ENTRY(UNTAG(R1))" "FUN" "FUN" False{-reg apply-} True{-args in regs-} False{-not a PAP-} - args all_args_size fun_info_label + args all_args_size fun_info_label {- tag stmt -}True ]), char '}', @@ -607,7 +702,7 @@ genStackApply regstatus args = (assign_regs, sp') = loadRegArgs regstatus 0 args body = vcat [assign_regs, text "Sp_adj" <> parens (int sp') <> semi, - text "jump %GET_ENTRY(R1);" + text "jump %GET_ENTRY(UNTAG(R1));" ] -- ----------------------------------------------------------------------------- |