summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/cmm/CmmLint.hs11
-rw-r--r--compiler/cmm/PprC.hs3
-rw-r--r--compiler/codeGen/CgBindery.lhs68
-rw-r--r--compiler/codeGen/CgClosure.lhs24
-rw-r--r--compiler/codeGen/CgCon.lhs27
-rw-r--r--compiler/codeGen/CgHeapery.lhs3
-rw-r--r--compiler/codeGen/CgInfoTbls.hs25
-rw-r--r--compiler/codeGen/CgPrimOp.hs3
-rw-r--r--compiler/codeGen/CgProf.hs15
-rw-r--r--compiler/codeGen/CgTailCall.lhs70
-rw-r--r--compiler/codeGen/CgUtils.hs62
-rw-r--r--compiler/codeGen/ClosureInfo.lhs38
-rw-r--r--compiler/main/Constants.lhs10
-rw-r--r--compiler/nativeGen/MachCodeGen.hs12
-rw-r--r--includes/Closures.h3
-rw-r--r--includes/Cmm.h45
-rw-r--r--includes/InfoTables.h2
-rw-r--r--includes/MachDeps.h10
-rw-r--r--includes/Rts.h40
-rw-r--r--includes/Storage.h2
-rw-r--r--includes/mkDerivedConstants.c4
-rw-r--r--rts/Apply.cmm34
-rw-r--r--rts/HeapStackCheck.cmm32
-rw-r--r--rts/Interpreter.c11
-rw-r--r--rts/PrimOps.cmm9
-rw-r--r--rts/RetainerProfile.c22
-rw-r--r--rts/RtsAPI.c38
-rw-r--r--rts/Sanity.c17
-rw-r--r--rts/Sparks.c6
-rw-r--r--rts/Stable.c11
-rw-r--r--rts/Stats.c55
-rw-r--r--rts/StgMiscClosures.cmm10
-rw-r--r--rts/StgStartup.cmm1
-rw-r--r--rts/StgStdThunks.cmm29
-rw-r--r--rts/sm/Compact.c37
-rw-r--r--rts/sm/Evac.c82
-rw-r--r--rts/sm/GC.c13
-rw-r--r--rts/sm/Scav.c4
-rw-r--r--utils/genapply/GenApply.hs119
39 files changed, 832 insertions, 175 deletions
diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs
index 130dba05f9..d8d6c9bb46 100644
--- a/compiler/cmm/CmmLint.hs
+++ b/compiler/cmm/CmmLint.hs
@@ -88,7 +88,8 @@ cmmCheckMachOp op args
= return (resultRepOfMachOp op)
isWordOffsetReg (CmmGlobal Sp) = True
-isWordOffsetReg (CmmGlobal Hp) = True
+-- No warnings for unaligned arithmetic, which is used to tag dynamic constructor closures.
+--isWordOffsetReg (CmmGlobal Hp) = True
isWordOffsetReg _ = False
isOffsetOp (MO_Add _) = True
@@ -98,14 +99,18 @@ isOffsetOp _ = False
-- This expression should be an address from which a word can be loaded:
-- check for funny-looking sub-word offsets.
cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)])
- | isOffsetOp op && i `rem` fromIntegral wORD_SIZE /= 0
+ | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0
= cmmLintDubiousWordOffset e
cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg])
- | isOffsetOp op && i `rem` fromIntegral wORD_SIZE /= 0
+ | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0
= cmmLintDubiousWordOffset e
cmmCheckWordAddress _
= return ()
+-- No warnings for unaligned arithmetic with the node register,
+-- which is used to extract fields from tagged constructor closures.
+notNodeReg (CmmReg reg) | reg == nodeReg = False
+notNodeReg _ = True
lintCmmStmt :: CmmStmt -> CmmLint ()
lintCmmStmt stmt@(CmmAssign reg expr) = do
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index 77d337df93..6032dc255c 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -322,8 +322,9 @@ pprExpr e = case e of
-> char '*' <> pprAsPtrReg r
CmmLoad (CmmRegOff r off) rep
- | isPtrReg r && rep == wordRep
+ | isPtrReg r && rep == wordRep && (off `rem` wORD_SIZE == 0)
-- ToDo: check that the offset is a word multiple?
+ -- (For tagging to work, I had to avoid unaligned loads. --ARY)
-> pprAsPtrReg r <> brackets (ppr (off `shiftR` wordShift))
CmmLoad expr rep ->
diff --git a/compiler/codeGen/CgBindery.lhs b/compiler/codeGen/CgBindery.lhs
index d5a2c69d60..7447222d45 100644
--- a/compiler/codeGen/CgBindery.lhs
+++ b/compiler/codeGen/CgBindery.lhs
@@ -11,7 +11,8 @@ module CgBindery (
cgIdInfoId, cgIdInfoArgRep, cgIdInfoLF,
- stableIdInfo, heapIdInfo,
+ stableIdInfo, heapIdInfo,
+ taggedStableIdInfo, taggedHeapIdInfo,
letNoEscapeIdInfo, idInfoToAmode,
addBindC, addBindsC,
@@ -22,7 +23,7 @@ module CgBindery (
getLiveStackBindings,
bindArgsToStack, rebindToStack,
- bindNewToNode, bindNewToReg, bindArgsToRegs,
+ bindNewToNode, bindNewToUntagNode, bindNewToReg, bindArgsToRegs,
bindNewToTemp,
getArgAmode, getArgAmodes,
getCgIdInfo,
@@ -38,11 +39,13 @@ import CgStackery
import CgUtils
import CLabel
import ClosureInfo
+import Constants
import Cmm
import PprCmm ( {- instance Outputable -} )
import SMRep
import Id
+import DataCon
import VarEnv
import VarSet
import Literal
@@ -52,6 +55,7 @@ import StgSyn
import Unique
import UniqSet
import Outputable
+
\end{code}
@@ -80,23 +84,44 @@ data CgIdInfo
, cg_rep :: CgRep
, cg_vol :: VolatileLoc
, cg_stb :: StableLoc
- , cg_lf :: LambdaFormInfo }
+ , cg_lf :: LambdaFormInfo
+ , cg_tag :: {-# UNPACK #-} !Int -- tag to be added in idInfoToAmode
+ }
mkCgIdInfo id vol stb lf
= CgIdInfo { cg_id = id, cg_vol = vol, cg_stb = stb,
- cg_lf = lf, cg_rep = idCgRep id }
+ cg_lf = lf, cg_rep = idCgRep id, cg_tag = tag }
+ where
+ tag
+ | Just con <- isDataConWorkId_maybe id,
+ {- Is this an identifier for a static constructor closure? -}
+ isNullaryRepDataCon con
+ {- If yes, is this a nullary constructor?
+ If yes, we assume that the constructor is evaluated and can
+ be tagged.
+ -}
+ = tagForCon con
+
+ | otherwise
+ = funTagLFInfo lf
voidIdInfo id = CgIdInfo { cg_id = id, cg_vol = NoVolatileLoc
, cg_stb = VoidLoc, cg_lf = mkLFArgument id
- , cg_rep = VoidArg }
+ , cg_rep = VoidArg, cg_tag = 0 }
-- Used just for VoidRep things
data VolatileLoc -- These locations die across a call
= NoVolatileLoc
| RegLoc CmmReg -- In one of the registers (global or local)
| VirHpLoc VirtualHpOffset -- Hp+offset (address of closure)
- | VirNodeLoc VirtualHpOffset -- Cts of offset indirect from Node
- -- ie *(Node+offset)
+ | VirNodeLoc ByteOff -- Cts of offset indirect from Node
+ -- ie *(Node+offset).
+ -- NB. Byte offset, because we subtract R1's
+ -- tag from the offset.
+
+mkTaggedCgIdInfo id vol stb lf con
+ = CgIdInfo { cg_id = id, cg_vol = vol, cg_stb = stb,
+ cg_lf = lf, cg_rep = idCgRep id, cg_tag = tagForCon con }
\end{code}
@StableLoc@ encodes where an Id can be found, used by
@@ -121,7 +146,7 @@ data StableLoc
\begin{code}
instance Outputable CgIdInfo where
- ppr (CgIdInfo id rep vol stb lf)
+ ppr (CgIdInfo id rep vol stb lf _) -- TODO, pretty pring the tag info
= ppr id <+> ptext SLIT("-->") <+> vcat [ppr vol, ppr stb]
instance Outputable VolatileLoc where
@@ -149,19 +174,29 @@ stableIdInfo id amode lf_info = mkCgIdInfo id NoVolatileLoc (StableLoc amode)
heapIdInfo id offset lf_info = mkCgIdInfo id (VirHpLoc offset) NoStableLoc lf_info
letNoEscapeIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLNE sp) lf_info
stackIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLoc sp) lf_info
-nodeIdInfo id offset lf_info = mkCgIdInfo id (VirNodeLoc offset) NoStableLoc lf_info
+nodeIdInfo id offset lf_info = mkCgIdInfo id (VirNodeLoc (wORD_SIZE*offset)) NoStableLoc lf_info
regIdInfo id reg lf_info = mkCgIdInfo id (RegLoc reg) NoStableLoc lf_info
+taggedStableIdInfo id amode lf_info con
+ = mkTaggedCgIdInfo id NoVolatileLoc (StableLoc amode) lf_info con
+taggedHeapIdInfo id offset lf_info con
+ = mkTaggedCgIdInfo id (VirHpLoc offset) NoStableLoc lf_info con
+untagNodeIdInfo id offset lf_info tag
+ = mkCgIdInfo id (VirNodeLoc (wORD_SIZE*offset - tag)) NoStableLoc lf_info
+
+
idInfoToAmode :: CgIdInfo -> FCode CmmExpr
idInfoToAmode info
= case cg_vol info of {
RegLoc reg -> returnFC (CmmReg reg) ;
- VirNodeLoc nd_off -> returnFC (CmmLoad (cmmOffsetW (CmmReg nodeReg) nd_off) mach_rep) ;
- VirHpLoc hp_off -> getHpRelOffset hp_off ;
+ VirNodeLoc nd_off -> returnFC (CmmLoad (cmmOffsetB (CmmReg nodeReg) nd_off)
+ mach_rep) ;
+ VirHpLoc hp_off -> do { off <- getHpRelOffset hp_off
+ ; return $! maybeTag off };
NoVolatileLoc ->
case cg_stb info of
- StableLoc amode -> returnFC amode
+ StableLoc amode -> returnFC $! maybeTag amode
VirStkLoc sp_off -> do { sp_rel <- getSpRelOffset sp_off
; return (CmmLoad sp_rel mach_rep) }
@@ -177,6 +212,11 @@ idInfoToAmode info
where
mach_rep = argMachRep (cg_rep info)
+ maybeTag amode -- add the tag, if we have one
+ | tag == 0 = amode
+ | otherwise = cmmOffsetB amode tag
+ where tag = cg_tag info
+
cgIdInfoId :: CgIdInfo -> Id
cgIdInfoId = cg_id
@@ -389,6 +429,10 @@ bindNewToNode :: Id -> VirtualHpOffset -> LambdaFormInfo -> Code
bindNewToNode id offset lf_info
= addBindC id (nodeIdInfo id offset lf_info)
+bindNewToUntagNode :: Id -> VirtualHpOffset -> LambdaFormInfo -> Int -> Code
+bindNewToUntagNode id offset lf_info tag
+ = addBindC id (untagNodeIdInfo id offset lf_info tag)
+
-- Create a new temporary whose unique is that in the id,
-- bind the id to it, and return the addressing mode for the
-- temporary.
diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs
index fabf434d07..86e13ab383 100644
--- a/compiler/codeGen/CgClosure.lhs
+++ b/compiler/codeGen/CgClosure.lhs
@@ -177,7 +177,14 @@ cgRhsClosure bndr cc bndr_info fvs upd_flag args body = do
-- BUILD ITS INFO TABLE AND CODE
; forkClosureBody (do
{ -- Bind the fvs
- let bind_fv (info, offset)
+ let
+ -- A function closure pointer may be tagged, so we
+ -- must take it into account when accessing the free variables.
+ mbtag = tagForArity (length args)
+ bind_fv (info, offset)
+ | Just tag <- mbtag
+ = bindNewToUntagNode (cgIdInfoId info) offset (cgIdInfoLF info) tag
+ | otherwise
= bindNewToNode (cgIdInfoId info) offset (cgIdInfoLF info)
; mapCs bind_fv bind_details
@@ -236,7 +243,7 @@ NB: Thunks cannot have a primitive type!
closureCodeBody binder_info cl_info cc [{- No args i.e. thunk -}] body = do
{ body_absC <- getCgStmts $ do
{ tickyEnterThunk cl_info
- ; ldvEnter (CmmReg nodeReg) -- NB: Node always points when profiling
+ ; ldvEnterClosure cl_info -- NB: Node always points when profiling
; thunkWrapper cl_info $ do
-- We only enter cc after setting up update so
-- that cc of enclosing scope will be recorded
@@ -400,8 +407,19 @@ funWrapper :: ClosureInfo -- Closure whose code body this is
funWrapper closure_info arg_regs reg_save_code fun_body = do
{ let node_points = nodeMustPointToIt (closureLFInfo closure_info)
+ {-
+ -- Debugging: check that R1 has the correct tag
+ ; let tag = funTag closure_info
+ ; whenC (tag /= 0 && node_points) $ do
+ l <- newLabelC
+ stmtC (CmmCondBranch (CmmMachOp mo_wordEq [cmmGetTag (CmmReg nodeReg),
+ CmmLit (mkIntCLit tag)]) l)
+ stmtC (CmmStore (CmmLit (mkWordCLit 0)) (CmmLit (mkWordCLit 0)))
+ labelC l
+ -}
+
-- Enter for Ldv profiling
- ; whenC node_points (ldvEnter (CmmReg nodeReg))
+ ; whenC node_points (ldvEnterClosure closure_info)
-- GranSim yeild poin
; granYield arg_regs node_points
diff --git a/compiler/codeGen/CgCon.lhs b/compiler/codeGen/CgCon.lhs
index a2c8578d18..91d7098f3e 100644
--- a/compiler/codeGen/CgCon.lhs
+++ b/compiler/codeGen/CgCon.lhs
@@ -43,8 +43,10 @@ import Id
import Type
import PrelInfo
import Outputable
-import Util
import ListSetOps
+#ifdef DEBUG
+import Util ( lengthIs )
+#endif
\end{code}
@@ -93,7 +95,7 @@ cgTopRhsCon id con args
; emitDataLits closure_label closure_rep
-- RETURN
- ; returnFC (id, stableIdInfo id (mkLblExpr closure_label) lf_info) }
+ ; returnFC (id, taggedStableIdInfo id (mkLblExpr closure_label) lf_info con) }
\end{code}
%************************************************************************
@@ -134,9 +136,10 @@ at all.
\begin{code}
buildDynCon binder cc con []
= do this_pkg <- getThisPackage
- returnFC (stableIdInfo binder
+ returnFC (taggedStableIdInfo binder
(mkLblExpr (mkClosureLabel this_pkg (dataConName con)))
- (mkConLFInfo con))
+ (mkConLFInfo con)
+ con)
\end{code}
The following three paragraphs about @Char@-like and @Int@-like
@@ -170,7 +173,7 @@ buildDynCon binder cc con [arg_amode]
offsetW = (val_int - mIN_INTLIKE) * (fixedHdrSize + 1)
-- INTLIKE closures consist of a header and one word payload
intlike_amode = CmmLit (cmmLabelOffW intlike_lbl offsetW)
- ; returnFC (stableIdInfo binder intlike_amode (mkConLFInfo con)) }
+ ; returnFC (taggedStableIdInfo binder intlike_amode (mkConLFInfo con) con) }
buildDynCon binder cc con [arg_amode]
| maybeCharLikeCon con
@@ -181,7 +184,7 @@ buildDynCon binder cc con [arg_amode]
offsetW = (val_int - mIN_CHARLIKE) * (fixedHdrSize + 1)
-- CHARLIKE closures consist of a header and one word payload
charlike_amode = CmmLit (cmmLabelOffW charlike_lbl offsetW)
- ; returnFC (stableIdInfo binder charlike_amode (mkConLFInfo con)) }
+ ; returnFC (taggedStableIdInfo binder charlike_amode (mkConLFInfo con) con) }
\end{code}
Now the general case.
@@ -194,7 +197,7 @@ buildDynCon binder ccs con args
(closure_info, amodes_w_offsets) = layOutDynConstr this_pkg con args
; hp_off <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets
- ; returnFC (heapIdInfo binder hp_off lf_info) }
+ ; returnFC (taggedHeapIdInfo binder hp_off lf_info con) }
where
lf_info = mkConLFInfo con
@@ -223,7 +226,9 @@ bindConArgs :: DataCon -> [Id] -> Code
bindConArgs con args
= do this_pkg <- getThisPackage
let
- bind_arg (arg, offset) = bindNewToNode arg offset (mkLFArgument arg)
+ -- The binding below forces the masking out of the tag bits
+ -- when accessing the constructor field.
+ bind_arg (arg, offset) = bindNewToUntagNode arg offset (mkLFArgument arg) (tagForCon con)
(_, args_w_offsets) = layOutDynConstr this_pkg con (addIdReps args)
--
ASSERT(not (isUnboxedTupleCon con)) return ()
@@ -386,11 +391,12 @@ cgTyCon tycon
-- Put the table after the data constructor decls, because the
-- datatype closure table (for enumeration types)
-- to (say) PrelBase_$wTrue_closure, which is defined in code_stuff
+ -- Note that the closure pointers are tagged.
; extra <-
if isEnumerationTyCon tycon then do
tbl <- getCmm (emitRODataLits (mkLocalClosureTableLabel
(tyConName tycon))
- [ CmmLabel (mkLocalClosureLabel (dataConName con))
+ [ CmmLabelOff (mkLocalClosureLabel (dataConName con)) (tagForCon con)
| con <- tyConDataCons tycon])
return [tbl]
else
@@ -434,6 +440,9 @@ cgDataCon data_con
body_code = do {
-- NB: We don't set CC when entering data (WDP 94/06)
tickyReturnOldCon (length arg_things)
+ -- The case continuation code is expecting a tagged pointer
+ ; stmtC (CmmAssign nodeReg
+ (tagCons data_con (CmmReg nodeReg)))
; performReturn emitReturnInstr }
-- noStmts: Ptr to thing already in Node
diff --git a/compiler/codeGen/CgHeapery.lhs b/compiler/codeGen/CgHeapery.lhs
index 3bba211aa1..b89452e1de 100644
--- a/compiler/codeGen/CgHeapery.lhs
+++ b/compiler/codeGen/CgHeapery.lhs
@@ -288,6 +288,9 @@ hpStkCheck cl_info is_fun reg_save_code code
= noStmts
| otherwise
= oneStmt (CmmAssign nodeReg (CmmLit (CmmLabel closure_lbl)))
+ -- Strictly speaking, we should tag node here. But if
+ -- node doesn't point to the closure, the code for the closure
+ -- cannot depend on the value of R1 anyway, so we're safe.
closure_lbl = closureLabelFromCI cl_info
full_save_code = node_asst `plusStmts` reg_save_code
diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs
index 4e38485455..e9751fa748 100644
--- a/compiler/codeGen/CgInfoTbls.hs
+++ b/compiler/codeGen/CgInfoTbls.hs
@@ -15,6 +15,7 @@ module CgInfoTbls (
stdInfoTableSizeB,
entryCode, closureInfoPtr,
getConstrTag,
+ cmmGetClosureType,
infoTable, infoTableClosureType,
infoTablePtrs, infoTableNonPtrs,
funInfoTable, makeRelativeRefTo
@@ -273,14 +274,24 @@ emitAlgReturnTarget
emitAlgReturnTarget name branches mb_deflt fam_sz
= do { blks <- getCgStmts $
- emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
- -- NB: tag_expr is zero-based
+ -- is the constructor tag in the node reg?
+ if isSmallFamily fam_sz
+ then do -- yes, node has constr. tag
+ let tag_expr = cmmConstrTag1 (CmmReg nodeReg)
+ branches' = [(tag+1,branch)|(tag,branch)<-branches]
+ emitSwitch tag_expr branches' mb_deflt 1 fam_sz
+ else do -- no, get tag from info table
+ let -- Note that ptr _always_ has tag 1
+ -- when the family size is big enough
+ untagged_ptr = cmmRegOffB nodeReg (-1)
+ tag_expr = getConstrTag (untagged_ptr)
+ emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
; lbl <- emitReturnTarget name blks
; return (lbl, Nothing) }
-- Nothing: the internal branches in the switch don't have
-- global labels, so we can't use them at the 'call site'
where
- tag_expr = getConstrTag (CmmReg nodeReg)
+ uniq = getUnique name
--------------------------------
emitReturnInstr :: Code
@@ -346,6 +357,14 @@ getConstrTag closure_ptr
where
info_table = infoTable (closureInfoPtr closure_ptr)
+cmmGetClosureType :: CmmExpr -> CmmExpr
+-- Takes a closure pointer, and return the closure type
+-- obtained from the info table
+cmmGetClosureType closure_ptr
+ = CmmMachOp (MO_U_Conv halfWordRep wordRep) [infoTableClosureType info_table]
+ where
+ info_table = infoTable (closureInfoPtr closure_ptr)
+
infoTable :: CmmExpr -> CmmExpr
-- Takes an info pointer (the first word of a closure)
-- and returns a pointer to the first word of the standard-form
diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs
index d26d9c6901..e489d73646 100644
--- a/compiler/codeGen/CgPrimOp.hs
+++ b/compiler/codeGen/CgPrimOp.hs
@@ -183,8 +183,9 @@ emitPrimOp [res] AddrToHValueOp [arg] live
= stmtC (CmmAssign (CmmLocal res) arg)
-- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
+-- Note: argument may be tagged!
emitPrimOp [res] DataToTagOp [arg] live
- = stmtC (CmmAssign (CmmLocal res) (getConstrTag arg))
+ = stmtC (CmmAssign (CmmLocal res) (getConstrTag (cmmUntag arg)))
{- Freezing arrays-of-ptrs requires changing an info table, for the
benefit of the generational collector. It needs to scavenge mutable
diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs
index 27ee54c50d..651f0eaa82 100644
--- a/compiler/codeGen/CgProf.hs
+++ b/compiler/codeGen/CgProf.hs
@@ -20,7 +20,7 @@ module CgProf (
emitSetCCC, emitCCS,
-- Lag/drag/void stuff
- ldvEnter, ldvRecordCreate
+ ldvEnter, ldvEnterClosure, ldvRecordCreate
) where
#include "HsVersions.h"
@@ -242,9 +242,12 @@ enter_cost_centre closure_info ccs body
where
enc_ccs = CmmLit (mkCCostCentreStack ccs)
re_entrant = closureReEntrant closure_info
- node_ccs = costCentreFrom (CmmReg nodeReg)
+ node_ccs = costCentreFrom (cmmOffsetB (CmmReg nodeReg) (-node_tag))
is_box = isBox body
+ -- if this is a function, then node will be tagged; we must subract the tag
+ node_tag = funTag closure_info
+
-- set the current CCS when entering a PAP
enterCostCentrePAP :: CmmExpr -> Code
enterCostCentrePAP closure =
@@ -448,9 +451,14 @@ ldvRecordCreate closure = stmtC $ CmmStore (ldvWord closure) dynLdvInit
-- The closure is not IND or IND_OLDGEN because neither is considered for LDV
-- profiling.
--
+ldvEnterClosure :: ClosureInfo -> Code
+ldvEnterClosure closure_info = ldvEnter (cmmOffsetB (CmmReg nodeReg) (-tag))
+ where tag = funTag closure_info
+ -- don't forget to substract node's tag
+
ldvEnter :: CmmExpr -> Code
-- Argument is a closure pointer
-ldvEnter cl_ptr
+ldvEnter cl_ptr
= ifProfiling $
-- if (era > 0) {
-- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) |
@@ -458,6 +466,7 @@ ldvEnter cl_ptr
emitIf (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit])
(stmtC (CmmStore ldv_wd new_ldv_wd))
where
+ -- don't forget to substract node's tag
ldv_wd = ldvWord cl_ptr
new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd wordRep)
(CmmLit (mkWordCLit lDV_CREATE_MASK)))
diff --git a/compiler/codeGen/CgTailCall.lhs b/compiler/codeGen/CgTailCall.lhs
index 22cecb7249..952702674f 100644
--- a/compiler/codeGen/CgTailCall.lhs
+++ b/compiler/codeGen/CgTailCall.lhs
@@ -27,6 +27,7 @@ import CgUtils
import CgTicky
import ClosureInfo
import SMRep
+import MachOp
import Cmm
import CmmUtils
import CLabel
@@ -102,7 +103,8 @@ performTailCall fun_info arg_amodes pending_assts
| otherwise
= do { fun_amode <- idInfoToAmode fun_info
- ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode)
+ ; let assignSt = CmmAssign nodeReg fun_amode
+ node_asst = oneStmt assignSt
opt_node_asst | nodeMustPointToIt lf_info = node_asst
| otherwise = noStmts
; EndOfBlockInfo sp _ <- getEndOfBlockInfo
@@ -113,8 +115,15 @@ performTailCall fun_info arg_amodes pending_assts
-- Node must always point to things we enter
EnterIt -> do
{ emitSimultaneously (node_asst `plusStmts` pending_assts)
- ; let target = entryCode (closureInfoPtr (CmmReg nodeReg))
- ; doFinalJump sp False (stmtC (CmmJump target [])) }
+ ; let target = entryCode (closureInfoPtr (CmmReg nodeReg))
+ enterClosure = stmtC (CmmJump target [])
+ -- If this is a scrutinee
+ -- let's check if the closure is a constructor
+ -- so we can directly jump to the alternatives switch
+ -- statement.
+ jumpInstr = getEndOfBlockInfo >>=
+ maybeSwitchOnCons enterClosure
+ ; doFinalJump sp False jumpInstr }
-- A function, but we have zero arguments. It is already in WHNF,
-- so we can just return it.
@@ -149,6 +158,7 @@ performTailCall fun_info arg_amodes pending_assts
; directCall sp apply_lbl args extra_args
(node_asst `plusStmts` pending_assts)
+
}
-- A direct function call (possibly with some left-over arguments)
@@ -169,8 +179,58 @@ performTailCall fun_info arg_amodes pending_assts
where
fun_name = idName (cgIdInfoId fun_info)
lf_info = cgIdInfoLF fun_info
-
-
+ untag_node = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg))
+ -- Test if closure is a constructor
+ maybeSwitchOnCons enterClosure eob
+ | EndOfBlockInfo _ (CaseAlts lbl _ _) <- eob
+ = do { is_constr <- newLabelC
+ -- Is the pointer tagged?
+ -- Yes, jump to switch statement
+ ; stmtC (CmmCondBranch (cmmIsTagged (CmmReg nodeReg))
+ is_constr)
+ -- No, enter the closure.
+ ; enterClosure
+ ; labelC is_constr
+ ; stmtC (CmmJump (CmmLit (CmmLabel lbl)) [])
+ }
+{-
+ -- This is a scrutinee for a case expression
+ -- so let's see if we can directly inspect the closure
+ | EndOfBlockInfo _ (CaseAlts lbl _ _ _) <- eob
+ = do { no_cons <- newLabelC
+ -- Both the NCG and gcc optimize away the temp
+ ; z <- newTemp wordRep
+ ; stmtC (CmmAssign z tag_expr)
+ ; let tag = CmmReg z
+ -- Is the closure a cons?
+ ; stmtC (CmmCondBranch (cond1 tag) no_cons)
+ ; stmtC (CmmCondBranch (cond2 tag) no_cons)
+ -- Yes, jump to switch statement
+ ; stmtC (CmmJump (CmmLit (CmmLabel lbl)) [])
+ ; labelC no_cons
+ -- No, enter the closure.
+ ; enterClosure
+ }
+-}
+ -- No case expression involved, enter the closure.
+ | otherwise
+ = do { stmtC untag_node
+ ; enterClosure
+ }
+ where
+ --cond1 tag = cmmULtWord tag lowCons
+ -- More efficient than the above?
+ tag_expr = cmmGetClosureType (CmmReg nodeReg)
+ cond1 tag = cmmEqWord tag (CmmLit (mkIntCLit 0))
+ cond2 tag = cmmUGtWord tag highCons
+ lowCons = CmmLit (mkIntCLit 1)
+ -- CONSTR
+ highCons = CmmLit (mkIntCLit 8)
+ -- CONSTR_NOCAF_STATIC (from ClosureType.h)
+
+
+untagCmmAssign (CmmAssign r cmmExpr) = CmmAssign r (cmmUntag cmmExpr)
+untagCmmAssign stmt = stmt
directCall sp lbl args extra_args assts = do
let
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs
index c66fc9ebdd..8d3578e1ef 100644
--- a/compiler/codeGen/CgUtils.hs
+++ b/compiler/codeGen/CgUtils.hs
@@ -22,12 +22,17 @@ module CgUtils (
callerSaveVolatileRegs, get_GlobalReg_addr,
cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord,
+ cmmUGtWord,
cmmOffsetExprW, cmmOffsetExprB,
cmmRegOffW, cmmRegOffB,
cmmLabelOffW, cmmLabelOffB,
cmmOffsetW, cmmOffsetB,
cmmOffsetLitW, cmmOffsetLitB,
cmmLoadIndexW,
+ cmmConstrTag, cmmConstrTag1,
+
+ tagForCon, tagCons, isSmallFamily,
+ cmmUntag, cmmIsTagged, cmmGetTag,
addToMem, addToMemE,
mkWordCLit,
@@ -43,6 +48,7 @@ module CgUtils (
import CgMonad
import TyCon
+import DataCon
import Id
import Constants
import SMRep
@@ -61,7 +67,9 @@ import Util
import DynFlags
import FastString
import PackageConfig
+#ifdef DEBUG
import Outputable
+#endif
import Data.Char
import Data.Bits
@@ -164,6 +172,9 @@ cmmEqWord e1 e2 = CmmMachOp mo_wordEq [e1, e2]
cmmULtWord e1 e2 = CmmMachOp mo_wordULt [e1, e2]
cmmUGeWord e1 e2 = CmmMachOp mo_wordUGe [e1, e2]
cmmUGtWord e1 e2 = CmmMachOp mo_wordUGt [e1, e2]
+--cmmShlWord e1 e2 = CmmMachOp mo_wordShl [e1, e2]
+--cmmUShrWord e1 e2 = CmmMachOp mo_wordUShr [e1, e2]
+cmmSubWord e1 e2 = CmmMachOp mo_wordSub [e1, e2]
cmmNegate :: CmmExpr -> CmmExpr
cmmNegate (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep)
@@ -172,6 +183,57 @@ cmmNegate e = CmmMachOp (MO_S_Neg (cmmExprRep e)) [e]
blankWord :: CmmStatic
blankWord = CmmUninitialised wORD_SIZE
+-- Tagging --
+-- Tag bits mask
+--cmmTagBits = CmmLit (mkIntCLit tAG_BITS)
+cmmTagMask = CmmLit (mkIntCLit tAG_MASK)
+cmmPointerMask = CmmLit (mkIntCLit (complement tAG_MASK))
+
+-- Used to untag a possibly tagged pointer
+-- A static label need not be untagged
+cmmUntag e@(CmmLit (CmmLabel _)) = e
+-- Default case
+cmmUntag e = (e `cmmAndWord` cmmPointerMask)
+
+cmmGetTag e = (e `cmmAndWord` cmmTagMask)
+
+-- Test if a closure pointer is untagged
+cmmIsTagged e = (e `cmmAndWord` cmmTagMask)
+ `cmmNeWord` CmmLit zeroCLit
+
+cmmConstrTag e = (e `cmmAndWord` cmmTagMask) `cmmSubWord` (CmmLit (mkIntCLit 1))
+-- Get constructor tag, but one based.
+cmmConstrTag1 e = e `cmmAndWord` cmmTagMask
+
+{-
+ The family size of a data type (the number of constructors)
+ can be either:
+ * small, if the family size < 2**tag_bits
+ * big, otherwise.
+
+ Small families can have the constructor tag in the tag
+ bits.
+ Big families only use the tag value 1 to represent
+ evaluatedness.
+-}
+isSmallFamily fam_size = fam_size <= mAX_PTR_TAG
+
+tagForCon con = tag
+ where
+ con_tag = dataConTagZ con
+ fam_size = tyConFamilySize (dataConTyCon con)
+ tag | isSmallFamily fam_size = con_tag + 1
+ | otherwise = 1
+
+--Tag an expression, to do: refactor, this appears in some other module.
+tagCons con expr = cmmOffsetB expr (tagForCon con)
+
+-- Copied from CgInfoTbls.hs
+-- We keep the *zero-indexed* tag in the srt_len field of the info
+-- table of a data constructor.
+dataConTagZ :: DataCon -> ConTagZ
+dataConTagZ con = dataConTag con - fIRST_TAG
+
-----------------------
-- Making literals
diff --git a/compiler/codeGen/ClosureInfo.lhs b/compiler/codeGen/ClosureInfo.lhs
index d0d2ed98b2..d537a7b3d9 100644
--- a/compiler/codeGen/ClosureInfo.lhs
+++ b/compiler/codeGen/ClosureInfo.lhs
@@ -23,7 +23,7 @@ module ClosureInfo (
mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo,
mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape,
- mkClosureInfo, mkConInfo,
+ mkClosureInfo, mkConInfo, maybeIsLFCon,
closureSize, closureNonHdrSize,
closureGoodStuffSize, closurePtrsSize,
@@ -35,6 +35,7 @@ module ClosureInfo (
closureNeedsUpdSpace, closureIsThunk,
closureSingleEntry, closureReEntrant, isConstrClosure_maybe,
closureFunInfo, isStandardFormThunk, isKnownFun,
+ funTag, funTagLFInfo, tagForArity,
enterIdLabel, enterLocalIdLabel, enterReturnPtLabel,
@@ -58,6 +59,7 @@ module ClosureInfo (
#include "../includes/MachDeps.h"
#include "HsVersions.h"
+--import CgUtils
import StgSyn
import SMRep
@@ -277,6 +279,10 @@ might_be_a_function ty
mkConLFInfo :: DataCon -> LambdaFormInfo
mkConLFInfo con = LFCon con
+maybeIsLFCon :: LambdaFormInfo -> Maybe DataCon
+maybeIsLFCon (LFCon con) = Just con
+maybeIsLFCon _ = Nothing
+
mkSelectorLFInfo id offset updatable
= LFThunk NotTopLevel False updatable (SelectorThunk offset)
(might_be_a_function (idType id))
@@ -804,10 +810,32 @@ isConstrClosure_maybe (ConInfo { closureCon = data_con }) = Just data_con
isConstrClosure_maybe _ = Nothing
closureFunInfo :: ClosureInfo -> Maybe (Int, ArgDescr)
-closureFunInfo (ClosureInfo { closureLFInfo = LFReEntrant _ arity _ arg_desc})
- = Just (arity, arg_desc)
-closureFunInfo _
- = Nothing
+closureFunInfo (ClosureInfo { closureLFInfo = lf_info }) = lfFunInfo lf_info
+closureFunInfo _ = Nothing
+
+lfFunInfo :: LambdaFormInfo -> Maybe (Int, ArgDescr)
+lfFunInfo (LFReEntrant _ arity _ arg_desc) = Just (arity, arg_desc)
+lfFunInfo _ = Nothing
+
+funTag :: ClosureInfo -> Int
+funTag (ClosureInfo { closureLFInfo = lf_info }) = funTagLFInfo lf_info
+funTag _ = 0
+
+-- maybe this should do constructor tags too?
+funTagLFInfo :: LambdaFormInfo -> Int
+funTagLFInfo lf
+ -- A function is tagged with its arity
+ | Just (arity,_) <- lfFunInfo lf,
+ Just tag <- tagForArity arity
+ = tag
+
+ -- other closures (and unknown ones) are not tagged
+ | otherwise
+ = 0
+
+tagForArity :: Int -> Maybe Int
+tagForArity i | i <= mAX_PTR_TAG = Just i
+ | otherwise = Nothing
\end{code}
\begin{code}
diff --git a/compiler/main/Constants.lhs b/compiler/main/Constants.lhs
index 4f13af8828..2e0c4d4095 100644
--- a/compiler/main/Constants.lhs
+++ b/compiler/main/Constants.lhs
@@ -6,6 +6,8 @@
\begin{code}
module Constants (module Constants) where
+import Data.Bits (shiftL)
+
-- This magical #include brings in all the everybody-knows-these magic
-- constants unfortunately, we need to be *explicit* about which one
-- we want; if we just hope a -I... will get the right one, we could
@@ -108,6 +110,14 @@ wORD_SIZE = (SIZEOF_HSWORD :: Int)
wORD_SIZE_IN_BITS = wORD_SIZE * 8 :: Int
\end{code}
+Amount of pointer bits used for semi-tagging constructor closures
+
+\begin{code}
+tAG_BITS = (TAG_BITS :: Int)
+tAG_MASK = ((1 `shiftL` tAG_BITS) - 1) :: Int
+mAX_PTR_TAG = tAG_MASK :: Int
+\end{code}
+
Size of a C int, in bytes. May be smaller than wORD_SIZE.
\begin{code}
diff --git a/compiler/nativeGen/MachCodeGen.hs b/compiler/nativeGen/MachCodeGen.hs
index 2c07016a4f..cc940749f9 100644
--- a/compiler/nativeGen/MachCodeGen.hs
+++ b/compiler/nativeGen/MachCodeGen.hs
@@ -2216,6 +2216,18 @@ condIntCode cond (CmmLoad x pk) (CmmLit lit) | not (is64BitLit lit) = do
--
return (CondCode False cond code)
+-- anything vs zero, using a mask
+-- TODO: Add some sanity checking!!!!
+condIntCode cond (CmmMachOp (MO_And rep) [x,o2]) (CmmLit (CmmInt 0 pk))
+ | (CmmLit (CmmInt mask pk2)) <- o2
+ = do
+ (x_reg, x_code) <- getSomeReg x
+ let
+ code = x_code `snocOL`
+ TEST pk (OpImm (ImmInteger mask)) (OpReg x_reg)
+ --
+ return (CondCode False cond code)
+
-- anything vs zero
condIntCode cond x (CmmLit (CmmInt 0 pk)) = do
(x_reg, x_code) <- getSomeReg x
diff --git a/includes/Closures.h b/includes/Closures.h
index 64582ba6b5..df53ceedd3 100644
--- a/includes/Closures.h
+++ b/includes/Closures.h
@@ -306,7 +306,8 @@ typedef struct {
*/
typedef struct {
const struct _StgInfoTable* info;
- StgWord size;
+ StgHalfWord size;
+ StgHalfWord tag;
StgClosure * fun;
StgClosure * payload[FLEXIBLE_ARRAY];
} StgRetFun;
diff --git a/includes/Cmm.h b/includes/Cmm.h
index b23a37be04..cecf92640b 100644
--- a/includes/Cmm.h
+++ b/includes/Cmm.h
@@ -91,12 +91,34 @@
#if SIZEOF_VOID_P == 4
#define W_ bits32
+/* Maybe it's better to include MachDeps.h */
+#define TAG_BITS 2
#elif SIZEOF_VOID_P == 8
#define W_ bits64
+/* Maybe it's better to include MachDeps.h */
+#define TAG_BITS 3
#else
#error Unknown word size
#endif
+/*
+ * The RTS must UNTAG a pointer before dereferencing it.
+ * The use of UNTAG follows the following rules of thumb:
+ *
+ * - Any pointer might be tagged.
+ * - Except the pointers that are passed in R1 to RTS functions.
+ * - R1 is also untagged when entering constructor code.
+ *
+ * TODO:
+ *
+ * - Remove redundancies of tagging and untagging in code generation.
+ * - Optimize getTag or dataToTag# ?
+ *
+ */
+#define TAG_MASK ((1 << TAG_BITS) - 1)
+#define UNTAG(p) (p & ~TAG_MASK)
+#define GETTAG(p) (p & TAG_MASK)
+
#if SIZEOF_INT == 4
#define CInt bits32
#elif SIZEOF_INT == 8
@@ -228,11 +250,23 @@
ToDo: range should end in N_CLOSURE_TYPES-1, not N_CLOSURE_TYPES,
but switch doesn't allow us to use exprs there yet.
+
+ If R1 points to a tagged object it points either to
+ * A constructor.
+ * A function with arity <= TAG_MASK.
+ In both cases the right thing to do is to return.
+ Note: it is rather lucky that we can use the tag bits to do this
+ for both objects. Maybe it points to a brittle design?
+
+ Indirections can contain tagged pointers, so their tag is checked.
-------------------------------------------------------------------------- */
#define ENTER() \
again: \
W_ info; \
+ if (GETTAG(R1) != 0) { \
+ jump %ENTRY_CODE(Sp(0)); \
+ } \
info = %INFO_PTR(R1); \
switch [INVALID_OBJECT .. N_CLOSURE_TYPES] \
(TO_W_( %INFO_TYPE(%STD_INFO(info)) )) { \
@@ -247,14 +281,13 @@
goto again; \
} \
case \
- BCO, \
FUN, \
FUN_1_0, \
FUN_0_1, \
FUN_2_0, \
FUN_1_1, \
- FUN_0_2, \
- FUN_STATIC, \
+ FUN_STATIC, \
+ BCO, \
PAP: \
{ \
jump %ENTRY_CODE(Sp(0)); \
@@ -265,6 +298,10 @@
} \
}
+// The FUN cases almost never happen: a pointer to a non-static FUN
+// should always be tagged. This unfortunately isn't true for the
+// interpreter right now, which leaves untagged FUNs on the stack.
+
/* -----------------------------------------------------------------------------
Constants.
-------------------------------------------------------------------------- */
@@ -375,7 +412,7 @@
(TO_W_(%INFO_TYPE(%STD_INFO(p))) != INVALID_OBJECT) && \
(TO_W_(%INFO_TYPE(%STD_INFO(p))) < N_CLOSURE_TYPES))
-#define LOOKS_LIKE_CLOSURE_PTR(p) (LOOKS_LIKE_INFO_PTR(GET_INFO(p)))
+#define LOOKS_LIKE_CLOSURE_PTR(p) (LOOKS_LIKE_INFO_PTR(GET_INFO(UNTAG(p))))
/*
* The layout of the StgFunInfoExtra part of an info table changes
diff --git a/includes/InfoTables.h b/includes/InfoTables.h
index a8e76b05b3..bbffea6468 100644
--- a/includes/InfoTables.h
+++ b/includes/InfoTables.h
@@ -164,7 +164,7 @@ typedef struct {
extern StgWord16 closure_flags[];
-#define closureFlags(c) (closure_flags[get_itbl(c)->type])
+#define closureFlags(c) (closure_flags[get_itbl(UNTAG_CLOSURE(c))->type])
#define closure_HNF(c) ( closureFlags(c) & _HNF)
#define closure_BITMAP(c) ( closureFlags(c) & _BTM)
diff --git a/includes/MachDeps.h b/includes/MachDeps.h
index abe4405d5e..7b71f7c378 100644
--- a/includes/MachDeps.h
+++ b/includes/MachDeps.h
@@ -105,4 +105,14 @@
#endif
#endif
+#ifndef TAG_BITS
+#if SIZEOF_HSWORD == 4
+#define TAG_BITS 2
+#else
+#define TAG_BITS 3
+#endif
+#endif
+
+#define TAG_MASK ((1 << TAG_BITS) - 1)
+
#endif /* MACHDEPS_H */
diff --git a/includes/Rts.h b/includes/Rts.h
index d009618442..eba8146fd2 100644
--- a/includes/Rts.h
+++ b/includes/Rts.h
@@ -107,6 +107,29 @@ extern void _assertFail (const char *, unsigned int);
#define FMT_Int64 "lld"
#endif
+/*
+ * Macros for untagging and retagging closure pointers
+ * For more information look at the comments in Cmm.h
+ */
+
+static inline StgWord
+GET_CLOSURE_TAG(StgClosure * p)
+{
+ return (StgWord)p & TAG_MASK;
+}
+
+static inline StgClosure *
+UNTAG_CLOSURE(StgClosure * p)
+{
+ return (StgClosure*)((StgWord)p & ~TAG_MASK);
+}
+
+static inline StgClosure *
+TAG_CLOSURE(StgWord tag,StgClosure * p)
+{
+ return (StgClosure*)((StgWord)p | tag);
+}
+
/* -----------------------------------------------------------------------------
Include everything STG-ish
-------------------------------------------------------------------------- */
@@ -207,6 +230,23 @@ extern void stg_exit(int n) GNU_ATTRIBUTE(__noreturn__);
/* declarations for runtime flags/values */
#define MAX_RTS_ARGS 32
+#ifdef DEBUG
+#define TICK_VAR(arity) \
+ extern StgInt SLOW_CALLS_##arity; \
+ extern StgInt RIGHT_ARITY_##arity; \
+ extern StgInt TAGGED_PTR_##arity;
+
+#define TICK_VAR_INI(arity) \
+ StgInt SLOW_CALLS_##arity = 1; \
+ StgInt RIGHT_ARITY_##arity = 1; \
+ StgInt TAGGED_PTR_##arity = 0;
+
+extern StgInt TOTAL_CALLS;
+
+TICK_VAR(1)
+TICK_VAR(2)
+#endif
+
/* -----------------------------------------------------------------------------
Assertions and Debuggery
-------------------------------------------------------------------------- */
diff --git a/includes/Storage.h b/includes/Storage.h
index 604e49e043..92a856c963 100644
--- a/includes/Storage.h
+++ b/includes/Storage.h
@@ -303,7 +303,7 @@ void dirty_MUT_VAR(StgRegTable *reg, StgClosure *p);
((StgInfoTable *)(INFO_PTR_TO_STRUCT(p)))->type < N_CLOSURE_TYPES)
#define LOOKS_LIKE_CLOSURE_PTR(p) \
- (LOOKS_LIKE_INFO_PTR(((StgClosure *)(p))->header.info))
+ (LOOKS_LIKE_INFO_PTR((UNTAG_CLOSURE((StgClosure *)(p)))->header.info))
/* -----------------------------------------------------------------------------
Macros for calculating how big a closure will be (used during allocation)
diff --git a/includes/mkDerivedConstants.c b/includes/mkDerivedConstants.c
index 2fe99b6ba5..aa3c6730f8 100644
--- a/includes/mkDerivedConstants.c
+++ b/includes/mkDerivedConstants.c
@@ -403,6 +403,10 @@ main(int argc, char *argv[])
struct_field(StgLargeBitmap, size);
field_offset(StgLargeBitmap, bitmap);
+ struct_field(StgRetFun, size);
+ struct_field(StgRetFun, tag);
+ struct_field(StgRetFun, fun);
+
struct_size(snEntry);
struct_field(snEntry,sn_obj);
struct_field(snEntry,addr);
diff --git a/rts/Apply.cmm b/rts/Apply.cmm
index e0ca03944c..cf8a108006 100644
--- a/rts/Apply.cmm
+++ b/rts/Apply.cmm
@@ -90,8 +90,6 @@ stg_PAP_apply
// Enter PAP cost centre
ENTER_CCS_PAP_CL(pap);
- R1 = StgPAP_fun(pap);
-
// Reload the stack
W_ i;
W_ p;
@@ -105,14 +103,30 @@ for:
goto for;
}
+ R1 = StgPAP_fun(pap);
+
+/* DEBUGGING CODE, ensures that arity 1 and 2 functions are entered tagged
+ if (TO_W_(StgFunInfoExtra_arity(%FUN_INFO(%INFO_PTR(UNTAG(R1))))) == 1 ) {
+ if (GETTAG(R1)!=1) {
+ W_[0]=1;
+ }
+ }
+
+ if (TO_W_(StgFunInfoExtra_arity(%FUN_INFO(%INFO_PTR(UNTAG(R1))))) == 2 ) {
+ if (GETTAG(R1)!=2) {
+ W_[0]=1;
+ }
+ }
+*/
+
// Off we go!
TICK_ENT_VIA_NODE();
#ifdef NO_ARG_REGS
- jump %GET_ENTRY(R1);
+ jump %GET_ENTRY(UNTAG(R1));
#else
W_ info;
- info = %GET_FUN_INFO(R1);
+ info = %GET_FUN_INFO(UNTAG(R1));
W_ type;
type = TO_W_(StgFunInfoExtra_fun_type(info));
if (type == ARG_GEN) {
@@ -167,8 +181,6 @@ INFO_TABLE(stg_AP,/*special layout*/0,0,AP,"AP","AP")
// Enter PAP cost centre
ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL
- R1 = StgAP_fun(ap);
-
// Reload the stack
W_ i;
W_ p;
@@ -182,14 +194,16 @@ for:
goto for;
}
+ R1 = StgAP_fun(ap);
+
// Off we go!
TICK_ENT_VIA_NODE();
#ifdef NO_ARG_REGS
- jump %GET_ENTRY(R1);
+ jump %GET_ENTRY(UNTAG(R1));
#else
W_ info;
- info = %GET_FUN_INFO(R1);
+ info = %GET_FUN_INFO(UNTAG(R1));
W_ type;
type = TO_W_(StgFunInfoExtra_fun_type(info));
if (type == ARG_GEN) {
@@ -246,8 +260,6 @@ INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK")
// Enter PAP cost centre
ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL
- R1 = StgAP_STACK_fun(ap);
-
// Reload the stack
W_ i;
W_ p;
@@ -264,5 +276,7 @@ for:
// Off we go!
TICK_ENT_VIA_NODE();
+ R1 = StgAP_STACK_fun(ap);
+
ENTER();
}
diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm
index f40fbf5519..3c66e7806f 100644
--- a/rts/HeapStackCheck.cmm
+++ b/rts/HeapStackCheck.cmm
@@ -551,6 +551,8 @@ INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, "ptr" W_ unused )
+---------------------+
| f_closure |
+---------------------+
+ | tag |
+ +- - - - - - - - - - -+
| size |
+---------------------+
| stg_gc_fun_info |
@@ -567,8 +569,11 @@ __stg_gc_fun
W_ size;
W_ info;
W_ type;
+ W_ tag;
+ W_ ret_fun;
- info = %GET_FUN_INFO(R1);
+ tag = GETTAG(R1);
+ info = %GET_FUN_INFO(UNTAG(R1));
// cache the size
type = TO_W_(StgFunInfoExtra_fun_type(info));
@@ -579,7 +584,7 @@ __stg_gc_fun
#ifdef TABLES_NEXT_TO_CODE
// bitmap field holds an offset
size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info)
- + %GET_ENTRY(R1) /* ### */ );
+ + %GET_ENTRY(UNTAG(R1)) /* ### */ );
#else
size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) );
#endif
@@ -591,9 +596,11 @@ __stg_gc_fun
#ifdef NO_ARG_REGS
// we don't have to save any registers away
Sp_adj(-3);
- Sp(2) = R1;
- Sp(1) = size;
Sp(0) = stg_gc_fun_info;
+ ret_fun = Sp;
+ StgRetFun_size(ret_fun) = HALF_W_(size);
+ StgRetFun_tag(ret_fun) = HALF_W_(tag);
+ StgRetFun_fun(ret_fun) = R1;
GC_GENERIC
#else
W_ type;
@@ -602,9 +609,11 @@ __stg_gc_fun
if (type == ARG_GEN || type == ARG_GEN_BIG) {
// regs already saved by the heap check code
Sp_adj(-3);
- Sp(2) = R1;
- Sp(1) = size;
Sp(0) = stg_gc_fun_info;
+ ret_fun = Sp;
+ StgRetFun_size(ret_fun) = HALF_W_(size);
+ StgRetFun_tag(ret_fun) = HALF_W_(tag);
+ StgRetFun_fun(ret_fun) = R1;
// DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)"););
GC_GENERIC
} else {
@@ -624,17 +633,22 @@ __stg_gc_fun
INFO_TABLE_RET( stg_gc_fun, RET_FUN )
{
- R1 = Sp(2);
+ // Grab the fun, but remember to add in the tag. The GC doesn't
+ // guarantee to retain the tag on the pointer, so we have to do
+ // it manually, because the function entry code assumes it.
+ W_ ret_fun;
+ ret_fun = Sp;
+ R1 = StgRetFun_fun(ret_fun) | TO_W_(StgRetFun_tag(ret_fun));
Sp_adj(3);
#ifdef NO_ARG_REGS
// Minor optimisation: there are no argument registers to load up,
// so we can just jump straight to the function's entry point.
- jump %GET_ENTRY(R1);
+ jump %GET_ENTRY(UNTAG(R1));
#else
W_ info;
W_ type;
- info = %GET_FUN_INFO(R1);
+ info = %GET_FUN_INFO(UNTAG(R1));
type = TO_W_(StgFunInfoExtra_fun_type(info));
if (type == ARG_GEN || type == ARG_GEN_BIG) {
jump StgFunInfoExtra_slow_apply(info);
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index 6663445995..527ebde0d0 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -224,7 +224,7 @@ interpretBCO (Capability* cap)
// +---------------+
//
else if (Sp[0] == (W_)&stg_apply_interp_info) {
- obj = (StgClosure *)Sp[1];
+ obj = UNTAG_CLOSURE((StgClosure *)Sp[1]);
Sp += 2;
goto run_BCO_fun;
}
@@ -244,6 +244,7 @@ eval:
obj = (StgClosure*)Sp[0]; Sp++;
eval_obj:
+ obj = UNTAG_CLOSURE(obj);
INTERP_TICK(it_total_evals);
IF_DEBUG(interpreter,
@@ -327,7 +328,7 @@ eval_obj:
Sp[i] = (W_)ap->payload[i];
}
- obj = (StgClosure*)ap->fun;
+ obj = UNTAG_CLOSURE((StgClosure*)ap->fun);
ASSERT(get_itbl(obj)->type == BCO);
goto run_BCO_fun;
}
@@ -531,7 +532,7 @@ do_apply:
pap = (StgPAP *)obj;
// we only cope with PAPs whose function is a BCO
- if (get_itbl(pap->fun)->type != BCO) {
+ if (get_itbl(UNTAG_CLOSURE(pap->fun))->type != BCO) {
goto defer_apply_to_sched;
}
@@ -556,7 +557,7 @@ do_apply:
for (i = 0; i < pap->n_args; i++) {
Sp[i] = (W_)pap->payload[i];
}
- obj = pap->fun;
+ obj = UNTAG_CLOSURE(pap->fun);
goto run_BCO_fun;
}
else if (arity == n) {
@@ -564,7 +565,7 @@ do_apply:
for (i = 0; i < pap->n_args; i++) {
Sp[i] = (W_)pap->payload[i];
}
- obj = pap->fun;
+ obj = UNTAG_CLOSURE(pap->fun);
goto run_BCO_fun;
}
else /* arity > n */ {
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index 7c75fca0e8..cb8626e5dd 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -1869,7 +1869,7 @@ unpackClosurezh_fast
// TODO: Consider the absence of ptrs or nonptrs as a special case ?
W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
- info = %GET_STD_INFO(R1);
+ info = %GET_STD_INFO(UNTAG(R1));
// Some closures have non-standard layout, so we omit those here.
W_ type;
@@ -1899,6 +1899,9 @@ out:
ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);
+ W_ clos;
+ clos = UNTAG(R1);
+
ptrs_arr = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
@@ -1907,7 +1910,7 @@ out:
p = 0;
for:
if(p < ptrs) {
- W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(R1,p);
+ W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
p = p + 1;
goto for;
}
@@ -1917,7 +1920,7 @@ for:
p = 0;
for2:
if(p < nptrs) {
- W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(R1, p+ptrs);
+ W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
p = p + 1;
goto for2;
}
diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c
index 9f29acae19..2613b9e4bc 100644
--- a/rts/RetainerProfile.c
+++ b/rts/RetainerProfile.c
@@ -1486,7 +1486,9 @@ retainStack( StgClosure *c, retainer c_child_r,
* ------------------------------------------------------------------------- */
static INLINE StgPtr
-retain_PAP_payload (StgClosure *pap, retainer c_child_r, StgClosure *fun,
+retain_PAP_payload (StgClosure *pap, /* NOT tagged */
+ retainer c_child_r, /* NOT tagged */
+ StgClosure *fun, /* tagged */
StgClosure** payload, StgWord n_args)
{
StgPtr p;
@@ -1494,6 +1496,7 @@ retain_PAP_payload (StgClosure *pap, retainer c_child_r, StgClosure *fun,
StgFunInfoTable *fun_info;
retainClosure(fun, pap, c_child_r);
+ fun = UNTAG_CLOSURE(fun);
fun_info = get_fun_itbl(fun);
ASSERT(fun_info->i.type != PAP);
@@ -1542,9 +1545,9 @@ retain_PAP_payload (StgClosure *pap, retainer c_child_r, StgClosure *fun,
static void
retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
{
- // c = Current closure
- // cp = Current closure's Parent
- // r = current closures' most recent Retainer
+ // c = Current closure (possibly tagged)
+ // cp = Current closure's Parent (NOT tagged)
+ // r = current closures' most recent Retainer (NOT tagged)
// c_child_r = current closure's children's most recent retainer
// first_child = first child of c
StgClosure *c, *cp, *first_child;
@@ -1582,6 +1585,8 @@ loop:
//debugBelch("inner_loop");
inner_loop:
+ c = UNTAG_CLOSURE(c);
+
// c = current closure under consideration,
// cp = current closure's parent,
// r = current closure's most recent retainer
@@ -1794,16 +1799,19 @@ inner_loop:
static void
retainRoot( StgClosure **tl )
{
+ StgClosure *c;
+
// We no longer assume that only TSOs and WEAKs are roots; any closure can
// be a root.
ASSERT(isEmptyRetainerStack());
currentStackBoundary = stackTop;
- if (*tl != &stg_END_TSO_QUEUE_closure && isRetainer(*tl)) {
- retainClosure(*tl, *tl, getRetainerFrom(*tl));
+ c = UNTAG_CLOSURE(*tl);
+ if (c != &stg_END_TSO_QUEUE_closure && isRetainer(c)) {
+ retainClosure(c, c, getRetainerFrom(c));
} else {
- retainClosure(*tl, *tl, CCS_SYSTEM);
+ retainClosure(c, c, CCS_SYSTEM);
}
// NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
diff --git a/rts/RtsAPI.c b/rts/RtsAPI.c
index 69fac8d474..716b4a2f2b 100644
--- a/rts/RtsAPI.c
+++ b/rts/RtsAPI.c
@@ -22,6 +22,10 @@
/* ----------------------------------------------------------------------------
Building Haskell objects from C datatypes.
+
+ TODO: Currently this code does not tag created pointers,
+ however it is not unsafe (the contructor code will do it)
+ just inefficient.
------------------------------------------------------------------------- */
HaskellObj
rts_mkChar (Capability *cap, HsChar c)
@@ -221,7 +225,7 @@ rts_getChar (HaskellObj p)
// See comment above:
// ASSERT(p->header.info == Czh_con_info ||
// p->header.info == Czh_static_info);
- return (StgChar)(StgWord)(p->payload[0]);
+ return (StgChar)(StgWord)(UNTAG_CLOSURE(p)->payload[0]);
}
HsInt
@@ -230,7 +234,7 @@ rts_getInt (HaskellObj p)
// See comment above:
// ASSERT(p->header.info == Izh_con_info ||
// p->header.info == Izh_static_info);
- return (HsInt)(p->payload[0]);
+ return (HsInt)(UNTAG_CLOSURE(p)->payload[0]);
}
HsInt8
@@ -239,7 +243,7 @@ rts_getInt8 (HaskellObj p)
// See comment above:
// ASSERT(p->header.info == I8zh_con_info ||
// p->header.info == I8zh_static_info);
- return (HsInt8)(HsInt)(p->payload[0]);
+ return (HsInt8)(HsInt)(UNTAG_CLOSURE(p)->payload[0]);
}
HsInt16
@@ -248,7 +252,7 @@ rts_getInt16 (HaskellObj p)
// See comment above:
// ASSERT(p->header.info == I16zh_con_info ||
// p->header.info == I16zh_static_info);
- return (HsInt16)(HsInt)(p->payload[0]);
+ return (HsInt16)(HsInt)(UNTAG_CLOSURE(p)->payload[0]);
}
HsInt32
@@ -257,7 +261,7 @@ rts_getInt32 (HaskellObj p)
// See comment above:
// ASSERT(p->header.info == I32zh_con_info ||
// p->header.info == I32zh_static_info);
- return (HsInt32)(HsInt)(p->payload[0]);
+ return (HsInt32)(HsInt)(UNTAG_CLOSURE(p)->payload[0]);
}
HsInt64
@@ -267,7 +271,7 @@ rts_getInt64 (HaskellObj p)
// See comment above:
// ASSERT(p->header.info == I64zh_con_info ||
// p->header.info == I64zh_static_info);
- tmp = (HsInt64*)&(p->payload[0]);
+ tmp = (HsInt64*)&(UNTAG_CLOSURE(p)->payload[0]);
return *tmp;
}
HsWord
@@ -276,7 +280,7 @@ rts_getWord (HaskellObj p)
// See comment above:
// ASSERT(p->header.info == Wzh_con_info ||
// p->header.info == Wzh_static_info);
- return (HsWord)(p->payload[0]);
+ return (HsWord)(UNTAG_CLOSURE(p)->payload[0]);
}
HsWord8
@@ -285,7 +289,7 @@ rts_getWord8 (HaskellObj p)
// See comment above:
// ASSERT(p->header.info == W8zh_con_info ||
// p->header.info == W8zh_static_info);
- return (HsWord8)(HsWord)(p->payload[0]);
+ return (HsWord8)(HsWord)(UNTAG_CLOSURE(p)->payload[0]);
}
HsWord16
@@ -294,7 +298,7 @@ rts_getWord16 (HaskellObj p)
// See comment above:
// ASSERT(p->header.info == W16zh_con_info ||
// p->header.info == W16zh_static_info);
- return (HsWord16)(HsWord)(p->payload[0]);
+ return (HsWord16)(HsWord)(UNTAG_CLOSURE(p)->payload[0]);
}
HsWord32
@@ -303,7 +307,7 @@ rts_getWord32 (HaskellObj p)
// See comment above:
// ASSERT(p->header.info == W32zh_con_info ||
// p->header.info == W32zh_static_info);
- return (HsWord32)(HsWord)(p->payload[0]);
+ return (HsWord32)(HsWord)(UNTAG_CLOSURE(p)->payload[0]);
}
@@ -314,7 +318,7 @@ rts_getWord64 (HaskellObj p)
// See comment above:
// ASSERT(p->header.info == W64zh_con_info ||
// p->header.info == W64zh_static_info);
- tmp = (HsWord64*)&(p->payload[0]);
+ tmp = (HsWord64*)&(UNTAG_CLOSURE(p)->payload[0]);
return *tmp;
}
@@ -324,7 +328,7 @@ rts_getFloat (HaskellObj p)
// See comment above:
// ASSERT(p->header.info == Fzh_con_info ||
// p->header.info == Fzh_static_info);
- return (float)(PK_FLT((P_)p->payload));
+ return (float)(PK_FLT((P_)UNTAG_CLOSURE(p)->payload));
}
HsDouble
@@ -333,7 +337,7 @@ rts_getDouble (HaskellObj p)
// See comment above:
// ASSERT(p->header.info == Dzh_con_info ||
// p->header.info == Dzh_static_info);
- return (double)(PK_DBL((P_)p->payload));
+ return (double)(PK_DBL((P_)UNTAG_CLOSURE(p)->payload));
}
HsStablePtr
@@ -342,7 +346,7 @@ rts_getStablePtr (HaskellObj p)
// See comment above:
// ASSERT(p->header.info == StablePtr_con_info ||
// p->header.info == StablePtr_static_info);
- return (StgStablePtr)(p->payload[0]);
+ return (StgStablePtr)(UNTAG_CLOSURE(p)->payload[0]);
}
HsPtr
@@ -351,7 +355,7 @@ rts_getPtr (HaskellObj p)
// See comment above:
// ASSERT(p->header.info == Ptr_con_info ||
// p->header.info == Ptr_static_info);
- return (Capability *)(p->payload[0]);
+ return (Capability *)(UNTAG_CLOSURE(p)->payload[0]);
}
HsFunPtr
@@ -360,7 +364,7 @@ rts_getFunPtr (HaskellObj p)
// See comment above:
// ASSERT(p->header.info == FunPtr_con_info ||
// p->header.info == FunPtr_static_info);
- return (void *)(p->payload[0]);
+ return (void *)(UNTAG_CLOSURE(p)->payload[0]);
}
HsBool
@@ -368,7 +372,7 @@ rts_getBool (HaskellObj p)
{
StgInfoTable *info;
- info = get_itbl((StgClosure *)p);
+ info = get_itbl((StgClosure *)UNTAG_CLOSURE(p));
if (info->srt_bitmap == 0) { // srt_bitmap is the constructor tag
return 0;
} else {
diff --git a/rts/Sanity.c b/rts/Sanity.c
index 7de8ec7d0a..a2ddff87d6 100644
--- a/rts/Sanity.c
+++ b/rts/Sanity.c
@@ -80,13 +80,16 @@ checkLargeBitmap( StgPtr payload, StgLargeBitmap* large_bitmap, nat size )
static void
checkClosureShallow( StgClosure* p )
{
- ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
+ StgClosure *q;
+
+ q = UNTAG_CLOSURE(p);
+ ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
/* Is it a static closure? */
- if (!HEAP_ALLOCED(p)) {
- ASSERT(closure_STATIC(p));
+ if (!HEAP_ALLOCED(q)) {
+ ASSERT(closure_STATIC(q));
} else {
- ASSERT(!closure_STATIC(p));
+ ASSERT(!closure_STATIC(q));
}
}
@@ -162,7 +165,7 @@ checkStackFrame( StgPtr c )
StgRetFun *ret_fun;
ret_fun = (StgRetFun *)c;
- fun_info = get_fun_itbl(ret_fun->fun);
+ fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun));
size = ret_fun->size;
switch (fun_info->f.fun_type) {
case ARG_GEN:
@@ -206,6 +209,7 @@ checkPAP (StgClosure *fun, StgClosure** payload, StgWord n_args)
StgClosure *p;
StgFunInfoTable *fun_info;
+ fun = UNTAG_CLOSURE(fun);
ASSERT(LOOKS_LIKE_CLOSURE_PTR(fun));
fun_info = get_fun_itbl(fun);
@@ -241,6 +245,7 @@ checkClosure( StgClosure* p )
ASSERT(LOOKS_LIKE_INFO_PTR(p->header.info));
+ p = UNTAG_CLOSURE(p);
/* Is it a static closure (i.e. in the data segment)? */
if (!HEAP_ALLOCED(p)) {
ASSERT(closure_STATIC(p));
@@ -815,7 +820,7 @@ checkStaticObjects ( StgClosure* static_objects )
switch (info->type) {
case IND_STATIC:
{
- StgClosure *indirectee = ((StgIndStatic *)p)->indirectee;
+ StgClosure *indirectee = UNTAG_CLOSURE(((StgIndStatic *)p)->indirectee);
ASSERT(LOOKS_LIKE_CLOSURE_PTR(indirectee));
ASSERT(LOOKS_LIKE_INFO_PTR(indirectee->header.info));
diff --git a/rts/Sparks.c b/rts/Sparks.c
index ca60e1338c..0ff4ee4cce 100644
--- a/rts/Sparks.c
+++ b/rts/Sparks.c
@@ -200,6 +200,12 @@ newSpark (StgRegTable *reg, StgClosure *p)
{
StgSparkPool *pool = &(reg->rSparks);
+ /* I am not sure whether this is the right thing to do.
+ * Maybe it is better to exploit the tag information
+ * instead of throwing it away?
+ */
+ p = UNTAG_CLOSURE(p);
+
ASSERT_SPARK_POOL_INVARIANTS(pool);
if (closure_SHOULD_SPARK(p)) {
diff --git a/rts/Stable.c b/rts/Stable.c
index e5e8dfbdd0..0ed18bcec2 100644
--- a/rts/Stable.c
+++ b/rts/Stable.c
@@ -177,6 +177,9 @@ exitStablePtrTable(void)
/*
* get at the real stuff...remove indirections.
+ * It untags pointers before dereferencing and
+ * retags the real stuff with its tag (if there
+ * is any) when returning.
*
* ToDo: move to a better home.
*/
@@ -184,16 +187,18 @@ static
StgClosure*
removeIndirections(StgClosure* p)
{
- StgClosure* q = p;
+ StgWord tag = GET_CLOSURE_TAG(p);
+ StgClosure* q = UNTAG_CLOSURE(p);
while (get_itbl(q)->type == IND ||
get_itbl(q)->type == IND_STATIC ||
get_itbl(q)->type == IND_OLDGEN ||
get_itbl(q)->type == IND_PERM ||
get_itbl(q)->type == IND_OLDGEN_PERM ) {
- q = ((StgInd *)q)->indirectee;
+ tag = GET_CLOSURE_TAG(q);
+ q = UNTAG_CLOSURE(((StgInd *)q)->indirectee);
}
- return q;
+ return TAG_CLOSURE(tag,q);
}
static StgWord
diff --git a/rts/Stats.c b/rts/Stats.c
index 9342118ade..f18e26fbd5 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -441,6 +441,52 @@ stat_endHeapCensus(void)
were left unused when the heap-check failed.
-------------------------------------------------------------------------- */
+#ifdef DEBUG
+#define TICK_VAR(arity) \
+ extern StgInt SLOW_CALLS_##arity; \
+ extern StgInt RIGHT_ARITY_##arity; \
+ extern StgInt TAGGED_PTR_##arity;
+
+#define TICK_VAR_INI(arity) \
+ StgInt SLOW_CALLS_##arity = 1; \
+ StgInt RIGHT_ARITY_##arity = 1; \
+ StgInt TAGGED_PTR_##arity = 0;
+
+extern StgInt TOTAL_CALLS;
+
+TICK_VAR(1)
+TICK_VAR(2)
+
+TICK_VAR_INI(1)
+TICK_VAR_INI(2)
+
+StgInt TOTAL_CALLS=1;
+#endif
+
+/* Report the value of a counter */
+#define REPORT(counter) \
+ { \
+ ullong_format_string(counter,temp,rtsTrue/*commas*/); \
+ statsPrintf(" (" #counter ") : %s\n",temp); \
+ }
+
+/* Report the value of a counter as a percentage of another counter */
+#define REPORT_PCT(counter,countertot) \
+ statsPrintf(" (" #counter ") %% of (" #countertot ") : %.1f%%\n", \
+ counter*100.0/countertot)
+
+#define TICK_PRINT(arity) \
+ REPORT(SLOW_CALLS_##arity); \
+ REPORT_PCT(RIGHT_ARITY_##arity,SLOW_CALLS_##arity); \
+ REPORT_PCT(TAGGED_PTR_##arity,RIGHT_ARITY_##arity); \
+ REPORT(RIGHT_ARITY_##arity); \
+ REPORT(TAGGED_PTR_##arity)
+
+#define TICK_PRINT_TOT(arity) \
+ statsPrintf(" (SLOW_CALLS_" #arity ") %% of (TOTAL_CALLS) : %.1f%%\n", \
+ SLOW_CALLS_##arity * 100.0/TOTAL_CALLS)
+
+
void
stat_exit(int alloc)
{
@@ -557,6 +603,15 @@ stat_exit(int alloc)
TICK_TO_DBL(time - GC_tot_time -
PROF_VAL(RP_tot_time + HC_tot_time) - InitUserTime) * 100
/ TICK_TO_DBL(etime));
+
+ /*
+ TICK_PRINT(1);
+ TICK_PRINT(2);
+ REPORT(TOTAL_CALLS);
+ TICK_PRINT_TOT(1);
+ TICK_PRINT_TOT(2);
+ */
+
#if USE_PAPI
/* PAPI reporting, should put somewhere else?
* Note that the cycles are counted _after_ the initialization of the RTS -- AR */
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index e092e3fdc0..58cbaf9d56 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -167,7 +167,7 @@ INFO_TABLE_FUN( stg_BCO, 4, 0, BCO, "BCO", "BCO", ARG_BCO )
INFO_TABLE(stg_IND,1,0,IND,"IND","IND")
{
TICK_ENT_DYN_IND(); /* tick */
- R1 = StgInd_indirectee(R1);
+ R1 = UNTAG(StgInd_indirectee(R1));
TICK_ENT_VIA_NODE();
jump %GET_ENTRY(R1);
}
@@ -183,7 +183,7 @@ INFO_TABLE(stg_IND_direct,1,0,IND,"IND","IND")
INFO_TABLE(stg_IND_STATIC,1,0,IND_STATIC,"IND_STATIC","IND_STATIC")
{
TICK_ENT_STATIC_IND(); /* tick */
- R1 = StgInd_indirectee(R1);
+ R1 = UNTAG(StgInd_indirectee(R1));
TICK_ENT_VIA_NODE();
jump %GET_ENTRY(R1);
}
@@ -220,7 +220,7 @@ INFO_TABLE(stg_IND_PERM,1,0,IND_PERM,"IND_PERM","IND_PERM")
StgHeader_info(R1) = stg_IND_info;
#endif /* TICKY_TICKY */
- R1 = StgInd_indirectee(R1);
+ R1 = UNTAG(StgInd_indirectee(R1));
#if defined(TICKY_TICKY) && !defined(PROFILING)
TICK_ENT_VIA_NODE();
@@ -233,7 +233,7 @@ INFO_TABLE(stg_IND_PERM,1,0,IND_PERM,"IND_PERM","IND_PERM")
INFO_TABLE(stg_IND_OLDGEN,1,0,IND_OLDGEN,"IND_OLDGEN","IND_OLDGEN")
{
TICK_ENT_STATIC_IND(); /* tick */
- R1 = StgInd_indirectee(R1);
+ R1 = UNTAG(StgInd_indirectee(R1));
TICK_ENT_VIA_NODE();
jump %GET_ENTRY(R1);
}
@@ -262,7 +262,7 @@ INFO_TABLE(stg_IND_OLDGEN_PERM,1,0,IND_OLDGEN_PERM,"IND_OLDGEN_PERM","IND_OLDGEN
StgHeader_info(R1) = stg_IND_OLDGEN_info;
#endif /* TICKY_TICKY */
- R1 = StgInd_indirectee(R1);
+ R1 = UNTAG(StgInd_indirectee(R1));
TICK_ENT_VIA_NODE();
jump %GET_ENTRY(R1);
diff --git a/rts/StgStartup.cmm b/rts/StgStartup.cmm
index 5b0f7e2a5f..b5a5cdcb2f 100644
--- a/rts/StgStartup.cmm
+++ b/rts/StgStartup.cmm
@@ -142,6 +142,7 @@ stg_threadFinished
forceIO takes care of this, performing the IO action and entering the
results that comes back.
+
------------------------------------------------------------------------- */
INFO_TABLE_RET( stg_forceIO, RET_SMALL)
diff --git a/rts/StgStdThunks.cmm b/rts/StgStdThunks.cmm
index db9c254233..20ceb6aaba 100644
--- a/rts/StgStdThunks.cmm
+++ b/rts/StgStdThunks.cmm
@@ -39,10 +39,23 @@
#define RET_PARAMS
#endif
+/*
+ * TODO: On return, we can use a more efficient
+ * untagging (we know the constructor tag).
+ *
+ * When entering stg_sel_#_upd, we know R1 points to its closure,
+ * so it's untagged.
+ * The payload might be a thunk or a constructor,
+ * so we enter it.
+ *
+ * When returning, we know for sure it is a constructor,
+ * so we untag it before accessing the field.
+ *
+ */
#define SELECTOR_CODE_UPD(offset) \
INFO_TABLE_RET(stg_sel_ret_##offset##_upd, RET_SMALL, RET_PARAMS) \
{ \
- R1 = StgClosure_payload(R1,offset); \
+ R1 = StgClosure_payload(UNTAG(R1),offset); \
GET_SAVED_CCCS; \
Sp = Sp + SIZEOF_StgHeader; \
ENTER(); \
@@ -58,8 +71,11 @@
ENTER_CCS_THUNK(R1); \
SAVE_CCCS(WITHUPD_FRAME_SIZE); \
W_[Sp-WITHUPD_FRAME_SIZE] = stg_sel_ret_##offset##_upd_info; \
- R1 = StgThunk_payload(R1,0); \
Sp = Sp - WITHUPD_FRAME_SIZE; \
+ R1 = StgThunk_payload(R1,0); \
+ if (GETTAG(R1) != 0) { \
+ jump RET_LBL(stg_sel_ret_##offset##_upd); \
+ } \
jump %GET_ENTRY(R1); \
}
/* NOTE: no need to ENTER() here, we know the closure cannot evaluate to a function,
@@ -85,10 +101,10 @@ SELECTOR_CODE_UPD(15)
#define SELECTOR_CODE_NOUPD(offset) \
INFO_TABLE_RET(stg_sel_ret_##offset##_noupd, RET_SMALL, RET_PARAMS) \
{ \
- R1 = StgClosure_payload(R1,offset); \
+ R1 = StgClosure_payload(UNTAG(R1),offset); \
GET_SAVED_CCCS; \
Sp = Sp + SIZEOF_StgHeader; \
- jump %GET_ENTRY(R1); \
+ ENTER(); \
} \
\
INFO_TABLE_SELECTOR(stg_sel_##offset##_noupd, offset, THUNK_SELECTOR, "stg_sel_noupd", "stg_sel_noupd")\
@@ -101,8 +117,11 @@ SELECTOR_CODE_UPD(15)
ENTER_CCS_THUNK(R1); \
SAVE_CCCS(NOUPD_FRAME_SIZE); \
W_[Sp-NOUPD_FRAME_SIZE] = stg_sel_ret_##offset##_noupd_info; \
- R1 = StgThunk_payload(R1,0); \
Sp = Sp - NOUPD_FRAME_SIZE; \
+ R1 = StgThunk_payload(R1,0); \
+ if (GETTAG(R1) != 0) { \
+ jump RET_LBL(stg_sel_ret_##offset##_noupd); \
+ } \
jump %GET_ENTRY(R1); \
}
diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c
index feebef87aa..e8d154059b 100644
--- a/rts/sm/Compact.c
+++ b/rts/sm/Compact.c
@@ -55,23 +55,32 @@
STATIC_INLINE void
thread (StgClosure **p)
{
- StgPtr q = *(StgPtr *)p;
+ StgClosure *q0 = *p;
+ StgPtr q = (StgPtr)UNTAG_CLOSURE(q0);
+ nat tag = GET_CLOSURE_TAG(q0);
bdescr *bd;
// It doesn't look like a closure at the moment, because the info
// ptr is possibly threaded:
// ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
+
+ // We need one tag value here, because we a non-zero tag to
+ // indicate "not an info pointer". So we add one to the existing
+ // tag. If this would overflow the tag bits, we throw away the
+ // original tag (which is safe but pessimistic; tags are optional).
+ if (tag == TAG_MASK) tag = 0;
- if (HEAP_ALLOCED(q)) {
+ if (HEAP_ALLOCED(q))
+ {
bd = Bdescr(q);
// a handy way to discover whether the ptr is into the
// compacted area of the old gen, is that the EVACUATED flag
// is zero (it's non-zero for all the other areas of live
// memory).
- if ((bd->flags & BF_EVACUATED) == 0) {
-
+ if ((bd->flags & BF_EVACUATED) == 0)
+ {
*(StgPtr)p = (StgWord)*q;
- *q = (StgWord)p + 1; // set the low bit
+ *q = (StgWord)p + tag + 1; // set the low bit
}
}
}
@@ -84,11 +93,15 @@ STATIC_INLINE void
unthread( StgPtr p, StgPtr free )
{
StgWord q = *p, r;
+ nat tag;
+ StgPtr q1;
- while ((q & 1) != 0) {
- q -= 1; // unset the low bit again
- r = *((StgPtr)q);
- *((StgPtr)q) = (StgWord)free;
+ while (GET_CLOSURE_TAG((StgClosure *)q) != 0) {
+ q -= 1; // restore the original tag
+ tag = GET_CLOSURE_TAG((StgClosure *)q);
+ q1 = (StgPtr)UNTAG_CLOSURE((StgClosure *)q);
+ r = *q1;
+ *q1 = (StgWord)free + tag;
q = r;
}
*p = q;
@@ -97,10 +110,10 @@ unthread( StgPtr p, StgPtr free )
STATIC_INLINE StgInfoTable *
get_threaded_info( StgPtr p )
{
- StgPtr q = (P_)GET_INFO((StgClosure *)p);
+ StgPtr q = (P_)GET_INFO(UNTAG_CLOSURE((StgClosure *)p));
- while (((StgWord)q & 1) != 0) {
- q = (P_)*((StgPtr)((StgWord)q-1));
+ while (GET_CLOSURE_TAG((StgClosure *)q) != 0) {
+ q = (P_)*((StgPtr)((StgWord)(UNTAG_CLOSURE((StgClosure *)q))));
}
ASSERT(LOOKS_LIKE_INFO_PTR(q));
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index dda5659675..d437e3f786 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -39,7 +39,7 @@ upd_evacuee(StgClosure *p, StgClosure *dest)
STATIC_INLINE StgClosure *
-copy(StgClosure *src, nat size, step *stp)
+copy_tag(StgClosure *src, nat size, step *stp,StgWord tag)
{
StgPtr to, from;
nat i;
@@ -75,6 +75,10 @@ copy(StgClosure *src, nat size, step *stp)
for (i = 0; i < size; i++) { // unroll for small i
to[i] = from[i];
}
+
+ /* retag pointer before updating EVACUATE closure and returning */
+ to = (StgPtr)TAG_CLOSURE(tag,(StgClosure*)to);
+
upd_evacuee((StgClosure *)from,(StgClosure *)to);
#ifdef PROFILING
@@ -89,7 +93,7 @@ copy(StgClosure *src, nat size, step *stp)
// that will not be scavenged. Used for object that have no pointer
// fields.
STATIC_INLINE StgClosure *
-copy_noscav(StgClosure *src, nat size, step *stp)
+copy_noscav_tag(StgClosure *src, nat size, step *stp, StgWord tag)
{
StgPtr to, from;
nat i;
@@ -125,6 +129,10 @@ copy_noscav(StgClosure *src, nat size, step *stp)
for (i = 0; i < size; i++) { // unroll for small i
to[i] = from[i];
}
+
+ /* retag pointer before updating EVACUATE closure and returning */
+ to = (StgPtr)TAG_CLOSURE(tag,(StgClosure*)to);
+
upd_evacuee((StgClosure *)from,(StgClosure *)to);
#ifdef PROFILING
@@ -184,6 +192,19 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *stp)
}
+/* Copy wrappers that don't tag the closure after copying */
+STATIC_INLINE StgClosure *
+copy(StgClosure *src, nat size, step *stp)
+{
+ return copy_tag(src,size,stp,0);
+}
+
+STATIC_INLINE StgClosure *
+copy_noscav(StgClosure *src, nat size, step *stp)
+{
+ return copy_noscav_tag(src,size,stp,0);
+}
+
/* -----------------------------------------------------------------------------
Evacuate a large object
@@ -295,13 +316,18 @@ evacuate(StgClosure *q)
bdescr *bd = NULL;
step *stp;
const StgInfoTable *info;
+ StgWord tag;
loop:
+ /* The tag and the pointer are split, to be merged after evacing */
+ tag = GET_CLOSURE_TAG(q);
+ q = UNTAG_CLOSURE(q);
+
ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
if (!HEAP_ALLOCED(q)) {
- if (!major_gc) return q;
+ if (!major_gc) return TAG_CLOSURE(tag,q);
info = get_itbl(q);
switch (info->type) {
@@ -338,14 +364,16 @@ loop:
if (*STATIC_LINK(info,(StgClosure *)q) == NULL) {
*STATIC_LINK(info,(StgClosure *)q) = static_objects;
static_objects = (StgClosure *)q;
+ /* I am assuming that static_objects pointers are not
+ * written to other objects, and thus, no need to retag. */
}
- return q;
+ return TAG_CLOSURE(tag,q);
case CONSTR_NOCAF_STATIC:
/* no need to put these on the static linked list, they don't need
* to be scavenged.
*/
- return q;
+ return TAG_CLOSURE(tag,q);
default:
barf("evacuate(static): strange closure type %d", (int)(info->type));
@@ -365,7 +393,7 @@ loop:
failed_to_evac = rtsTrue;
TICK_GC_FAILED_PROMOTION();
}
- return q;
+ return TAG_CLOSURE(tag,q);
}
if ((bd->flags & (BF_LARGE | BF_COMPACTED | BF_EVACUATED)) != 0) {
@@ -380,7 +408,7 @@ loop:
failed_to_evac = rtsTrue;
TICK_GC_FAILED_PROMOTION();
}
- return q;
+ return TAG_CLOSURE(tag,q);
}
/* evacuate large objects by re-linking them onto a different list.
@@ -393,7 +421,7 @@ loop:
goto loop;
}
evacuate_large((P_)q);
- return q;
+ return TAG_CLOSURE(tag,q);
}
/* If the object is in a step that we're compacting, then we
@@ -408,7 +436,7 @@ loop:
}
push_mark_stack((P_)q);
}
- return q;
+ return TAG_CLOSURE(tag,q);
}
}
@@ -429,20 +457,24 @@ loop:
if (q->header.info == Czh_con_info &&
// unsigned, so always true: (StgChar)w >= MIN_CHARLIKE &&
(StgChar)w <= MAX_CHARLIKE) {
- return (StgClosure *)CHARLIKE_CLOSURE((StgChar)w);
+ return TAG_CLOSURE(tag,
+ (StgClosure *)CHARLIKE_CLOSURE((StgChar)w)
+ );
}
if (q->header.info == Izh_con_info &&
(StgInt)w >= MIN_INTLIKE && (StgInt)w <= MAX_INTLIKE) {
- return (StgClosure *)INTLIKE_CLOSURE((StgInt)w);
+ return TAG_CLOSURE(tag,
+ (StgClosure *)INTLIKE_CLOSURE((StgInt)w)
+ );
}
// else
- return copy_noscav(q,sizeofW(StgHeader)+1,stp);
+ return copy_noscav_tag(q,sizeofW(StgHeader)+1,stp,tag);
}
case FUN_0_1:
case FUN_1_0:
case CONSTR_1_0:
- return copy(q,sizeofW(StgHeader)+1,stp);
+ return copy_tag(q,sizeofW(StgHeader)+1,stp,tag);
case THUNK_1_0:
case THUNK_0_1:
@@ -462,27 +494,27 @@ loop:
case FUN_1_1:
case FUN_2_0:
+ case FUN_0_2:
case CONSTR_1_1:
case CONSTR_2_0:
- case FUN_0_2:
- return copy(q,sizeofW(StgHeader)+2,stp);
+ return copy_tag(q,sizeofW(StgHeader)+2,stp,tag);
case CONSTR_0_2:
- return copy_noscav(q,sizeofW(StgHeader)+2,stp);
+ return copy_noscav_tag(q,sizeofW(StgHeader)+2,stp,tag);
case THUNK:
return copy(q,thunk_sizeW_fromITBL(info),stp);
case FUN:
- case CONSTR:
case IND_PERM:
case IND_OLDGEN_PERM:
case WEAK:
case STABLE_NAME:
- return copy(q,sizeW_fromITBL(info),stp);
+ case CONSTR:
+ return copy_tag(q,sizeW_fromITBL(info),stp,tag);
case BCO:
- return copy(q,bco_sizeW((StgBCO *)q),stp);
+ return copy(q,bco_sizeW((StgBCO *)q),stp);
case CAF_BLACKHOLE:
case SE_CAF_BLACKHOLE:
@@ -739,7 +771,9 @@ eval_thunk_selector( nat field, StgSelector * p )
const StgInfoTable *info_ptr;
StgClosure *selectee;
- selectee = p->selectee;
+ // The selectee might be a constructor closure,
+ // so we untag the pointer.
+ selectee = UNTAG_CLOSURE(p->selectee);
// Save the real info pointer (NOTE: not the same as get_itbl()).
info_ptr = p->header.info;
@@ -814,7 +848,7 @@ selector_loop:
{
StgClosure *q;
q = selectee->payload[field];
- if (is_to_space(q)) {
+ if (is_to_space(UNTAG_CLOSURE(q))) {
goto bale_out;
} else {
return q;
@@ -826,7 +860,8 @@ selector_loop:
case IND_OLDGEN:
case IND_OLDGEN_PERM:
case IND_STATIC:
- selectee = ((StgInd *)selectee)->indirectee;
+ // Again, we might need to untag a constructor.
+ selectee = UNTAG_CLOSURE( ((StgInd *)selectee)->indirectee );
goto selector_loop;
case EVACUATED:
@@ -880,7 +915,8 @@ selector_loop:
// indirection.
LDV_RECORD_CREATE(selectee);
- selectee = val;
+ // Of course this pointer might be tagged
+ selectee = UNTAG_CLOSURE(val);
goto selector_loop;
}
}
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 1fee394139..216d3cbe44 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -1031,6 +1031,7 @@ GarbageCollect ( rtsBool force_major_gc )
closure if it is alive, or NULL otherwise.
NOTE: Use it before compaction only!
+ It untags and (if needed) retags pointers to closures.
-------------------------------------------------------------------------- */
@@ -1039,8 +1040,12 @@ isAlive(StgClosure *p)
{
const StgInfoTable *info;
bdescr *bd;
+ StgWord tag;
while (1) {
+ /* The tag and the pointer are split, to be merged later when needed. */
+ tag = GET_CLOSURE_TAG(p);
+ p = UNTAG_CLOSURE(p);
ASSERT(LOOKS_LIKE_CLOSURE_PTR(p));
info = get_itbl(p);
@@ -1052,18 +1057,18 @@ isAlive(StgClosure *p)
// for static closures with an empty SRT or CONSTR_STATIC_NOCAFs.
//
if (!HEAP_ALLOCED(p)) {
- return p;
+ return TAG_CLOSURE(tag,p);
}
// ignore closures in generations that we're not collecting.
bd = Bdescr((P_)p);
if (bd->gen_no > N) {
- return p;
+ return TAG_CLOSURE(tag,p);
}
// if it's a pointer into to-space, then we're done
if (bd->flags & BF_EVACUATED) {
- return p;
+ return TAG_CLOSURE(tag,p);
}
// large objects use the evacuated flag
@@ -1073,7 +1078,7 @@ isAlive(StgClosure *p)
// check the mark bit for compacted steps
if ((bd->flags & BF_COMPACTED) && is_marked((P_)p,bd)) {
- return p;
+ return TAG_CLOSURE(tag,p);
}
switch (info->type) {
diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c
index 0de029edd5..f211401b05 100644
--- a/rts/sm/Scav.c
+++ b/rts/sm/Scav.c
@@ -200,7 +200,7 @@ scavenge_PAP_payload (StgClosure *fun, StgClosure **payload, StgWord size)
StgWord bitmap;
StgFunInfoTable *fun_info;
- fun_info = get_fun_itbl(fun);
+ fun_info = get_fun_itbl(UNTAG_CLOSURE(fun));
ASSERT(fun_info->i.type != PAP);
p = (StgPtr)payload;
@@ -1720,7 +1720,7 @@ scavenge_stack(StgPtr p, StgPtr stack_end)
StgFunInfoTable *fun_info;
ret_fun->fun = evacuate(ret_fun->fun);
- fun_info = get_fun_itbl(ret_fun->fun);
+ fun_info = get_fun_itbl(UNTAG_CLOSURE(ret_fun->fun));
p = scavenge_arg_block(fun_info, ret_fun->payload);
goto follow_srt;
}
diff --git a/utils/genapply/GenApply.hs b/utils/genapply/GenApply.hs
index b7cc6dd53c..c42ccb181a 100644
--- a/utils/genapply/GenApply.hs
+++ b/utils/genapply/GenApply.hs
@@ -1,10 +1,12 @@
-{-# OPTIONS -cpp #-}
+{-# OPTIONS -cpp -fglasgow-exts #-}
module Main(main) where
#include "../../includes/ghcconfig.h"
#include "../../includes/MachRegs.h"
#include "../../includes/Constants.h"
+-- Needed for TAG_BITS
+#include "../../includes/MachDeps.h"
import Text.PrettyPrint
import Data.Word
@@ -165,10 +167,16 @@ mkApplyFastName args
mkApplyInfoName args
= mkApplyName args <> text "_info"
+mb_tag_node arity | Just tag <- tagForArity arity = mkTagStmt tag <> semi
+ | otherwise = empty
+
+mkTagStmt tag = text ("R1 = R1 + "++ show tag)
+
genMkPAP regstatus macro jump ticker disamb
no_load_regs -- don't load argumnet regs before jumping
args_in_regs -- arguments are already in regs
is_pap args all_args_size fun_info_label
+ is_fun_case
= smaller_arity_cases
$$ exact_arity_case
$$ larger_arity_case
@@ -214,7 +222,8 @@ genMkPAP regstatus macro jump ticker disamb
if is_pap
then text "R2 = " <> mkApplyInfoName this_call_args <> semi
- else empty,
+ else empty,
+ if is_fun_case then mb_tag_node arity else empty,
text "jump " <> text jump <> semi
]) $$
text "}"
@@ -294,9 +303,10 @@ genMkPAP regstatus macro jump ticker disamb
-- text "TICK_SLOW_CALL_" <> text ticker <> text "_CORRECT();",
reg_doc,
text "Sp_adj(" <> int sp' <> text ");",
- if is_pap
- then text "R2 = " <> fun_info_label <> semi
- else empty,
+ if is_pap
+ then text "R2 = " <> fun_info_label <> semi
+ else empty,
+ if is_fun_case then mb_tag_node n_args else empty,
text "jump " <> text jump <> semi
])
@@ -319,6 +329,15 @@ genMkPAP regstatus macro jump ticker disamb
nest 4 (vcat [
-- text "TICK_SLOW_CALL_" <> text ticker <> text "_TOO_FEW();",
save_regs,
+ -- Before building the PAP, tag the function closure pointer
+ if is_fun_case then
+ vcat [
+ text "if (arity < " <> int tAG_BITS_MAX <> text ") {",
+ text " R1 = R1 + arity" <> semi,
+ text "}"
+ ]
+ else empty
+ ,
text macro <> char '(' <> int n_args <> comma <>
int all_args_size <>
text "," <> fun_info_label <>
@@ -332,6 +351,66 @@ genMkPAP regstatus macro jump ticker disamb
= assignRegs regstatus stk_args_slow_offset args
-- BUILD_PAP assumes args start at offset 1
+-- --------------------------------------
+-- Examine tag bits of function pointer and enter it
+-- directly if needed.
+-- TODO: remove the redundant case in the original code.
+enterFastPath regstatus no_load_regs args_in_regs args
+ | Just tag <- tagForArity (length args)
+ = enterFastPathHelper tag regstatus no_load_regs args_in_regs args
+enterFastPath _ _ _ _ = empty
+
+-- Copied from Constants.lhs & CgUtils.hs, i'd rather have this imported:
+-- (arity,tag)
+tAG_BITS = (TAG_BITS :: Int)
+tAG_BITS_MAX = ((1 `shiftL` tAG_BITS) :: Int)
+
+tagForArity :: Int -> Maybe Int
+tagForArity i | i < tAG_BITS_MAX = Just i
+ | otherwise = Nothing
+
+enterFastPathHelper tag regstatus no_load_regs args_in_regs args =
+ vcat [text "if (GETTAG(R1)==" <> int tag <> text ") {",
+ reg_doc,
+ text " Sp_adj(" <> int sp' <> text ");",
+ -- enter, but adjust offset with tag
+ text " jump " <> text "%GET_ENTRY(R1-" <> int tag <> text ");",
+ text "}"
+ ]
+ -- I don't totally understand this code, I copied it from
+ -- exact_arity_case
+ -- TODO: refactor
+ where
+ -- offset of arguments on the stack at slow apply calls.
+ stk_args_slow_offset = 1
+
+ stk_args_offset
+ | args_in_regs = 0
+ | otherwise = stk_args_slow_offset
+
+ (reg_doc, sp')
+ | no_load_regs || args_in_regs = (empty, stk_args_offset)
+ | otherwise = loadRegArgs regstatus stk_args_offset args
+
+tickForArity arity
+ | True
+ = empty
+ | Just tag <- tagForArity arity
+ = vcat [
+ text "W_[TOTAL_CALLS] = W_[TOTAL_CALLS] + 1;",
+ text "W_[SLOW_CALLS_" <> int arity <> text "] = W_[SLOW_CALLS_" <> int arity <> text "] + 1;",
+ text "if (TO_W_(StgFunInfoExtra_arity(%FUN_INFO(%INFO_PTR(UNTAG(R1))))) == " <> int arity <> text " ) {",
+ text " W_[RIGHT_ARITY_" <> int arity <> text "] = W_[RIGHT_ARITY_" <> int arity <> text "] + 1;",
+ text " if (GETTAG(R1)==" <> int tag <> text ") {",
+ text " W_[TAGGED_PTR_" <> int arity <> text "] = W_[TAGGED_PTR_" <> int arity <> text "] + 1;",
+ text " } else {",
+ -- force a halt when not tagged!
+-- text " W_[0]=0;",
+ text " }",
+ text "}"
+ ]
+tickForArity _ = text "W_[TOTAL_CALLS] = W_[TOTAL_CALLS] + 1;"
+
-- -----------------------------------------------------------------------------
-- generate an apply function
@@ -388,6 +467,7 @@ genApply regstatus args =
-- print " [IND_OLDGEN_PERM] &&ind_lbl"
-- print " };"
+ tickForArity (length args),
text "",
text "IF_DEBUG(apply,foreign \"C\" debugBelch(\"" <> fun_ret_label <>
text "... \"); foreign \"C\" printClosure(R1 \"ptr\"));",
@@ -411,6 +491,12 @@ genApply regstatus args =
vcat (do_assert args 1),
text "again:",
+
+ -- if pointer is tagged enter it fast!
+ enterFastPath regstatus False False args,
+
+ -- Functions can be tagged, so we untag them!
+ text "R1 = UNTAG(R1);",
text "info = %INFO_PTR(R1);",
-- if fast == 1:
@@ -428,7 +514,7 @@ genApply regstatus args =
text "ASSERT(arity > 0);",
genMkPAP regstatus "BUILD_PAP" "ENTRY_LBL(stg_BCO)" "FUN" "BCO"
True{-stack apply-} False{-args on stack-} False{-not a PAP-}
- args all_args_size fun_info_label
+ args all_args_size fun_info_label {- tag stmt -}False
]),
text "}",
@@ -445,9 +531,9 @@ genApply regstatus args =
nest 4 (vcat [
text "arity = TO_W_(StgFunInfoExtra_arity(%FUN_INFO(info)));",
text "ASSERT(arity > 0);",
- genMkPAP regstatus "BUILD_PAP" "%GET_ENTRY(R1)" "FUN" "FUN"
+ genMkPAP regstatus "BUILD_PAP" "%GET_ENTRY(UNTAG(R1))" "FUN" "FUN"
False{-reg apply-} False{-args on stack-} False{-not a PAP-}
- args all_args_size fun_info_label
+ args all_args_size fun_info_label {- tag stmt -}True
]),
text "}",
@@ -461,7 +547,7 @@ genApply regstatus args =
text "ASSERT(arity > 0);",
genMkPAP regstatus "NEW_PAP" "stg_PAP_apply" "PAP" "PAP"
True{-stack apply-} False{-args on stack-} True{-is a PAP-}
- args all_args_size fun_info_label
+ args all_args_size fun_info_label {- tag stmt -}False
]),
text "}",
@@ -506,6 +592,7 @@ genApply regstatus args =
text " IND_OLDGEN_PERM: {",
nest 4 (vcat [
text "R1 = StgInd_indirectee(R1);",
+ -- An indirection node might contain a tagged pointer
text "goto again;"
]),
text "}",
@@ -541,6 +628,14 @@ genApplyFast regstatus args =
nest 4 (vcat [
text "W_ info;",
text "W_ arity;",
+
+ tickForArity (length args),
+
+ -- if pointer is tagged enter it fast!
+ enterFastPath regstatus False True args,
+
+ -- Functions can be tagged, so we untag them!
+ text "R1 = UNTAG(R1);",
text "info = %GET_STD_INFO(R1);",
text "switch [INVALID_OBJECT .. N_CLOSURE_TYPES] (TO_W_(%INFO_TYPE(info))) {",
nest 4 (vcat [
@@ -554,9 +649,9 @@ genApplyFast regstatus args =
nest 4 (vcat [
text "arity = TO_W_(StgFunInfoExtra_arity(%GET_FUN_INFO(R1)));",
text "ASSERT(arity > 0);",
- genMkPAP regstatus "BUILD_PAP" "%GET_ENTRY(R1)" "FUN" "FUN"
+ genMkPAP regstatus "BUILD_PAP" "%GET_ENTRY(UNTAG(R1))" "FUN" "FUN"
False{-reg apply-} True{-args in regs-} False{-not a PAP-}
- args all_args_size fun_info_label
+ args all_args_size fun_info_label {- tag stmt -}True
]),
char '}',
@@ -607,7 +702,7 @@ genStackApply regstatus args =
(assign_regs, sp') = loadRegArgs regstatus 0 args
body = vcat [assign_regs,
text "Sp_adj" <> parens (int sp') <> semi,
- text "jump %GET_ENTRY(R1);"
+ text "jump %GET_ENTRY(UNTAG(R1));"
]
-- -----------------------------------------------------------------------------