diff options
author | Simon Marlow <marlowsd@gmail.com> | 2012-10-03 09:30:56 +0100 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2012-10-08 09:04:40 +0100 |
commit | a7c0387d20c1c9994d1100b14fbb8fb4e28a259e (patch) | |
tree | b95d0a512f951a4a463f1aa5178b0cd5c4fdb410 | |
parent | aed37acd4d157791381800d5de960a2461bcbef3 (diff) | |
download | haskell-a7c0387d20c1c9994d1100b14fbb8fb4e28a259e.tar.gz |
Produce new-style Cmm from the Cmm parser
The main change here is that the Cmm parser now allows high-level cmm
code with argument-passing and function calls. For example:
foo ( gcptr a, bits32 b )
{
if (b > 0) {
// we can make tail calls passing arguments:
jump stg_ap_0_fast(a);
}
return (x,y);
}
More details on the new cmm syntax are in Note [Syntax of .cmm files]
in CmmParse.y.
The old syntax is still more-or-less supported for those occasional
code fragments that really need to explicitly manipulate the stack.
However there are a couple of differences: it is now obligatory to
give a list of live GlobalRegs on every jump, e.g.
jump %ENTRY_CODE(Sp(0)) [R1];
Again, more details in Note [Syntax of .cmm files].
I have rewritten most of the .cmm files in the RTS into the new
syntax, except for AutoApply.cmm which is generated by the genapply
program: this file could be generated in the new syntax instead and
would probably be better off for it, but I ran out of enthusiasm.
Some other changes in this batch:
- The PrimOp calling convention is gone, primops now use the ordinary
NativeNodeCall convention. This means that primops and "foreign
import prim" code must be written in high-level cmm, but they can
now take more than 10 arguments.
- CmmSink now does constant-folding (should fix #7219)
- .cmm files now go through the cmmPipeline, and as a result we
generate better code in many cases. All the object files generated
for the RTS .cmm files are now smaller. Performance should be
better too, but I haven't measured it yet.
- RET_DYN frames are removed from the RTS, lots of code goes away
- we now have some more canned GC points to cover unboxed-tuples with
2-4 pointers, which will reduce code size a little.
85 files changed, 3306 insertions, 3613 deletions
diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs index a5d559e9ff..04312321cc 100644 --- a/compiler/cmm/CLabel.hs +++ b/compiler/cmm/CLabel.hs @@ -72,7 +72,7 @@ module CLabel ( mkCmmRetLabel, mkCmmCodeLabel, mkCmmDataLabel, - mkCmmGcPtrLabel, + mkCmmClosureLabel, mkRtsApFastLabel, @@ -331,7 +331,7 @@ data CmmLabelInfo | CmmRet -- ^ misc rts return points, suffix _ret | CmmData -- ^ misc rts data bits, eg CHARLIKE_closure | CmmCode -- ^ misc rts code - | CmmGcPtr -- ^ GcPtrs eg CHARLIKE_closure + | CmmClosure -- ^ closures eg CHARLIKE_closure | CmmPrimCall -- ^ a prim call to some hand written Cmm code deriving (Eq, Ord) @@ -418,7 +418,7 @@ mkCAFBlackHoleEntryLabel = CmmLabel rtsPackageId (fsLit "stg_CAF_BLACKHOL ----- mkCmmInfoLabel, mkCmmEntryLabel, mkCmmRetInfoLabel, mkCmmRetLabel, - mkCmmCodeLabel, mkCmmDataLabel, mkCmmGcPtrLabel + mkCmmCodeLabel, mkCmmDataLabel, mkCmmClosureLabel :: PackageId -> FastString -> CLabel mkCmmInfoLabel pkg str = CmmLabel pkg str CmmInfo @@ -427,7 +427,7 @@ mkCmmRetInfoLabel pkg str = CmmLabel pkg str CmmRetInfo mkCmmRetLabel pkg str = CmmLabel pkg str CmmRet mkCmmCodeLabel pkg str = CmmLabel pkg str CmmCode mkCmmDataLabel pkg str = CmmLabel pkg str CmmData -mkCmmGcPtrLabel pkg str = CmmLabel pkg str CmmGcPtr +mkCmmClosureLabel pkg str = CmmLabel pkg str CmmClosure -- Constructing RtsLabels @@ -543,6 +543,7 @@ mkPlainModuleInitLabel mod = PlainModuleInitLabel mod toClosureLbl :: CLabel -> CLabel toClosureLbl (IdLabel n c _) = IdLabel n c Closure +toClosureLbl (CmmLabel m str _) = CmmLabel m str CmmClosure toClosureLbl l = pprPanic "toClosureLbl" (ppr l) toSlowEntryLbl :: CLabel -> CLabel @@ -774,7 +775,7 @@ isGcPtrLabel lbl = case labelType lbl of -- whether it be code, data, or static GC object. labelType :: CLabel -> CLabelType labelType (CmmLabel _ _ CmmData) = DataLabel -labelType (CmmLabel _ _ CmmGcPtr) = GcPtrLabel +labelType (CmmLabel _ _ CmmClosure) = GcPtrLabel labelType (CmmLabel _ _ CmmCode) = CodeLabel labelType (CmmLabel _ _ CmmInfo) = DataLabel labelType (CmmLabel _ _ CmmEntry) = CodeLabel @@ -1001,7 +1002,6 @@ pprCLbl (LargeBitmapLabel u) = text "b" <> pprUnique u <> pp_cSEP <> ptext (sLi pprCLbl (CmmLabel _ str CmmCode) = ftext str pprCLbl (CmmLabel _ str CmmData) = ftext str -pprCLbl (CmmLabel _ str CmmGcPtr) = ftext str pprCLbl (CmmLabel _ str CmmPrimCall) = ftext str pprCLbl (RtsLabel (RtsApFast str)) = ftext str <> ptext (sLit "_fast") @@ -1046,6 +1046,9 @@ pprCLbl (CmmLabel _ fs CmmRetInfo) pprCLbl (CmmLabel _ fs CmmRet) = ftext fs <> ptext (sLit "_ret") +pprCLbl (CmmLabel _ fs CmmClosure) + = ftext fs <> ptext (sLit "_closure") + pprCLbl (RtsLabel (RtsPrimOp primop)) = ptext (sLit "stg_") <> ppr primop diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs index 2dedee0d52..8409f0dbeb 100644 --- a/compiler/cmm/Cmm.hs +++ b/compiler/cmm/Cmm.hs @@ -109,9 +109,14 @@ data CmmStackInfo -- number of bytes of arguments on the stack on entry to the -- the proc. This is filled in by StgCmm.codeGen, and used -- by the stack allocator later. - updfr_space :: Maybe ByteOff + updfr_space :: Maybe ByteOff, -- XXX: this never contains anything useful, but it should. -- See comment in CmmLayoutStack. + do_layout :: Bool + -- Do automatic stack layout for this proc. This is + -- True for all code generated by the code generator, + -- but is occasionally False for hand-written Cmm where + -- we want to do the stack manipulation manually. } -- | Info table as a haskell data type diff --git a/compiler/cmm/CmmBuildInfoTables.hs b/compiler/cmm/CmmBuildInfoTables.hs index ecaab57d76..304f4c2170 100644 --- a/compiler/cmm/CmmBuildInfoTables.hs +++ b/compiler/cmm/CmmBuildInfoTables.hs @@ -235,8 +235,8 @@ to_SRT dflags top_srt off len bmp tbl = CmmData RelocatableReadOnlyData $ Statics srt_desc_lbl $ map CmmStaticLit ( cmmLabelOffW dflags top_srt off - : mkWordCLit dflags (toStgWord dflags (fromIntegral len)) - : map (mkWordCLit dflags) bmp) + : mkWordCLit dflags (fromIntegral len) + : map (mkStgWordCLit dflags) bmp) return (Just tbl, C_SRT srt_desc_lbl 0 (srtEscape dflags)) | otherwise = return (Nothing, C_SRT top_srt off (toStgHalfWord dflags (fromStgWord (head bmp)))) @@ -252,7 +252,8 @@ localCAFInfo :: CAFEnv -> CmmDecl -> (CAFSet, Maybe CLabel) localCAFInfo _ (CmmData _ _) = (Set.empty, Nothing) localCAFInfo cafEnv proc@(CmmProc _ top_l (CmmGraph {g_entry=entry})) = case topInfoTable proc of - Just (CmmInfoTable { cit_rep = rep }) | not (isStaticRep rep) + Just (CmmInfoTable { cit_rep = rep }) + | not (isStaticRep rep) && not (isStackRep rep) -> (cafs, Just (toClosureLbl top_l)) _other -> (cafs, Nothing) where diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs index 235fe7f911..180b2d7eab 100644 --- a/compiler/cmm/CmmCallConv.hs +++ b/compiler/cmm/CmmCallConv.hs @@ -8,7 +8,8 @@ module CmmCallConv ( ParamLocation(..), assignArgumentsPos, - globalArgRegs + assignStack, + globalArgRegs, realArgRegs ) where #include "HsVersions.h" @@ -18,7 +19,6 @@ import SMRep import Cmm (Convention(..)) import PprCmm () -import qualified Data.List as L import DynFlags import Outputable @@ -33,15 +33,22 @@ instance Outputable ParamLocation where ppr (RegisterParam g) = ppr g ppr (StackParam p) = ppr p --- | JD: For the new stack story, I want arguments passed on the stack to manifest as --- positive offsets in a CallArea, not negative offsets from the stack pointer. --- Also, I want byte offsets, not word offsets. -assignArgumentsPos :: DynFlags -> Convention -> (a -> CmmType) -> [a] -> - [(a, ParamLocation)] +-- | -- Given a list of arguments, and a function that tells their types, -- return a list showing where each argument is passed -assignArgumentsPos dflags conv arg_ty reps = assignments - where -- The calling conventions (CgCallConv.hs) are complicated, to say the least +-- +assignArgumentsPos :: DynFlags + -> ByteOff -- stack offset to start with + -> Convention + -> (a -> CmmType) -- how to get a type from an arg + -> [a] -- args + -> ( + ByteOff -- bytes of stack args + , [(a, ParamLocation)] -- args and locations + ) + +assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments) + where regs = case (reps, conv) of (_, NativeNodeCall) -> getRegsWithNode dflags (_, NativeDirectCall) -> getRegsWithoutNode dflags @@ -49,23 +56,14 @@ assignArgumentsPos dflags conv arg_ty reps = assignments (_, NativeReturn) -> getRegsWithNode dflags -- GC calling convention *must* put values in registers (_, GC) -> allRegs dflags - (_, PrimOpCall) -> allRegs dflags - ([_], PrimOpReturn) -> allRegs dflags - (_, PrimOpReturn) -> getRegsWithNode dflags (_, Slow) -> noRegs -- The calling conventions first assign arguments to registers, -- then switch to the stack when we first run out of registers - -- (even if there are still available registers for args of a different type). - -- When returning an unboxed tuple, we also separate the stack - -- arguments by pointerhood. - (reg_assts, stk_args) = assign_regs [] reps regs - stk_args' = case conv of NativeReturn -> part - PrimOpReturn -> part - GC | length stk_args /= 0 -> panic "Failed to allocate registers for GC call" - _ -> stk_args - where part = uncurry (++) - (L.partition (not . isGcPtrType . arg_ty) stk_args) - stk_assts = assign_stk 0 [] (reverse stk_args') + -- (even if there are still available registers for args of a + -- different type). When returning an unboxed tuple, we also + -- separate the stack arguments by pointerhood. + (reg_assts, stk_args) = assign_regs [] reps regs + (stk_off, stk_assts) = assignStack dflags off arg_ty stk_args assignments = reg_assts ++ stk_assts assign_regs assts [] _ = (assts, []) @@ -88,11 +86,21 @@ assignArgumentsPos dflags conv arg_ty reps = assignments gcp | isGcPtrType ty = VGcPtr | otherwise = VNonGcPtr - assign_stk _ assts [] = assts - assign_stk offset assts (r:rs) = assign_stk off' ((r, StackParam off') : assts) rs + +assignStack :: DynFlags -> ByteOff -> (a -> CmmType) -> [a] + -> ( + ByteOff -- bytes of stack args + , [(a, ParamLocation)] -- args and locations + ) +assignStack dflags offset arg_ty args = assign_stk offset [] (reverse args) + where + assign_stk offset assts [] = (offset, assts) + assign_stk offset assts (r:rs) + = assign_stk off' ((r, StackParam off') : assts) rs where w = typeWidth (arg_ty r) - size = (((widthInBytes w - 1) `div` wORD_SIZE dflags) + 1) * wORD_SIZE dflags + size = (((widthInBytes w - 1) `div` word_size) + 1) * word_size off' = offset + size + word_size = wORD_SIZE dflags ----------------------------------------------------------------------------- -- Local information about the registers available @@ -158,3 +166,9 @@ globalArgRegs dflags = map ($ VGcPtr) (allVanillaRegs dflags) ++ allFloatRegs dflags ++ allDoubleRegs dflags ++ allLongRegs dflags + +realArgRegs :: DynFlags -> [GlobalReg] +realArgRegs dflags = map ($VGcPtr) (realVanillaRegs dflags) ++ + realFloatRegs dflags ++ + realDoubleRegs dflags ++ + realLongRegs dflags diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs index aa2925fe53..4028efddf6 100644 --- a/compiler/cmm/CmmContFlowOpt.hs +++ b/compiler/cmm/CmmContFlowOpt.hs @@ -97,15 +97,17 @@ cmmCfgOptsProc _ top = top blockConcat :: Bool -> CmmGraph -> (CmmGraph, BlockEnv BlockId) blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } - = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map) + = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map') where - -- we might be able to shortcut the entry BlockId itself - new_entry + -- we might be able to shortcut the entry BlockId itself. + -- remember to update the shortcut_map', since we also have to + -- update the info_tbls mapping now. + (new_entry, shortcut_map') | Just entry_blk <- mapLookup entry_id new_blocks , Just dest <- canShortcut entry_blk - = dest + = (dest, mapInsert entry_id dest shortcut_map) | otherwise - = entry_id + = (entry_id, shortcut_map) blocks = postorderDfs g blockmap = foldr addBlock emptyBody blocks diff --git a/compiler/cmm/CmmCvt.hs b/compiler/cmm/CmmCvt.hs index cd838821b3..017d120d84 100644 --- a/compiler/cmm/CmmCvt.hs +++ b/compiler/cmm/CmmCvt.hs @@ -22,19 +22,23 @@ cmmOfZgraph tops = map mapTop tops where mapTop (CmmProc h l g) = CmmProc (info_tbls h) l (ofZgraph g) mapTop (CmmData s ds) = CmmData s ds -data ValueDirection = Arguments | Results +add_hints :: [a] -> [ForeignHint] -> [Old.CmmHinted a] +add_hints args hints = zipWith Old.CmmHinted args hints -add_hints :: ForeignTarget -> ValueDirection -> [a] -> [Old.CmmHinted a] -add_hints conv vd args = zipWith Old.CmmHinted args (get_hints conv vd) - -get_hints :: ForeignTarget -> ValueDirection -> [ForeignHint] -get_hints (ForeignTarget _ (ForeignConvention _ hints _)) Arguments = hints -get_hints (ForeignTarget _ (ForeignConvention _ _ hints)) Results = hints -get_hints (PrimTarget _) _vd = repeat NoHint +get_hints :: ForeignTarget -> ([ForeignHint], [ForeignHint]) +get_hints (PrimTarget op) = (res_hints ++ repeat NoHint, + arg_hints ++ repeat NoHint) + where (res_hints, arg_hints) = callishMachOpHints op +get_hints (ForeignTarget _ (ForeignConvention _ arg_hints res_hints _)) + = (res_hints, arg_hints) cmm_target :: ForeignTarget -> Old.CmmCallTarget cmm_target (PrimTarget op) = Old.CmmPrim op Nothing -cmm_target (ForeignTarget e (ForeignConvention cc _ _)) = Old.CmmCallee e cc +cmm_target (ForeignTarget e (ForeignConvention cc _ _ _)) = Old.CmmCallee e cc + +get_ret :: ForeignTarget -> CmmReturnInfo +get_ret (PrimTarget _) = CmmMayReturn +get_ret (ForeignTarget _ (ForeignConvention _ _ _ ret)) = ret ofZgraph :: CmmGraph -> Old.ListGraph Old.CmmStmt ofZgraph g = Old.ListGraph $ mapMaybe convert_block $ postorderDfs g @@ -83,11 +87,14 @@ ofZgraph g = Old.ListGraph $ mapMaybe convert_block $ postorderDfs g CmmAssign l r -> Old.CmmAssign l r CmmStore l r -> Old.CmmStore l r CmmUnsafeForeignCall (PrimTarget MO_Touch) _ _ -> Old.CmmNop - CmmUnsafeForeignCall target ress args -> + CmmUnsafeForeignCall target ress args -> Old.CmmCall (cmm_target target) - (add_hints target Results ress) - (add_hints target Arguments args) - Old.CmmMayReturn + (add_hints ress res_hints) + (add_hints args arg_hints) + (get_ret target) + where + (res_hints, arg_hints) = get_hints target + last :: CmmNode O C -> () -> [Old.CmmStmt] last node _ = stmts diff --git a/compiler/cmm/CmmInfo.hs b/compiler/cmm/CmmInfo.hs index 6aa4d6cbfa..dec6b5d09d 100644 --- a/compiler/cmm/CmmInfo.hs +++ b/compiler/cmm/CmmInfo.hs @@ -155,7 +155,7 @@ type InfoTableContents = ( [CmmLit] -- The standard part mkInfoTableContents :: DynFlags -> CmmInfoTable - -> Maybe StgHalfWord -- Override default RTS type tag? + -> Maybe Int -- Override default RTS type tag? -> UniqSM ([RawCmmDecl], -- Auxiliary top decls InfoTableContents) -- Info tbl + extra bits @@ -178,22 +178,19 @@ mkInfoTableContents dflags ; let std_info = mkStdInfoTable dflags prof_lits rts_tag srt_bitmap liveness_lit rts_tag | Just tag <- mb_rts_tag = tag - | null liveness_data = rET_SMALL dflags -- Fits in extra_bits - | otherwise = rET_BIG dflags -- Does not; extra_bits is - -- a label + | null liveness_data = rET_SMALL -- Fits in extra_bits + | otherwise = rET_BIG -- Does not; extra_bits is + -- a label ; return (prof_data ++ liveness_data, (std_info, srt_label)) } | HeapRep _ ptrs nonptrs closure_type <- smrep - = do { let layout = packHalfWordsCLit - dflags - (toStgHalfWord dflags (toInteger ptrs)) - (toStgHalfWord dflags (toInteger nonptrs)) + = do { let layout = packIntsCLit dflags ptrs nonptrs ; (prof_lits, prof_data) <- mkProfLits dflags prof ; let (srt_label, srt_bitmap) = mkSRTLit dflags srt ; (mb_srt_field, mb_layout, extra_bits, ct_data) <- mk_pieces closure_type srt_label ; let std_info = mkStdInfoTable dflags prof_lits - (mb_rts_tag `orElse` rtsClosureType dflags smrep) + (mb_rts_tag `orElse` rtsClosureType smrep) (mb_srt_field `orElse` srt_bitmap) (mb_layout `orElse` layout) ; return (prof_data ++ ct_data, (std_info, extra_bits)) } @@ -205,24 +202,25 @@ mkInfoTableContents dflags , [RawCmmDecl]) -- Auxiliary data decls mk_pieces (Constr con_tag con_descr) _no_srt -- A data constructor = do { (descr_lit, decl) <- newStringLit con_descr - ; return (Just con_tag, Nothing, [descr_lit], [decl]) } + ; return ( Just (toStgHalfWord dflags (fromIntegral con_tag)) + , Nothing, [descr_lit], [decl]) } mk_pieces Thunk srt_label = return (Nothing, Nothing, srt_label, []) mk_pieces (ThunkSelector offset) _no_srt - = return (Just (toStgHalfWord dflags 0), Just (mkWordCLit dflags offset), [], []) + = return (Just (toStgHalfWord dflags 0), Just (mkWordCLit dflags (fromIntegral offset)), [], []) -- Layout known (one free var); we use the layout field for offset mk_pieces (Fun arity (ArgSpec fun_type)) srt_label - = do { let extra_bits = packHalfWordsCLit dflags fun_type arity : srt_label + = do { let extra_bits = packIntsCLit dflags fun_type arity : srt_label ; return (Nothing, Nothing, extra_bits, []) } mk_pieces (Fun arity (ArgGen arg_bits)) srt_label = do { (liveness_lit, liveness_data) <- mkLivenessBits dflags arg_bits - ; let fun_type | null liveness_data = aRG_GEN dflags - | otherwise = aRG_GEN_BIG dflags - extra_bits = [ packHalfWordsCLit dflags fun_type arity + ; let fun_type | null liveness_data = aRG_GEN + | otherwise = aRG_GEN_BIG + extra_bits = [ packIntsCLit dflags fun_type arity , srt_lit, liveness_lit, slow_entry ] ; return (Nothing, Nothing, extra_bits, liveness_data) } where @@ -233,9 +231,14 @@ mkInfoTableContents dflags mk_pieces BlackHole _ = panic "mk_pieces: BlackHole" - mkInfoTableContents _ _ _ = panic "mkInfoTableContents" -- NonInfoTable dealt with earlier +packIntsCLit :: DynFlags -> Int -> Int -> CmmLit +packIntsCLit dflags a b = packHalfWordsCLit dflags + (toStgHalfWord dflags (fromIntegral a)) + (toStgHalfWord dflags (fromIntegral b)) + + mkSRTLit :: DynFlags -> C_SRT -> ([CmmLit], -- srt_label, if any @@ -314,7 +317,7 @@ mkLivenessBits dflags liveness [mkRODataLits bitmap_lbl lits]) } | otherwise -- Fits in one word - = return (mkWordCLit dflags bitmap_word, []) + = return (mkStgWordCLit dflags bitmap_word, []) where n_bits = length liveness @@ -328,7 +331,8 @@ mkLivenessBits dflags liveness bitmap_word = toStgWord dflags (fromIntegral n_bits) .|. (small_bitmap `shiftL` bITMAP_BITS_SHIFT dflags) - lits = mkWordCLit dflags (toStgWord dflags (fromIntegral n_bits)) : map (mkWordCLit dflags) bitmap + lits = mkWordCLit dflags (fromIntegral n_bits) + : map (mkStgWordCLit dflags) bitmap -- The first word is the size. The structure must match -- StgLargeBitmap in includes/rts/storage/InfoTable.h @@ -348,8 +352,8 @@ mkLivenessBits dflags liveness mkStdInfoTable :: DynFlags -> (CmmLit,CmmLit) -- Closure type descr and closure descr (profiling) - -> StgHalfWord -- Closure RTS tag - -> StgHalfWord -- SRT length + -> Int -- Closure RTS tag + -> StgHalfWord -- SRT length -> CmmLit -- layout field -> [CmmLit] @@ -365,7 +369,7 @@ mkStdInfoTable dflags (type_descr, closure_descr) cl_type srt_len layout_lit | dopt Opt_SccProfilingOn dflags = [type_descr, closure_descr] | otherwise = [] - type_lit = packHalfWordsCLit dflags cl_type srt_len + type_lit = packHalfWordsCLit dflags (toStgHalfWord dflags (fromIntegral cl_type)) srt_len ------------------------------------------------------------------------- -- diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs index 6f75f5451c..5c4045778a 100644 --- a/compiler/cmm/CmmLayoutStack.hs +++ b/compiler/cmm/CmmLayoutStack.hs @@ -933,7 +933,7 @@ lowerSafeForeignCall dflags block (ret_args, regs, copyout) = copyOutOflow dflags NativeReturn Jump (Young succ) (map (CmmReg . CmmLocal) res) - updfr (0, []) + updfr [] -- NB. after resumeThread returns, the top-of-stack probably contains -- the stack frame for succ, but it might not: if the current thread @@ -973,14 +973,14 @@ callSuspendThread :: DynFlags -> LocalReg -> Bool -> CmmNode O O callSuspendThread dflags id intrbl = CmmUnsafeForeignCall (ForeignTarget (foreignLbl (fsLit "suspendThread")) - (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint])) + (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint] CmmMayReturn)) [id] [CmmReg (CmmGlobal BaseReg), mkIntExpr dflags (fromEnum intrbl)] callResumeThread :: LocalReg -> LocalReg -> CmmNode O O callResumeThread new_base id = CmmUnsafeForeignCall (ForeignTarget (foreignLbl (fsLit "resumeThread")) - (ForeignConvention CCallConv [AddrHint] [AddrHint])) + (ForeignConvention CCallConv [AddrHint] [AddrHint] CmmMayReturn)) [new_base] [CmmReg (CmmLocal id)] -- ----------------------------------------------------------------------------- diff --git a/compiler/cmm/CmmLex.x b/compiler/cmm/CmmLex.x index ddd681d25e..718eb27c82 100644 --- a/compiler/cmm/CmmLex.x +++ b/compiler/cmm/CmmLex.x @@ -23,9 +23,9 @@ module CmmLex ( CmmToken(..), cmmlex, ) where -import OldCmm -import Lexer +import CmmExpr +import Lexer import SrcLoc import UniqFM import StringBuffer @@ -147,6 +147,7 @@ data CmmToken | CmmT_align | CmmT_goto | CmmT_if + | CmmT_call | CmmT_jump | CmmT_foreign | CmmT_never @@ -157,6 +158,7 @@ data CmmToken | CmmT_switch | CmmT_case | CmmT_default + | CmmT_push | CmmT_bits8 | CmmT_bits16 | CmmT_bits32 @@ -224,8 +226,9 @@ reservedWordsFM = listToUFM $ ( "align", CmmT_align ), ( "goto", CmmT_goto ), ( "if", CmmT_if ), - ( "jump", CmmT_jump ), - ( "foreign", CmmT_foreign ), + ( "call", CmmT_call ), + ( "jump", CmmT_jump ), + ( "foreign", CmmT_foreign ), ( "never", CmmT_never ), ( "prim", CmmT_prim ), ( "return", CmmT_return ), @@ -233,8 +236,9 @@ reservedWordsFM = listToUFM $ ( "import", CmmT_import ), ( "switch", CmmT_switch ), ( "case", CmmT_case ), - ( "default", CmmT_default ), - ( "bits8", CmmT_bits8 ), + ( "default", CmmT_default ), + ( "push", CmmT_push ), + ( "bits8", CmmT_bits8 ), ( "bits16", CmmT_bits16 ), ( "bits32", CmmT_bits32 ), ( "bits64", CmmT_bits64 ), diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs index 520c7e7a7d..c00cdb5b5a 100644 --- a/compiler/cmm/CmmMachOp.hs +++ b/compiler/cmm/CmmMachOp.hs @@ -16,7 +16,7 @@ module CmmMachOp , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32, mo_WordTo64 -- CallishMachOp - , CallishMachOp(..) + , CallishMachOp(..), callishMachOpHints , pprCallishMachOp ) where @@ -463,3 +463,10 @@ data CallishMachOp pprCallishMachOp :: CallishMachOp -> SDoc pprCallishMachOp mo = text (show mo) +callishMachOpHints :: CallishMachOp -> ([ForeignHint], [ForeignHint]) +callishMachOpHints op = case op of + MO_Memcpy -> ([], [AddrHint,AddrHint,NoHint,NoHint]) + MO_Memset -> ([], [AddrHint,NoHint,NoHint,NoHint]) + MO_Memmove -> ([], [AddrHint,AddrHint,NoHint,NoHint]) + _ -> ([],[]) + -- empty lists indicate NoHint diff --git a/compiler/cmm/CmmNode.hs b/compiler/cmm/CmmNode.hs index ae7ac091de..b7bb270bd6 100644 --- a/compiler/cmm/CmmNode.hs +++ b/compiler/cmm/CmmNode.hs @@ -9,8 +9,9 @@ -- for details module CmmNode ( - CmmNode(..), ForeignHint(..), CmmFormal, CmmActual, + CmmNode(..), CmmFormal, CmmActual, UpdFrameOffset, Convention(..), ForeignConvention(..), ForeignTarget(..), + CmmReturnInfo(..), mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf, mapExpM, mapExpDeepM, wrapRecExpM, mapSuccessors ) where @@ -228,14 +229,31 @@ type CmmFormal = LocalReg type UpdFrameOffset = ByteOff +-- | A convention maps a list of values (function arguments or return +-- values) to registers or stack locations. data Convention - = NativeDirectCall -- Native C-- call skipping the node (closure) argument - | NativeNodeCall -- Native C-- call including the node argument - | NativeReturn -- Native C-- return - | Slow -- Slow entry points: all args pushed on the stack - | GC -- Entry to the garbage collector: uses the node reg! - | PrimOpCall -- Calling prim ops - | PrimOpReturn -- Returning from prim ops + = NativeDirectCall + -- ^ top-level Haskell functions use @NativeDirectCall@, which + -- maps arguments to registers starting with R2, according to + -- how many registers are available on the platform. This + -- convention ignores R1, because for a top-level function call + -- the function closure is implicit, and doesn't need to be passed. + | NativeNodeCall + -- ^ non-top-level Haskell functions, which pass the address of + -- the function closure in R1 (regardless of whether R1 is a + -- real register or not), and the rest of the arguments in + -- registers or on the stack. + | NativeReturn + -- ^ a native return. The convention for returns depends on + -- how many values are returned: for just one value returned, + -- the appropriate register is used (R1, F1, etc.). regardless + -- of whether it is a real register or not. For multiple + -- values returned, they are mapped to registers or the stack. + | Slow + -- ^ Slow entry points: all args pushed on the stack + | GC + -- ^ Entry to the garbage collector: uses the node reg! + -- (TODO: I don't think we need this --SDM) deriving( Eq ) data ForeignConvention @@ -243,8 +261,14 @@ data ForeignConvention CCallConv -- Which foreign-call convention [ForeignHint] -- Extra info about the args [ForeignHint] -- Extra info about the result + CmmReturnInfo deriving Eq +data CmmReturnInfo + = CmmMayReturn + | CmmNeverReturns + deriving ( Eq ) + data ForeignTarget -- The target of a foreign call = ForeignTarget -- A foreign procedure CmmExpr -- Its address @@ -253,12 +277,6 @@ data ForeignTarget -- The target of a foreign call CallishMachOp -- Which one deriving Eq -data ForeignHint - = NoHint | AddrHint | SignedHint - deriving( Eq ) - -- Used to give extra per-argument or per-result - -- information needed by foreign calling conventions - -------------------------------------------------- -- Instances of register and slot users / definers diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs index 32afa1d078..843626303a 100644 --- a/compiler/cmm/CmmOpt.hs +++ b/compiler/cmm/CmmOpt.hs @@ -14,6 +14,7 @@ module CmmOpt ( #include "HsVersions.h" +import CmmUtils import OldCmm import DynFlags import CLabel @@ -184,22 +185,22 @@ cmmMachOpFoldM dflags mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], ar -- Make a RegOff if we can cmmMachOpFoldM _ (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)] - = Just $ CmmRegOff reg (fromIntegral (narrowS rep n)) + = Just $ cmmRegOff reg (fromIntegral (narrowS rep n)) cmmMachOpFoldM _ (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)] - = Just $ CmmRegOff reg (off + fromIntegral (narrowS rep n)) + = Just $ cmmRegOff reg (off + fromIntegral (narrowS rep n)) cmmMachOpFoldM _ (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)] - = Just $ CmmRegOff reg (- fromIntegral (narrowS rep n)) + = Just $ cmmRegOff reg (- fromIntegral (narrowS rep n)) cmmMachOpFoldM _ (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)] - = Just $ CmmRegOff reg (off - fromIntegral (narrowS rep n)) + = Just $ cmmRegOff reg (off - fromIntegral (narrowS rep n)) -- Fold label(+/-)offset into a CmmLit where possible -cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)] - = Just $ CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i))) -cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmInt i rep), CmmLit (CmmLabel lbl)] - = Just $ CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i))) -cmmMachOpFoldM _ (MO_Sub _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)] - = Just $ CmmLit (CmmLabelOff lbl (fromIntegral (negate (narrowU rep i)))) +cmmMachOpFoldM _ (MO_Add _) [CmmLit lit, CmmLit (CmmInt i rep)] + = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i))) +cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmInt i rep), CmmLit lit] + = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i))) +cmmMachOpFoldM _ (MO_Sub _) [CmmLit lit, CmmLit (CmmInt i rep)] + = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (negate (narrowU rep i)))) -- Comparison of literal with widened operand: perform the comparison diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y index 8c3559b774..22e28a8a9d 100644 --- a/compiler/cmm/CmmParse.y +++ b/compiler/cmm/CmmParse.y @@ -1,14 +1,160 @@ ----------------------------------------------------------------------------- -- --- (c) The University of Glasgow, 2004-2006 +-- (c) The University of Glasgow, 2004-2012 -- -- Parser for concrete Cmm. --- This doesn't just parse the Cmm file, we also do some code generation --- along the way for switches and foreign calls etc. -- ----------------------------------------------------------------------------- --- TODO: Add support for interruptible/uninterruptible foreign call specification +{- ----------------------------------------------------------------------------- +Note [Syntax of .cmm files] + +NOTE: You are very much on your own in .cmm. There is very little +error checking at all: + + * Type errors are detected by the (optional) -dcmm-lint pass, if you + don't turn this on then a type error will likely result in a panic + from the native code generator. + + * Passing the wrong number of arguments or arguments of the wrong + type is not detected. + +There are two ways to write .cmm code: + + (1) High-level Cmm code delegates the stack handling to GHC, and + never explicitly mentions Sp or registers. + + (2) Low-level Cmm manages the stack itself, and must know about + calling conventions. + +Whether you want high-level or low-level Cmm is indicated by the +presence of an argument list on a procedure. For example: + +foo ( gcptr a, bits32 b ) +{ + // this is high-level cmm code + + if (b > 0) { + // we can make tail calls passing arguments: + jump stg_ap_0_fast(a); + } + + push (stg_upd_frame_info, a) { + // stack frames can be explicitly pushed + + (x,y) = call wibble(a,b,3,4); + // calls pass arguments and return results using the native + // Haskell calling convention. The code generator will automatically + // construct a stack frame and an info table for the continuation. + + return (x,y); + // we can return multiple values from the current proc + } +} + +bar +{ + // this is low-level cmm code, indicated by the fact that we did not + // put an argument list on bar. + + x = R1; // the calling convention is explicit: better be careful + // that this works on all platforms! + + jump %ENTRY_CODE(Sp(0)) +} + +Here is a list of rules for high-level and low-level code. If you +break the rules, you get a panic (for using a high-level construct in +a low-level proc), or wrong code (when using low-level code in a +high-level proc). This stuff isn't checked! (TODO!) + +High-level only: + + - tail-calls with arguments, e.g. + jump stg_fun (arg1, arg2); + + - function calls: + (ret1,ret2) = call stg_fun (arg1, arg2); + + This makes a call with the NativeNodeCall convention, and the + values are returned to the following code using the NativeReturn + convention. + + - returning: + return (ret1, ret2) + + These use the NativeReturn convention to return zero or more + results to the caller. + + - pushing stack frames: + push (info_ptr, field1, ..., fieldN) { ... statements ... } + +Low-level only: + + - References to Sp, R1-R8, F1-F4 etc. + + NB. foreign calls may clobber the argument registers R1-R8, F1-F4 + etc., so ensure they are saved into variables around foreign + calls. + + - SAVE_THREAD_STATE() and LOAD_THREAD_STATE(), which modify Sp + directly. + +Both high-level and low-level code can use a raw tail-call: + + jump stg_fun [R1,R2] + +This always transfers control to a low-level Cmm function, but the +call can be made from high-level code. Arguments must be passed +explicitly in R/F/D/L registers. + +NB. you *must* specify the list of GlobalRegs that are passed via a +jump, otherwise the register allocator will assume that all the +GlobalRegs are dead at the jump. + + +A stack frame is written like this: + +INFO_TABLE_RET ( label, FRAME_TYPE, info_ptr, field1, ..., fieldN ) + return ( arg1, ..., argM ) +{ + ... code ... +} + +where field1 ... fieldN are the fields of the stack frame (with types) +arg1...argN are the values returned to the stack frame (with types). +The return values are assumed to be passed according to the +NativeReturn convention. + +On entry to the code, the stack frame looks like: + + |----------| + | fieldN | + | ... | + | field1 | + |----------| + | info_ptr | + |----------| + | argN | + | ... | <- Sp + +and some of the args may be in registers. + +We prepend the code by a copyIn of the args, and assign all the stack +frame fields to their formals. The initial "arg offset" for stack +layout purposes consists of the whole stack frame plus any args that +might be on the stack. + +A tail-call may pass a stack frame to the callee using the following +syntax: + +jump f (info_ptr, field1,..,fieldN) (arg1,..,argN) + +where info_ptr and field1..fieldN describe the stack frame, and +arg1..argN are the arguments passed to f using the NativeNodeCall +convention. + +----------------------------------------------------------------------------- -} { {-# LANGUAGE BangPatterns #-} -- required for versions of Happy before 1.18.6 @@ -21,30 +167,32 @@ module CmmParse ( parseCmmFile ) where -import CgMonad -import CgExtCode -import CgHeapery -import CgUtils -import CgProf -import CgTicky -import CgInfoTbls -import CgForeignCall -import CgTailCall -import CgStackery -import ClosureInfo -import CgCallConv -import CgClosure -import CostCentre - -import BlockId -import OldCmm -import OldPprCmm() +import StgCmmExtCode +import CmmCallConv +import StgCmmProf +import StgCmmHeap +import StgCmmMonad hiding ( getCode, getCodeR, emitLabel, emit, emitStore + , emitAssign, emitOutOfLine, withUpdFrameOff + , getUpdFrameOff ) +import qualified StgCmmMonad as F +import StgCmmUtils +import StgCmmForeign +import StgCmmExpr +import StgCmmClosure +import StgCmmLayout +import StgCmmTicky +import StgCmmBind ( emitBlackHoleCode, emitUpdateFrame ) + +import MkGraph +import Cmm import CmmUtils +import BlockId import CmmLex import CLabel import SMRep import Lexer +import CostCentre import ForeignCall import Module import Platform @@ -68,6 +216,7 @@ import Control.Monad import Data.Array import Data.Char ( ord ) import System.Exit +import Data.Maybe #include "HsVersions.h" } @@ -110,41 +259,43 @@ import System.Exit '&&' { L _ (CmmT_BoolAnd) } '||' { L _ (CmmT_BoolOr) } - 'CLOSURE' { L _ (CmmT_CLOSURE) } - 'INFO_TABLE' { L _ (CmmT_INFO_TABLE) } - 'INFO_TABLE_RET' { L _ (CmmT_INFO_TABLE_RET) } - 'INFO_TABLE_FUN' { L _ (CmmT_INFO_TABLE_FUN) } - 'INFO_TABLE_CONSTR' { L _ (CmmT_INFO_TABLE_CONSTR) } - 'INFO_TABLE_SELECTOR' { L _ (CmmT_INFO_TABLE_SELECTOR) } - 'else' { L _ (CmmT_else) } - 'export' { L _ (CmmT_export) } - 'section' { L _ (CmmT_section) } - 'align' { L _ (CmmT_align) } - 'goto' { L _ (CmmT_goto) } - 'if' { L _ (CmmT_if) } - 'jump' { L _ (CmmT_jump) } - 'foreign' { L _ (CmmT_foreign) } - 'never' { L _ (CmmT_never) } - 'prim' { L _ (CmmT_prim) } - 'return' { L _ (CmmT_return) } - 'returns' { L _ (CmmT_returns) } - 'import' { L _ (CmmT_import) } - 'switch' { L _ (CmmT_switch) } - 'case' { L _ (CmmT_case) } - 'default' { L _ (CmmT_default) } - 'bits8' { L _ (CmmT_bits8) } - 'bits16' { L _ (CmmT_bits16) } - 'bits32' { L _ (CmmT_bits32) } - 'bits64' { L _ (CmmT_bits64) } - 'float32' { L _ (CmmT_float32) } - 'float64' { L _ (CmmT_float64) } - 'gcptr' { L _ (CmmT_gcptr) } - - GLOBALREG { L _ (CmmT_GlobalReg $$) } - NAME { L _ (CmmT_Name $$) } - STRING { L _ (CmmT_String $$) } - INT { L _ (CmmT_Int $$) } - FLOAT { L _ (CmmT_Float $$) } + 'CLOSURE' { L _ (CmmT_CLOSURE) } + 'INFO_TABLE' { L _ (CmmT_INFO_TABLE) } + 'INFO_TABLE_RET'{ L _ (CmmT_INFO_TABLE_RET) } + 'INFO_TABLE_FUN'{ L _ (CmmT_INFO_TABLE_FUN) } + 'INFO_TABLE_CONSTR'{ L _ (CmmT_INFO_TABLE_CONSTR) } + 'INFO_TABLE_SELECTOR'{ L _ (CmmT_INFO_TABLE_SELECTOR) } + 'else' { L _ (CmmT_else) } + 'export' { L _ (CmmT_export) } + 'section' { L _ (CmmT_section) } + 'align' { L _ (CmmT_align) } + 'goto' { L _ (CmmT_goto) } + 'if' { L _ (CmmT_if) } + 'call' { L _ (CmmT_call) } + 'jump' { L _ (CmmT_jump) } + 'foreign' { L _ (CmmT_foreign) } + 'never' { L _ (CmmT_never) } + 'prim' { L _ (CmmT_prim) } + 'return' { L _ (CmmT_return) } + 'returns' { L _ (CmmT_returns) } + 'import' { L _ (CmmT_import) } + 'switch' { L _ (CmmT_switch) } + 'case' { L _ (CmmT_case) } + 'default' { L _ (CmmT_default) } + 'push' { L _ (CmmT_push) } + 'bits8' { L _ (CmmT_bits8) } + 'bits16' { L _ (CmmT_bits16) } + 'bits32' { L _ (CmmT_bits32) } + 'bits64' { L _ (CmmT_bits64) } + 'float32' { L _ (CmmT_float32) } + 'float64' { L _ (CmmT_float64) } + 'gcptr' { L _ (CmmT_gcptr) } + + GLOBALREG { L _ (CmmT_GlobalReg $$) } + NAME { L _ (CmmT_Name $$) } + STRING { L _ (CmmT_String $$) } + INT { L _ (CmmT_Int $$) } + FLOAT { L _ (CmmT_Float $$) } %monad { P } { >>= } { return } %lexer { cmmlex } { L _ CmmT_EOF } @@ -166,18 +317,18 @@ import System.Exit %% -cmm :: { ExtCode } - : {- empty -} { return () } - | cmmtop cmm { do $1; $2 } +cmm :: { CmmParse () } + : {- empty -} { return () } + | cmmtop cmm { do $1; $2 } -cmmtop :: { ExtCode } - : cmmproc { $1 } - | cmmdata { $1 } - | decl { $1 } - | 'CLOSURE' '(' NAME ',' NAME lits ')' ';' - {% withThisPackage $ \pkg -> - do lits <- sequence $6; - staticClosure pkg $3 $5 (map getLit lits) } +cmmtop :: { CmmParse () } + : cmmproc { $1 } + | cmmdata { $1 } + | decl { $1 } + | 'CLOSURE' '(' NAME ',' NAME lits ')' ';' + {% withThisPackage $ \pkg -> + do lits <- sequence $6; + staticClosure pkg $3 $5 (map getLit lits) } -- The only static closures in the RTS are dummy closures like -- stg_END_TSO_QUEUE_closure and stg_dummy_ret. We don't need @@ -188,37 +339,37 @@ cmmtop :: { ExtCode } -- * payload is always empty -- * we can derive closure and info table labels from a single NAME -cmmdata :: { ExtCode } - : 'section' STRING '{' data_label statics '}' - { do lbl <- $4; - ss <- sequence $5; - code (emitDecl (CmmData (section $2) (Statics lbl $ concat ss))) } - -data_label :: { ExtFCode CLabel } - : NAME ':' - {% withThisPackage $ \pkg -> - return (mkCmmDataLabel pkg $1) } - -statics :: { [ExtFCode [CmmStatic]] } - : {- empty -} { [] } - | static statics { $1 : $2 } - +cmmdata :: { CmmParse () } + : 'section' STRING '{' data_label statics '}' + { do lbl <- $4; + ss <- sequence $5; + code (emitDecl (CmmData (section $2) (Statics lbl $ concat ss))) } + +data_label :: { CmmParse CLabel } + : NAME ':' + {% withThisPackage $ \pkg -> + return (mkCmmDataLabel pkg $1) } + +statics :: { [CmmParse [CmmStatic]] } + : {- empty -} { [] } + | static statics { $1 : $2 } + -- Strings aren't used much in the RTS HC code, so it doesn't seem -- worth allowing inline strings. C-- doesn't allow them anyway. -static :: { ExtFCode [CmmStatic] } - : type expr ';' { do e <- $2; - return [CmmStaticLit (getLit e)] } - | type ';' { return [CmmUninitialised - (widthInBytes (typeWidth $1))] } - | 'bits8' '[' ']' STRING ';' { return [mkString $4] } - | 'bits8' '[' INT ']' ';' { return [CmmUninitialised - (fromIntegral $3)] } - | typenot8 '[' INT ']' ';' { return [CmmUninitialised - (widthInBytes (typeWidth $1) * - fromIntegral $3)] } - | 'CLOSURE' '(' NAME lits ')' - { do { lits <- sequence $4 - ; dflags <- getDynFlags +static :: { CmmParse [CmmStatic] } + : type expr ';' { do e <- $2; + return [CmmStaticLit (getLit e)] } + | type ';' { return [CmmUninitialised + (widthInBytes (typeWidth $1))] } + | 'bits8' '[' ']' STRING ';' { return [mkString $4] } + | 'bits8' '[' INT ']' ';' { return [CmmUninitialised + (fromIntegral $3)] } + | typenot8 '[' INT ']' ';' { return [CmmUninitialised + (widthInBytes (typeWidth $1) * + fromIntegral $3)] } + | 'CLOSURE' '(' NAME lits ')' + { do { lits <- sequence $4 + ; dflags <- getDynFlags ; return $ map CmmStaticLit $ mkStaticClosure dflags (mkForeignLabel $3 Nothing ForeignLabelInExternalPackage IsData) -- mkForeignLabel because these are only used @@ -226,140 +377,140 @@ static :: { ExtFCode [CmmStatic] } dontCareCCS (map getLit lits) [] [] [] } } -- arrays of closures required for the CHARLIKE & INTLIKE arrays -lits :: { [ExtFCode CmmExpr] } - : {- empty -} { [] } - | ',' expr lits { $2 : $3 } - -cmmproc :: { ExtCode } --- TODO: add real SRT/info tables to parsed Cmm - : info maybe_formals_without_hints '{' body '}' - { do ((entry_ret_label, info, live, formals), stmts) <- - getCgStmtsEC' $ loopDecls $ do { - (entry_ret_label, info, live) <- $1; - formals <- sequence $2; +lits :: { [CmmParse CmmExpr] } + : {- empty -} { [] } + | ',' expr lits { $2 : $3 } + +cmmproc :: { CmmParse () } + : info maybe_conv maybe_formals maybe_body + { do ((entry_ret_label, info, stk_formals, formals), agraph) <- + getCodeR $ loopDecls $ do { + (entry_ret_label, info, stk_formals) <- $1; + formals <- sequence (fromMaybe [] $3); $4; - return (entry_ret_label, info, live, formals) } - blks <- code (cgStmtsToBlocks stmts) - code (emitInfoTableAndCode entry_ret_label info formals blks) } + return (entry_ret_label, info, stk_formals, formals) } + let do_layout = isJust $3 + code (emitProcWithStackFrame $2 info + entry_ret_label stk_formals formals agraph + do_layout ) } - | info maybe_formals_without_hints ';' - { do (entry_ret_label, info, live) <- $1; - formals <- sequence $2; - code (emitInfoTableAndCode entry_ret_label info formals []) } +maybe_conv :: { Convention } + : {- empty -} { NativeNodeCall } + | 'return' { NativeReturn } - | NAME maybe_formals_without_hints '{' body '}' - {% withThisPackage $ \pkg -> - do newFunctionName $1 pkg - (formals, stmts) <- - getCgStmtsEC' $ loopDecls $ do { - formals <- sequence $2; - $4; - return formals } - blks <- code (cgStmtsToBlocks stmts) - code (emitProc Nothing (mkCmmCodeLabel pkg $1) formals blks) } - -info :: { ExtFCode (CLabel, CmmInfoTable, [Maybe LocalReg]) } - : 'INFO_TABLE' '(' NAME ',' INT ',' INT ',' stgHalfWord ',' STRING ',' STRING ')' - -- ptrs, nptrs, closure type, description, type - {% withThisPackage $ \pkg -> +maybe_body :: { CmmParse () } + : ';' { return () } + | '{' body '}' { $2 } + +info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) } + : NAME + {% withThisPackage $ \pkg -> + do newFunctionName $1 pkg + return (mkCmmCodeLabel pkg $1, Nothing, []) } + + + | 'INFO_TABLE' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ')' + -- ptrs, nptrs, closure type, description, type + {% withThisPackage $ \pkg -> do dflags <- getDynFlags let prof = profilingInfo dflags $11 $13 - rep = mkRTSRep $9 $ + rep = mkRTSRep (fromIntegral $9) $ mkHeapRep dflags False (fromIntegral $5) (fromIntegral $7) Thunk -- not really Thunk, but that makes the info table -- we want. return (mkCmmEntryLabel pkg $3, - CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 - , cit_rep = rep - , cit_prof = prof, cit_srt = NoC_SRT }, - []) } - - | 'INFO_TABLE_FUN' '(' NAME ',' INT ',' INT ',' stgHalfWord ',' STRING ',' STRING ',' stgHalfWord ')' - -- ptrs, nptrs, closure type, description, type, fun type - {% withThisPackage $ \pkg -> + Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = NoC_SRT }, + []) } + + | 'INFO_TABLE_FUN' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ',' INT ')' + -- ptrs, nptrs, closure type, description, type, fun type + {% withThisPackage $ \pkg -> do dflags <- getDynFlags let prof = profilingInfo dflags $11 $13 - ty = Fun (toStgHalfWord dflags 0) (ArgSpec $15) + ty = Fun 0 (ArgSpec (fromIntegral $15)) -- Arity zero, arg_type $15 - rep = mkRTSRep $9 $ + rep = mkRTSRep (fromIntegral $9) $ mkHeapRep dflags False (fromIntegral $5) (fromIntegral $7) ty return (mkCmmEntryLabel pkg $3, - CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 - , cit_rep = rep - , cit_prof = prof, cit_srt = NoC_SRT }, - []) } - -- we leave most of the fields zero here. This is only used - -- to generate the BCO info table in the RTS at the moment. - - | 'INFO_TABLE_CONSTR' '(' NAME ',' INT ',' INT ',' stgHalfWord ',' stgHalfWord ',' STRING ',' STRING ')' + Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = NoC_SRT }, + []) } + -- we leave most of the fields zero here. This is only used + -- to generate the BCO info table in the RTS at the moment. + + | 'INFO_TABLE_CONSTR' '(' NAME ',' INT ',' INT ',' INT ',' INT ',' STRING ',' STRING ')' -- ptrs, nptrs, tag, closure type, description, type {% withThisPackage $ \pkg -> do dflags <- getDynFlags let prof = profilingInfo dflags $13 $15 - ty = Constr $9 -- Tag + ty = Constr (fromIntegral $9) -- Tag (stringToWord8s $13) - rep = mkRTSRep $11 $ + rep = mkRTSRep (fromIntegral $11) $ mkHeapRep dflags False (fromIntegral $5) (fromIntegral $7) ty return (mkCmmEntryLabel pkg $3, - CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 - , cit_rep = rep - , cit_prof = prof, cit_srt = NoC_SRT }, - []) } - - -- If profiling is on, this string gets duplicated, - -- but that's the way the old code did it we can fix it some other time. - - | 'INFO_TABLE_SELECTOR' '(' NAME ',' stgWord ',' stgHalfWord ',' STRING ',' STRING ')' - -- selector, closure type, description, type - {% withThisPackage $ \pkg -> + Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = NoC_SRT }, + []) } + + -- If profiling is on, this string gets duplicated, + -- but that's the way the old code did it we can fix it some other time. + + | 'INFO_TABLE_SELECTOR' '(' NAME ',' INT ',' INT ',' STRING ',' STRING ')' + -- selector, closure type, description, type + {% withThisPackage $ \pkg -> do dflags <- getDynFlags let prof = profilingInfo dflags $9 $11 - ty = ThunkSelector $5 - rep = mkRTSRep $7 $ + ty = ThunkSelector (fromIntegral $5) + rep = mkRTSRep (fromIntegral $7) $ mkHeapRep dflags False 0 0 ty return (mkCmmEntryLabel pkg $3, - CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 - , cit_rep = rep - , cit_prof = prof, cit_srt = NoC_SRT }, - []) } - - | 'INFO_TABLE_RET' '(' NAME ',' stgHalfWord ')' - -- closure type (no live regs) - {% withThisPackage $ \pkg -> - do let prof = NoProfilingInfo - rep = mkRTSRep $5 $ mkStackRep [] + Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = NoC_SRT }, + []) } + + | 'INFO_TABLE_RET' '(' NAME ',' INT ')' + -- closure type (no live regs) + {% withThisPackage $ \pkg -> + do let prof = NoProfilingInfo + rep = mkRTSRep (fromIntegral $5) $ mkStackRep [] return (mkCmmRetLabel pkg $3, - CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 - , cit_rep = rep - , cit_prof = prof, cit_srt = NoC_SRT }, - []) } - - | 'INFO_TABLE_RET' '(' NAME ',' stgHalfWord ',' formals_without_hints0 ')' - -- closure type, live regs - {% withThisPackage $ \pkg -> + Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = NoC_SRT }, + []) } + + | 'INFO_TABLE_RET' '(' NAME ',' INT ',' formals0 ')' + -- closure type, live regs + {% withThisPackage $ \pkg -> do dflags <- getDynFlags - live <- sequence (map (liftM Just) $7) - let prof = NoProfilingInfo - bitmap = mkLiveness dflags live - rep = mkRTSRep $5 $ mkStackRep bitmap + live <- sequence $7 + let prof = NoProfilingInfo + -- drop one for the info pointer + bitmap = mkLiveness dflags (map Just (drop 1 live)) + rep = mkRTSRep (fromIntegral $5) $ mkStackRep bitmap return (mkCmmRetLabel pkg $3, - CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 - , cit_rep = rep - , cit_prof = prof, cit_srt = NoC_SRT }, - []) } + Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = NoC_SRT }, + live) } -body :: { ExtCode } - : {- empty -} { return () } - | decl body { do $1; $2 } - | stmt body { do $1; $2 } +body :: { CmmParse () } + : {- empty -} { return () } + | decl body { do $1; $2 } + | stmt body { do $1; $2 } -decl :: { ExtCode } - : type names ';' { mapM_ (newLocal $1) $2 } - | 'import' importNames ';' { mapM_ newImport $2 } - | 'export' names ';' { return () } -- ignore exports +decl :: { CmmParse () } + : type names ';' { mapM_ (newLocal $1) $2 } + | 'import' importNames ';' { mapM_ newImport $2 } + | 'export' names ';' { return () } -- ignore exports -- an imported function name, with optional packageId @@ -371,84 +522,96 @@ importNames importName :: { (FastString, CLabel) } - -- A label imported without an explicit packageId. - -- These are taken to come frome some foreign, unnamed package. - : NAME - { ($1, mkForeignLabel $1 Nothing ForeignLabelInExternalPackage IsFunction) } - - -- A label imported with an explicit packageId. - | STRING NAME - { ($2, mkCmmCodeLabel (fsToPackageId (mkFastString $1)) $2) } - - -names :: { [FastString] } - : NAME { [$1] } - | NAME ',' names { $1 : $3 } - -stmt :: { ExtCode } - : ';' { nopEC } - - | NAME ':' - { do l <- newLabel $1; code (labelC l) } - - | lreg '=' expr ';' - { do reg <- $1; e <- $3; stmtEC (CmmAssign reg e) } - | type '[' expr ']' '=' expr ';' - { doStore $1 $3 $6 } - - -- Gah! We really want to say "maybe_results" but that causes - -- a shift/reduce conflict with assignment. We either - -- we expand out the no-result and single result cases or - -- we tweak the syntax to avoid the conflict. The later - -- option is taken here because the other way would require - -- multiple levels of expanding and get unwieldy. - | maybe_results 'foreign' STRING expr '(' cmm_hint_exprs0 ')' safety vols opt_never_returns ';' - {% foreignCall $3 $1 $4 $6 $9 $8 $10 } - | maybe_results 'prim' '%' NAME '(' cmm_hint_exprs0 ')' safety vols ';' - {% primCall $1 $4 $6 $9 $8 } - -- stmt-level macros, stealing syntax from ordinary C-- function calls. - -- Perhaps we ought to use the %%-form? - | NAME '(' exprs0 ')' ';' - {% stmtMacro $1 $3 } - | 'switch' maybe_range expr '{' arms default '}' - { do as <- sequence $5; doSwitch $2 $3 as $6 } - | 'goto' NAME ';' - { do l <- lookupLabel $2; stmtEC (CmmBranch l) } + -- A label imported without an explicit packageId. + -- These are taken to come frome some foreign, unnamed package. + : NAME + { ($1, mkForeignLabel $1 Nothing ForeignLabelInExternalPackage IsFunction) } + + -- A label imported with an explicit packageId. + | STRING NAME + { ($2, mkCmmCodeLabel (fsToPackageId (mkFastString $1)) $2) } + + +names :: { [FastString] } + : NAME { [$1] } + | NAME ',' names { $1 : $3 } + +stmt :: { CmmParse () } + : ';' { return () } + + | NAME ':' + { do l <- newLabel $1; emitLabel l } + + + + | lreg '=' expr ';' + { do reg <- $1; e <- $3; emitAssign reg e } + | type '[' expr ']' '=' expr ';' + { doStore $1 $3 $6 } + + -- Gah! We really want to say "foreign_results" but that causes + -- a shift/reduce conflict with assignment. We either + -- we expand out the no-result and single result cases or + -- we tweak the syntax to avoid the conflict. The later + -- option is taken here because the other way would require + -- multiple levels of expanding and get unwieldy. + | foreign_results 'foreign' STRING expr '(' cmm_hint_exprs0 ')' safety opt_never_returns ';' + {% foreignCall $3 $1 $4 $6 $8 $9 } + | foreign_results 'prim' '%' NAME '(' exprs0 ')' ';' + {% primCall $1 $4 $6 } + -- stmt-level macros, stealing syntax from ordinary C-- function calls. + -- Perhaps we ought to use the %%-form? + | NAME '(' exprs0 ')' ';' + {% stmtMacro $1 $3 } + | 'switch' maybe_range expr '{' arms default '}' + { do as <- sequence $5; doSwitch $2 $3 as $6 } + | 'goto' NAME ';' + { do l <- lookupLabel $2; emit (mkBranch l) } + | 'return' '(' exprs0 ')' ';' + { doReturn $3 } | 'jump' expr vols ';' - { do e <- $2; stmtEC (CmmJump e $3) } - | 'return' ';' - { stmtEC CmmReturn } + { doRawJump $2 $3 } + | 'jump' expr '(' exprs0 ')' ';' + { doJumpWithStack $2 [] $4 } + | 'jump' expr '(' exprs0 ')' '(' exprs0 ')' ';' + { doJumpWithStack $2 $4 $7 } + | 'call' expr '(' exprs0 ')' ';' + { doCall $2 [] $4 } + | '(' formals ')' '=' 'call' expr '(' exprs0 ')' ';' + { doCall $6 $2 $8 } | 'if' bool_expr 'goto' NAME - { do l <- lookupLabel $4; cmmRawIf $2 l } - | 'if' bool_expr '{' body '}' else - { cmmIfThenElse $2 $4 $6 } + { do l <- lookupLabel $4; cmmRawIf $2 l } + | 'if' bool_expr '{' body '}' else + { cmmIfThenElse $2 $4 $6 } + | 'push' '(' exprs0 ')' maybe_body + { pushStackFrame $3 $5 } opt_never_returns :: { CmmReturnInfo } : { CmmMayReturn } | 'never' 'returns' { CmmNeverReturns } -bool_expr :: { ExtFCode BoolExpr } - : bool_op { $1 } - | expr { do e <- $1; return (BoolTest e) } - -bool_op :: { ExtFCode BoolExpr } - : bool_expr '&&' bool_expr { do e1 <- $1; e2 <- $3; - return (BoolAnd e1 e2) } - | bool_expr '||' bool_expr { do e1 <- $1; e2 <- $3; - return (BoolOr e1 e2) } - | '!' bool_expr { do e <- $2; return (BoolNot e) } - | '(' bool_op ')' { $2 } - --- This is not C-- syntax. What to do? -safety :: { CmmSafety } - : {- empty -} { CmmUnsafe } -- Default may change soon - | STRING {% parseSafety $1 } - --- This is not C-- syntax. What to do? -vols :: { Maybe [GlobalReg] } - : {- empty -} { Nothing } - | '[' ']' { Just [] } - | '[' globals ']' { Just $2 } +bool_expr :: { CmmParse BoolExpr } + : bool_op { $1 } + | expr { do e <- $1; return (BoolTest e) } + +bool_op :: { CmmParse BoolExpr } + : bool_expr '&&' bool_expr { do e1 <- $1; e2 <- $3; + return (BoolAnd e1 e2) } + | bool_expr '||' bool_expr { do e1 <- $1; e2 <- $3; + return (BoolOr e1 e2) } + | '!' bool_expr { do e <- $2; return (BoolNot e) } + | '(' bool_op ')' { $2 } + +safety :: { Safety } + : {- empty -} { PlayRisky } + | STRING {% parseSafety $1 } + +vols :: { [GlobalReg] } + : '[' ']' { [] } + | '[' '*' ']' {% do df <- getDynFlags + ; return (realArgRegs df) } + -- all of them + | '[' globals ']' { $2 } globals :: { [GlobalReg] } : GLOBALREG { [$1] } @@ -458,67 +621,67 @@ maybe_range :: { Maybe (Int,Int) } : '[' INT '..' INT ']' { Just (fromIntegral $2, fromIntegral $4) } | {- empty -} { Nothing } -arms :: { [ExtFCode ([Int],Either BlockId ExtCode)] } - : {- empty -} { [] } - | arm arms { $1 : $2 } +arms :: { [CmmParse ([Int],Either BlockId (CmmParse ()))] } + : {- empty -} { [] } + | arm arms { $1 : $2 } -arm :: { ExtFCode ([Int],Either BlockId ExtCode) } - : 'case' ints ':' arm_body { do b <- $4; return ($2, b) } +arm :: { CmmParse ([Int],Either BlockId (CmmParse ())) } + : 'case' ints ':' arm_body { do b <- $4; return ($2, b) } -arm_body :: { ExtFCode (Either BlockId ExtCode) } - : '{' body '}' { return (Right $2) } - | 'goto' NAME ';' { do l <- lookupLabel $2; return (Left l) } +arm_body :: { CmmParse (Either BlockId (CmmParse ())) } + : '{' body '}' { return (Right $2) } + | 'goto' NAME ';' { do l <- lookupLabel $2; return (Left l) } ints :: { [Int] } : INT { [ fromIntegral $1 ] } | INT ',' ints { fromIntegral $1 : $3 } -default :: { Maybe ExtCode } - : 'default' ':' '{' body '}' { Just $4 } - -- taking a few liberties with the C-- syntax here; C-- doesn't have - -- 'default' branches - | {- empty -} { Nothing } +default :: { Maybe (CmmParse ()) } + : 'default' ':' '{' body '}' { Just $4 } + -- taking a few liberties with the C-- syntax here; C-- doesn't have + -- 'default' branches + | {- empty -} { Nothing } -- Note: OldCmm doesn't support a first class 'else' statement, though -- CmmNode does. -else :: { ExtCode } - : {- empty -} { nopEC } - | 'else' '{' body '}' { $3 } +else :: { CmmParse () } + : {- empty -} { return () } + | 'else' '{' body '}' { $3 } -- we have to write this out longhand so that Happy's precedence rules -- can kick in. -expr :: { ExtFCode CmmExpr } - : expr '/' expr { mkMachOp MO_U_Quot [$1,$3] } - | expr '*' expr { mkMachOp MO_Mul [$1,$3] } - | expr '%' expr { mkMachOp MO_U_Rem [$1,$3] } - | expr '-' expr { mkMachOp MO_Sub [$1,$3] } - | expr '+' expr { mkMachOp MO_Add [$1,$3] } - | expr '>>' expr { mkMachOp MO_U_Shr [$1,$3] } - | expr '<<' expr { mkMachOp MO_Shl [$1,$3] } - | expr '&' expr { mkMachOp MO_And [$1,$3] } - | expr '^' expr { mkMachOp MO_Xor [$1,$3] } - | expr '|' expr { mkMachOp MO_Or [$1,$3] } - | expr '>=' expr { mkMachOp MO_U_Ge [$1,$3] } - | expr '>' expr { mkMachOp MO_U_Gt [$1,$3] } - | expr '<=' expr { mkMachOp MO_U_Le [$1,$3] } - | expr '<' expr { mkMachOp MO_U_Lt [$1,$3] } - | expr '!=' expr { mkMachOp MO_Ne [$1,$3] } - | expr '==' expr { mkMachOp MO_Eq [$1,$3] } - | '~' expr { mkMachOp MO_Not [$2] } - | '-' expr { mkMachOp MO_S_Neg [$2] } - | expr0 '`' NAME '`' expr0 {% do { mo <- nameToMachOp $3 ; - return (mkMachOp mo [$1,$5]) } } - | expr0 { $1 } - -expr0 :: { ExtFCode CmmExpr } - : INT maybe_ty { return (CmmLit (CmmInt $1 (typeWidth $2))) } - | FLOAT maybe_ty { return (CmmLit (CmmFloat $1 (typeWidth $2))) } - | STRING { do s <- code (newStringCLit $1); - return (CmmLit s) } - | reg { $1 } - | type '[' expr ']' { do e <- $3; return (CmmLoad e $1) } - | '%' NAME '(' exprs0 ')' {% exprOp $2 $4 } - | '(' expr ')' { $2 } +expr :: { CmmParse CmmExpr } + : expr '/' expr { mkMachOp MO_U_Quot [$1,$3] } + | expr '*' expr { mkMachOp MO_Mul [$1,$3] } + | expr '%' expr { mkMachOp MO_U_Rem [$1,$3] } + | expr '-' expr { mkMachOp MO_Sub [$1,$3] } + | expr '+' expr { mkMachOp MO_Add [$1,$3] } + | expr '>>' expr { mkMachOp MO_U_Shr [$1,$3] } + | expr '<<' expr { mkMachOp MO_Shl [$1,$3] } + | expr '&' expr { mkMachOp MO_And [$1,$3] } + | expr '^' expr { mkMachOp MO_Xor [$1,$3] } + | expr '|' expr { mkMachOp MO_Or [$1,$3] } + | expr '>=' expr { mkMachOp MO_U_Ge [$1,$3] } + | expr '>' expr { mkMachOp MO_U_Gt [$1,$3] } + | expr '<=' expr { mkMachOp MO_U_Le [$1,$3] } + | expr '<' expr { mkMachOp MO_U_Lt [$1,$3] } + | expr '!=' expr { mkMachOp MO_Ne [$1,$3] } + | expr '==' expr { mkMachOp MO_Eq [$1,$3] } + | '~' expr { mkMachOp MO_Not [$2] } + | '-' expr { mkMachOp MO_S_Neg [$2] } + | expr0 '`' NAME '`' expr0 {% do { mo <- nameToMachOp $3 ; + return (mkMachOp mo [$1,$5]) } } + | expr0 { $1 } + +expr0 :: { CmmParse CmmExpr } + : INT maybe_ty { return (CmmLit (CmmInt $1 (typeWidth $2))) } + | FLOAT maybe_ty { return (CmmLit (CmmFloat $1 (typeWidth $2))) } + | STRING { do s <- code (newStringCLit $1); + return (CmmLit s) } + | reg { $1 } + | type '[' expr ']' { do e <- $3; return (CmmLoad e $1) } + | '%' NAME '(' exprs0 ')' {% exprOp $2 $4 } + | '(' expr ')' { $2 } -- leaving out the type of a literal gives you the native word size in C-- @@ -526,81 +689,78 @@ maybe_ty :: { CmmType } : {- empty -} {% do dflags <- getDynFlags; return $ bWord dflags } | '::' type { $2 } -maybe_actuals :: { [ExtFCode HintedCmmActual] } - : {- empty -} { [] } - | '(' cmm_hint_exprs0 ')' { $2 } - -cmm_hint_exprs0 :: { [ExtFCode HintedCmmActual] } - : {- empty -} { [] } +cmm_hint_exprs0 :: { [CmmParse (CmmExpr, ForeignHint)] } + : {- empty -} { [] } | cmm_hint_exprs { $1 } -cmm_hint_exprs :: { [ExtFCode HintedCmmActual] } - : cmm_hint_expr { [$1] } - | cmm_hint_expr ',' cmm_hint_exprs { $1 : $3 } +cmm_hint_exprs :: { [CmmParse (CmmExpr, ForeignHint)] } + : cmm_hint_expr { [$1] } + | cmm_hint_expr ',' cmm_hint_exprs { $1 : $3 } -cmm_hint_expr :: { ExtFCode HintedCmmActual } - : expr { do e <- $1; return (CmmHinted e (inferCmmHint e)) } - | expr STRING {% do h <- parseCmmHint $2; - return $ do - e <- $1; return (CmmHinted e h) } +cmm_hint_expr :: { CmmParse (CmmExpr, ForeignHint) } + : expr { do e <- $1; + return (e, inferCmmHint e) } + | expr STRING {% do h <- parseCmmHint $2; + return $ do + e <- $1; return (e, h) } -exprs0 :: { [ExtFCode CmmExpr] } - : {- empty -} { [] } - | exprs { $1 } - -exprs :: { [ExtFCode CmmExpr] } - : expr { [ $1 ] } - | expr ',' exprs { $1 : $3 } - -reg :: { ExtFCode CmmExpr } - : NAME { lookupName $1 } - | GLOBALREG { return (CmmReg (CmmGlobal $1)) } - -maybe_results :: { [ExtFCode HintedCmmFormal] } - : {- empty -} { [] } - | '(' cmm_formals ')' '=' { $2 } - -cmm_formals :: { [ExtFCode HintedCmmFormal] } - : cmm_formal { [$1] } - | cmm_formal ',' { [$1] } - | cmm_formal ',' cmm_formals { $1 : $3 } - -cmm_formal :: { ExtFCode HintedCmmFormal } - : local_lreg { do e <- $1; return (CmmHinted e (inferCmmHint (CmmReg (CmmLocal e)))) } - | STRING local_lreg {% do h <- parseCmmHint $1; - return $ do - e <- $2; return (CmmHinted e h) } - -local_lreg :: { ExtFCode LocalReg } - : NAME { do e <- lookupName $1; - return $ - case e of - CmmReg (CmmLocal r) -> r - other -> pprPanic "CmmParse:" (ftext $1 <> text " not a local register") } - -lreg :: { ExtFCode CmmReg } - : NAME { do e <- lookupName $1; - return $ - case e of - CmmReg r -> r - other -> pprPanic "CmmParse:" (ftext $1 <> text " not a register") } - | GLOBALREG { return (CmmGlobal $1) } - -maybe_formals_without_hints :: { [ExtFCode LocalReg] } - : {- empty -} { [] } - | '(' formals_without_hints0 ')' { $2 } - -formals_without_hints0 :: { [ExtFCode LocalReg] } - : {- empty -} { [] } - | formals_without_hints { $1 } +exprs0 :: { [CmmParse CmmExpr] } + : {- empty -} { [] } + | exprs { $1 } -formals_without_hints :: { [ExtFCode LocalReg] } - : formal_without_hint ',' { [$1] } - | formal_without_hint { [$1] } - | formal_without_hint ',' formals_without_hints { $1 : $3 } +exprs :: { [CmmParse CmmExpr] } + : expr { [ $1 ] } + | expr ',' exprs { $1 : $3 } -formal_without_hint :: { ExtFCode LocalReg } - : type NAME { newLocal $1 $2 } +reg :: { CmmParse CmmExpr } + : NAME { lookupName $1 } + | GLOBALREG { return (CmmReg (CmmGlobal $1)) } + +foreign_results :: { [CmmParse (LocalReg, ForeignHint)] } + : {- empty -} { [] } + | '(' foreign_formals ')' '=' { $2 } + +foreign_formals :: { [CmmParse (LocalReg, ForeignHint)] } + : foreign_formal { [$1] } + | foreign_formal ',' { [$1] } + | foreign_formal ',' foreign_formals { $1 : $3 } + +foreign_formal :: { CmmParse (LocalReg, ForeignHint) } + : local_lreg { do e <- $1; return (e, (inferCmmHint (CmmReg (CmmLocal e)))) } + | STRING local_lreg {% do h <- parseCmmHint $1; + return $ do + e <- $2; return (e,h) } + +local_lreg :: { CmmParse LocalReg } + : NAME { do e <- lookupName $1; + return $ + case e of + CmmReg (CmmLocal r) -> r + other -> pprPanic "CmmParse:" (ftext $1 <> text " not a local register") } + +lreg :: { CmmParse CmmReg } + : NAME { do e <- lookupName $1; + return $ + case e of + CmmReg r -> r + other -> pprPanic "CmmParse:" (ftext $1 <> text " not a register") } + | GLOBALREG { return (CmmGlobal $1) } + +maybe_formals :: { Maybe [CmmParse LocalReg] } + : {- empty -} { Nothing } + | '(' formals0 ')' { Just $2 } + +formals0 :: { [CmmParse LocalReg] } + : {- empty -} { [] } + | formals { $1 } + +formals :: { [CmmParse LocalReg] } + : formal ',' { [$1] } + | formal { [$1] } + | formal ',' formals { $1 : $3 } + +formal :: { CmmParse LocalReg } + : type NAME { newLocal $1 $2 } type :: { CmmType } : 'bits8' { b8 } @@ -614,12 +774,6 @@ typenot8 :: { CmmType } | 'float64' { f64 } | 'gcptr' {% do dflags <- getDynFlags; return $ gcWord dflags } -stgWord :: { StgWord } - : INT {% do dflags <- getDynFlags; return $ toStgWord dflags $1 } - -stgHalfWord :: { StgHalfWord } - : INT {% do dflags <- getDynFlags; return $ toStgHalfWord dflags $1 } - { section :: String -> Section section "text" = Text @@ -632,11 +786,22 @@ section s = OtherSection s mkString :: String -> CmmStatic mkString s = CmmString (map (fromIntegral.ord) s) +-- | +-- Given an info table, decide what the entry convention for the proc +-- is. That is, for an INFO_TABLE_RET we want the return convention, +-- otherwise it is a NativeNodeCall. +-- +infoConv :: Maybe CmmInfoTable -> Convention +infoConv Nothing = NativeNodeCall +infoConv (Just info) + | isStackRep (cit_rep info) = NativeReturn + | otherwise = NativeNodeCall + -- mkMachOp infers the type of the MachOp from the type of its first -- argument. We assume that this is correct: for MachOps that don't have -- symmetrical args (e.g. shift ops), the first arg determines the type of -- the op. -mkMachOp :: (Width -> MachOp) -> [ExtFCode CmmExpr] -> ExtFCode CmmExpr +mkMachOp :: (Width -> MachOp) -> [CmmParse CmmExpr] -> CmmParse CmmExpr mkMachOp fn args = do dflags <- getDynFlags arg_exprs <- sequence args @@ -653,7 +818,7 @@ nameToMachOp name = Nothing -> fail ("unknown primitive " ++ unpackFS name) Just m -> return m -exprOp :: FastString -> [ExtFCode CmmExpr] -> P (ExtFCode CmmExpr) +exprOp :: FastString -> [CmmParse CmmExpr] -> P (CmmParse CmmExpr) exprOp name args_code = do dflags <- getDynFlags case lookupUFM (exprMacros dflags) name of @@ -755,10 +920,10 @@ callishMachOps = listToUFM $ -- ToDo: the rest, maybe ] -parseSafety :: String -> P CmmSafety -parseSafety "safe" = return (CmmSafe NoC_SRT) -parseSafety "unsafe" = return CmmUnsafe -parseSafety "interruptible" = return CmmInterruptible +parseSafety :: String -> P Safety +parseSafety "safe" = return PlaySafe +parseSafety "unsafe" = return PlayRisky +parseSafety "interruptible" = return PlayInterruptible parseSafety str = fail ("unrecognised safety: " ++ str) parseCmmHint :: String -> P ForeignHint @@ -788,7 +953,7 @@ happyError = srcParseFail -- ----------------------------------------------------------------------------- -- Statement-level macros -stmtMacro :: FastString -> [ExtFCode CmmExpr] -> P ExtCode +stmtMacro :: FastString -> [CmmParse CmmExpr] -> P (CmmParse ()) stmtMacro fun args_code = do case lookupUFM stmtMacros fun of Nothing -> fail ("unknown macro: " ++ unpackFS fun) @@ -796,49 +961,61 @@ stmtMacro fun args_code = do args <- sequence args_code code (fcode args) -stmtMacros :: UniqFM ([CmmExpr] -> Code) +stmtMacros :: UniqFM ([CmmExpr] -> FCode ()) stmtMacros = listToUFM [ ( fsLit "CCS_ALLOC", \[words,ccs] -> profAlloc words ccs ), + ( fsLit "ENTER_CCS_THUNK", \[e] -> enterCostCentreThunk e ), + ( fsLit "CLOSE_NURSERY", \[] -> emitCloseNursery ), - ( fsLit "ENTER_CCS_THUNK", \[e] -> enterCostCentreThunk e ), - ( fsLit "HP_CHK_GEN", \[words,liveness,reentry] -> - hpChkGen words liveness reentry ), - ( fsLit "HP_CHK_NP_ASSIGN_SP0", \[e,f] -> hpChkNodePointsAssignSp0 e f ), - ( fsLit "LOAD_THREAD_STATE", \[] -> emitLoadThreadState ), - ( fsLit "LDV_ENTER", \[e] -> ldvEnter e ), - ( fsLit "LDV_RECORD_CREATE", \[e] -> ldvRecordCreate e ), ( fsLit "OPEN_NURSERY", \[] -> emitOpenNursery ), + + -- completely generic heap and stack checks, for use in high-level cmm. + ( fsLit "HP_CHK_GEN", \[bytes] -> + heapStackCheckGen Nothing (Just bytes) ), + ( fsLit "STK_CHK_GEN", \[] -> + heapStackCheckGen (Just (CmmLit CmmHighStackMark)) Nothing ), + + -- A stack check for a fixed amount of stack. Sounds a bit strange, but + -- we use the stack for a bit of temporary storage in a couple of primops + ( fsLit "STK_CHK_GEN_N", \[bytes] -> + heapStackCheckGen (Just bytes) Nothing ), + + -- A stack check on entry to a thunk, where the argument is the thunk pointer. + ( fsLit "STK_CHK_NP" , \[node] -> entryHeapCheck' False node 0 [] (return ())), + + ( fsLit "LOAD_THREAD_STATE", \[] -> emitLoadThreadState ), + ( fsLit "SAVE_THREAD_STATE", \[] -> emitSaveThreadState ), + + ( fsLit "LDV_ENTER", \[e] -> ldvEnter e ), + ( fsLit "LDV_RECORD_CREATE", \[e] -> ldvRecordCreate e ), + ( fsLit "PUSH_UPD_FRAME", \[sp,e] -> emitPushUpdateFrame sp e ), - ( fsLit "SAVE_THREAD_STATE", \[] -> emitSaveThreadState ), ( fsLit "SET_HDR", \[ptr,info,ccs] -> - emitSetDynHdr ptr info ccs ), - ( fsLit "STK_CHK_GEN", \[words,liveness,reentry] -> - stkChkGen words liveness reentry ), - ( fsLit "STK_CHK_NP", \[e] -> stkChkNodePoints e ), + emitSetDynHdr ptr info ccs ), ( fsLit "TICK_ALLOC_PRIM", \[hdr,goods,slop] -> - tickyAllocPrim hdr goods slop ), - ( fsLit "TICK_ALLOC_PAP", \[goods,slop] -> - tickyAllocPAP goods slop ), - ( fsLit "TICK_ALLOC_UP_THK", \[goods,slop] -> - tickyAllocThunk goods slop ), - ( fsLit "UPD_BH_UPDATABLE", \[] -> emitBlackHoleCode False ), - ( fsLit "UPD_BH_SINGLE_ENTRY", \[] -> emitBlackHoleCode True ), - - ( fsLit "RET_P", \[a] -> emitRetUT [(PtrArg,a)]), - ( fsLit "RET_N", \[a] -> emitRetUT [(NonPtrArg,a)]), - ( fsLit "RET_PP", \[a,b] -> emitRetUT [(PtrArg,a),(PtrArg,b)]), - ( fsLit "RET_NN", \[a,b] -> emitRetUT [(NonPtrArg,a),(NonPtrArg,b)]), - ( fsLit "RET_NP", \[a,b] -> emitRetUT [(NonPtrArg,a),(PtrArg,b)]), - ( fsLit "RET_PPP", \[a,b,c] -> emitRetUT [(PtrArg,a),(PtrArg,b),(PtrArg,c)]), - ( fsLit "RET_NPP", \[a,b,c] -> emitRetUT [(NonPtrArg,a),(PtrArg,b),(PtrArg,c)]), - ( fsLit "RET_NNP", \[a,b,c] -> emitRetUT [(NonPtrArg,a),(NonPtrArg,b),(PtrArg,c)]), - ( fsLit "RET_NNN", \[a,b,c] -> emitRetUT [(NonPtrArg,a),(NonPtrArg,b),(NonPtrArg,c)]), - ( fsLit "RET_NNNN", \[a,b,c,d] -> emitRetUT [(NonPtrArg,a),(NonPtrArg,b),(NonPtrArg,c),(NonPtrArg,d)]), - ( fsLit "RET_NNNP", \[a,b,c,d] -> emitRetUT [(NonPtrArg,a),(NonPtrArg,b),(NonPtrArg,c),(PtrArg,d)]), - ( fsLit "RET_NPNP", \[a,b,c,d] -> emitRetUT [(NonPtrArg,a),(PtrArg,b),(NonPtrArg,c),(PtrArg,d)]) - + tickyAllocPrim hdr goods slop ), + ( fsLit "TICK_ALLOC_PAP", \[goods,slop] -> + tickyAllocPAP goods slop ), + ( fsLit "TICK_ALLOC_UP_THK", \[goods,slop] -> + tickyAllocThunk goods slop ), + ( fsLit "UPD_BH_UPDATABLE", \[reg] -> emitBlackHoleCode False reg ), + ( fsLit "UPD_BH_SINGLE_ENTRY", \[reg] -> emitBlackHoleCode True reg ) ] +emitPushUpdateFrame :: CmmExpr -> CmmExpr -> FCode () +emitPushUpdateFrame sp e = do + dflags <- getDynFlags + emitUpdateFrame dflags sp mkUpdInfoLabel e + +pushStackFrame :: [CmmParse CmmExpr] -> CmmParse () -> CmmParse () +pushStackFrame fields body = do + dflags <- getDynFlags + exprs <- sequence fields + updfr_off <- getUpdFrameOff + let (new_updfr_off, _, g) = copyOutOflow dflags NativeReturn Ret Old + [] updfr_off exprs + emit g + withUpdFrameOff new_updfr_off body profilingInfo dflags desc_str ty_str = if not (dopt Opt_SccProfilingOn dflags) @@ -846,7 +1023,7 @@ profilingInfo dflags desc_str ty_str else ProfilingInfo (stringToWord8s desc_str) (stringToWord8s ty_str) -staticClosure :: PackageId -> FastString -> FastString -> [CmmLit] -> ExtCode +staticClosure :: PackageId -> FastString -> FastString -> [CmmLit] -> CmmParse () staticClosure pkg cl_label info payload = do dflags <- getDynFlags let lits = mkStaticClosure dflags (mkCmmInfoLabel pkg info) dontCareCCS payload [] [] [] @@ -854,78 +1031,93 @@ staticClosure pkg cl_label info payload foreignCall :: String - -> [ExtFCode HintedCmmFormal] - -> ExtFCode CmmExpr - -> [ExtFCode HintedCmmActual] - -> Maybe [GlobalReg] - -> CmmSafety + -> [CmmParse (LocalReg, ForeignHint)] + -> CmmParse CmmExpr + -> [CmmParse (CmmExpr, ForeignHint)] + -> Safety -> CmmReturnInfo - -> P ExtCode -foreignCall conv_string results_code expr_code args_code vols safety ret - = do convention <- case conv_string of + -> P (CmmParse ()) +foreignCall conv_string results_code expr_code args_code safety ret + = do conv <- case conv_string of "C" -> return CCallConv "stdcall" -> return StdCallConv - "C--" -> return CmmCallConv _ -> fail ("unknown calling convention: " ++ conv_string) return $ do dflags <- getDynFlags - let platform = targetPlatform dflags results <- sequence results_code - expr <- expr_code - args <- sequence args_code - case convention of - -- Temporary hack so at least some functions are CmmSafe - CmmCallConv -> code (stmtC (CmmCall (CmmCallee expr convention) results args ret)) - _ -> - let expr' = adjCallTarget dflags convention expr args in - case safety of - CmmUnsafe -> - code (emitForeignCall' PlayRisky results - (CmmCallee expr' convention) args vols NoC_SRT ret) - CmmSafe srt -> - code (emitForeignCall' PlaySafe results - (CmmCallee expr' convention) args vols NoC_SRT ret) where - CmmInterruptible -> - code (emitForeignCall' PlayInterruptible results - (CmmCallee expr' convention) args vols NoC_SRT ret) - -adjCallTarget :: DynFlags -> CCallConv -> CmmExpr -> [CmmHinted CmmExpr] + expr <- expr_code + args <- sequence args_code + let + expr' = adjCallTarget dflags conv expr args + (arg_exprs, arg_hints) = unzip args + (res_regs, res_hints) = unzip results + fc = ForeignConvention conv arg_hints res_hints ret + target = ForeignTarget expr' fc + _ <- code $ emitForeignCall safety res_regs target arg_exprs + return () + + +doReturn :: [CmmParse CmmExpr] -> CmmParse () +doReturn exprs_code = do + dflags <- getDynFlags + exprs <- sequence exprs_code + updfr_off <- getUpdFrameOff + emit (mkReturnSimple dflags exprs updfr_off) + +doRawJump :: CmmParse CmmExpr -> [GlobalReg] -> CmmParse () +doRawJump expr_code vols = do + dflags <- getDynFlags + expr <- expr_code + updfr_off <- getUpdFrameOff + emit (mkRawJump dflags expr updfr_off vols) + +doJumpWithStack :: CmmParse CmmExpr -> [CmmParse CmmExpr] + -> [CmmParse CmmExpr] -> CmmParse () +doJumpWithStack expr_code stk_code args_code = do + dflags <- getDynFlags + expr <- expr_code + stk_args <- sequence stk_code + args <- sequence args_code + updfr_off <- getUpdFrameOff + emit (mkJumpExtra dflags expr args updfr_off stk_args) + +doCall :: CmmParse CmmExpr -> [CmmParse LocalReg] -> [CmmParse CmmExpr] + -> CmmParse () +doCall expr_code res_code args_code = do + dflags <- getDynFlags + expr <- expr_code + args <- sequence args_code + ress <- sequence res_code + updfr_off <- getUpdFrameOff + c <- code $ mkCall expr (NativeNodeCall,NativeReturn) ress args updfr_off [] + emit c + +adjCallTarget :: DynFlags -> CCallConv -> CmmExpr -> [(CmmExpr, ForeignHint) ] -> CmmExpr -- On Windows, we have to add the '@N' suffix to the label when making -- a call with the stdcall calling convention. adjCallTarget dflags StdCallConv (CmmLit (CmmLabel lbl)) args | platformOS (targetPlatform dflags) == OSMinGW32 = CmmLit (CmmLabel (addLabelSize lbl (sum (map size args)))) - where size (CmmHinted e _) = max (wORD_SIZE dflags) (widthInBytes (typeWidth (cmmExprType dflags e))) + where size (e, _) = max (wORD_SIZE dflags) (widthInBytes (typeWidth (cmmExprType dflags e))) -- c.f. CgForeignCall.emitForeignCall adjCallTarget _ _ expr _ = expr primCall - :: [ExtFCode HintedCmmFormal] - -> FastString - -> [ExtFCode HintedCmmActual] - -> Maybe [GlobalReg] - -> CmmSafety - -> P ExtCode -primCall results_code name args_code vols safety + :: [CmmParse (CmmFormal, ForeignHint)] + -> FastString + -> [CmmParse CmmExpr] + -> P (CmmParse ()) +primCall results_code name args_code = case lookupUFM callishMachOps name of Nothing -> fail ("unknown primitive " ++ unpackFS name) - Just p -> return $ do - results <- sequence results_code - args <- sequence args_code - case safety of - CmmUnsafe -> - code (emitForeignCall' PlayRisky results - (CmmPrim p Nothing) args vols NoC_SRT CmmMayReturn) - CmmSafe srt -> - code (emitForeignCall' PlaySafe results - (CmmPrim p Nothing) args vols NoC_SRT CmmMayReturn) where - CmmInterruptible -> - code (emitForeignCall' PlayInterruptible results - (CmmPrim p Nothing) args vols NoC_SRT CmmMayReturn) - -doStore :: CmmType -> ExtFCode CmmExpr -> ExtFCode CmmExpr -> ExtCode + Just p -> return $ do + results <- sequence results_code + args <- sequence args_code + code (emitPrimCall (map fst results) p args) + +doStore :: CmmType -> CmmParse CmmExpr -> CmmParse CmmExpr -> CmmParse () doStore rep addr_code val_code = do dflags <- getDynFlags addr <- addr_code @@ -940,19 +1132,7 @@ doStore rep addr_code val_code let coerce_val | val_width /= rep_width = CmmMachOp (MO_UU_Conv val_width rep_width) [val] | otherwise = val - stmtEC (CmmStore addr coerce_val) - --- Return an unboxed tuple. -emitRetUT :: [(CgRep,CmmExpr)] -> Code -emitRetUT args = do - dflags <- getDynFlags - tickyUnboxedTupleReturn (length args) -- TICK - (sp, stmts, live) <- pushUnboxedTuple 0 args - emitSimultaneously stmts -- NB. the args might overlap with the stack slots - -- or regs that we assign to, so better use - -- simultaneous assignments here (#3546) - when (sp /= 0) $ stmtC (CmmAssign spReg (cmmRegOffW dflags spReg (-sp))) - stmtC $ CmmJump (entryCode dflags (CmmLoad (cmmRegOffW dflags spReg sp) (bWord dflags))) (Just live) + emitStore addr coerce_val -- ----------------------------------------------------------------------------- -- If-then-else and boolean expressions @@ -966,16 +1146,16 @@ data BoolExpr -- ToDo: smart constructors which simplify the boolean expression. cmmIfThenElse cond then_part else_part = do - then_id <- code newLabelC - join_id <- code newLabelC + then_id <- newBlockId + join_id <- newBlockId c <- cond emitCond c then_id else_part - stmtEC (CmmBranch join_id) - code (labelC then_id) + emit (mkBranch join_id) + emitLabel then_id then_part -- fall through to join - code (labelC join_id) + emitLabel join_id cmmRawIf cond then_id = do c <- cond @@ -984,30 +1164,32 @@ cmmRawIf cond then_id = do -- 'emitCond cond true_id' emits code to test whether the cond is true, -- branching to true_id if so, and falling through otherwise. emitCond (BoolTest e) then_id = do - stmtEC (CmmCondBranch e then_id) + else_id <- newBlockId + emit (mkCbranch e then_id else_id) + emitLabel else_id emitCond (BoolNot (BoolTest (CmmMachOp op args))) then_id | Just op' <- maybeInvertComparison op = emitCond (BoolTest (CmmMachOp op' args)) then_id emitCond (BoolNot e) then_id = do - else_id <- code newLabelC + else_id <- newBlockId emitCond e else_id - stmtEC (CmmBranch then_id) - code (labelC else_id) + emit (mkBranch then_id) + emitLabel else_id emitCond (e1 `BoolOr` e2) then_id = do emitCond e1 then_id emitCond e2 then_id emitCond (e1 `BoolAnd` e2) then_id = do -- we'd like to invert one of the conditionals here to avoid an - -- extra branch instruction, but we can't use maybeInvertComparison - -- here because we can't look too closely at the expression since - -- we're in a loop. - and_id <- code newLabelC - else_id <- code newLabelC + -- extra branch instruction, but we can't use maybeInvertComparison + -- here because we can't look too closely at the expression since + -- we're in a loop. + and_id <- newBlockId + else_id <- newBlockId emitCond e1 and_id - stmtEC (CmmBranch else_id) - code (labelC and_id) + emit (mkBranch else_id) + emitLabel and_id emitCond e2 then_id - code (labelC else_id) + emitLabel else_id -- ----------------------------------------------------------------------------- @@ -1020,38 +1202,45 @@ emitCond (e1 `BoolAnd` e2) then_id = do -- optional range on the switch (eg. switch [0..7] {...}), or by -- the minimum/maximum values from the branches. -doSwitch :: Maybe (Int,Int) -> ExtFCode CmmExpr -> [([Int],Either BlockId ExtCode)] - -> Maybe ExtCode -> ExtCode +doSwitch :: Maybe (Int,Int) -> CmmParse CmmExpr -> [([Int],Either BlockId (CmmParse ()))] + -> Maybe (CmmParse ()) -> CmmParse () doSwitch mb_range scrut arms deflt = do - -- Compile code for the default branch - dflt_entry <- - case deflt of - Nothing -> return Nothing - Just e -> do b <- forkLabelledCodeEC e; return (Just b) - - -- Compile each case branch - table_entries <- mapM emitArm arms - - -- Construct the table - let - all_entries = concat table_entries - ixs = map fst all_entries - (min,max) - | Just (l,u) <- mb_range = (l,u) - | otherwise = (minimum ixs, maximum ixs) - - entries = elems (accumArray (\_ a -> Just a) dflt_entry (min,max) - all_entries) - expr <- scrut - -- ToDo: check for out of range and jump to default if necessary - stmtEC (CmmSwitch expr entries) + -- Compile code for the default branch + dflt_entry <- + case deflt of + Nothing -> return Nothing + Just e -> do b <- forkLabelledCode e; return (Just b) + + -- Compile each case branch + table_entries <- mapM emitArm arms + + -- Construct the table + let + all_entries = concat table_entries + ixs = map fst all_entries + (min,max) + | Just (l,u) <- mb_range = (l,u) + | otherwise = (minimum ixs, maximum ixs) + + entries = elems (accumArray (\_ a -> Just a) dflt_entry (min,max) + all_entries) + expr <- scrut + -- ToDo: check for out of range and jump to default if necessary + emit (mkSwitch expr entries) where - emitArm :: ([Int],Either BlockId ExtCode) -> ExtFCode [(Int,BlockId)] - emitArm (ints,Left blockid) = return [ (i,blockid) | i <- ints ] - emitArm (ints,Right code) = do - blockid <- forkLabelledCodeEC code - return [ (i,blockid) | i <- ints ] + emitArm :: ([Int],Either BlockId (CmmParse ())) -> CmmParse [(Int,BlockId)] + emitArm (ints,Left blockid) = return [ (i,blockid) | i <- ints ] + emitArm (ints,Right code) = do + blockid <- forkLabelledCode code + return [ (i,blockid) | i <- ints ] + +forkLabelledCode :: CmmParse () -> CmmParse BlockId +forkLabelledCode p = do + ag <- getCode p + l <- newBlockId + emitOutOfLine l ag + return l -- ----------------------------------------------------------------------------- -- Putting it all together diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs index 5fca9e7164..4f5d3b926c 100644 --- a/compiler/cmm/CmmPipeline.hs +++ b/compiler/cmm/CmmPipeline.hs @@ -52,7 +52,7 @@ cmmPipeline hsc_env topSRT prog = cpsTop :: HscEnv -> CmmDecl -> IO (CAFEnv, [CmmDecl]) cpsTop _ p@(CmmData {}) = return (mapEmpty, [p]) -cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) l g) = +cpsTop hsc_env proc = do ----------- Control-flow optimisations ---------------------------------- @@ -60,10 +60,13 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) -- later passes by removing lots of empty blocks, so we do it -- even when optimisation isn't turned on. -- - g <- {-# SCC "cmmCfgOpts(1)" #-} - return $ cmmCfgOpts splitting_proc_points g + CmmProc h l g <- {-# SCC "cmmCfgOpts(1)" #-} + return $ cmmCfgOptsProc splitting_proc_points proc dump Opt_D_dump_cmmz_cfg "Post control-flow optimsations" g + let !TopInfo {stack_info=StackInfo { arg_space = entry_off + , do_layout = do_layout }} = h + ----------- Eliminate common blocks ------------------------------------- g <- {-# SCC "elimCommonBlocks" #-} condPass Opt_CmmElimCommonBlocks elimCommonBlocks g @@ -95,7 +98,9 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) ----------- Layout the stack and manifest Sp ---------------------------- (g, stackmaps) <- {-# SCC "layoutStack" #-} - runUniqSM $ cmmLayoutStack dflags proc_points entry_off g + if do_layout + then runUniqSM $ cmmLayoutStack dflags proc_points entry_off g + else return (g, mapEmpty) dump Opt_D_dump_cmmz_sp "Layout Stack" g ----------- Sink and inline assignments *after* stack layout ------------ diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs index 471faf8b0c..19f0155908 100644 --- a/compiler/cmm/CmmProcPoint.hs +++ b/compiler/cmm/CmmProcPoint.hs @@ -291,7 +291,8 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap let to_proc (bid, g) = case expectJust "pp label" $ mapLookup bid procLabels of (lbl, Just info_lbl) | bid == entry - -> CmmProc (TopInfo {info_tbls=info_tbls, stack_info=stack_info}) + -> CmmProc (TopInfo {info_tbls = info_tbls, + stack_info = stack_info}) top_l (replacePPIds g) | otherwise -> CmmProc (TopInfo {info_tbls = mapSingleton (g_entry g) (mkEmptyContInfoTable info_lbl), stack_info=stack_info}) @@ -300,7 +301,9 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap -> CmmProc (TopInfo {info_tbls = mapEmpty, stack_info=stack_info}) lbl (replacePPIds g) where - stack_info = StackInfo 0 Nothing -- panic "No StackInfo" + stack_info = StackInfo { arg_space = 0 + , updfr_space = Nothing + , do_layout = True } -- cannot use panic, this is printed by -ddump-cmmz -- References to procpoint IDs can now be replaced with the diff --git a/compiler/cmm/CmmSink.hs b/compiler/cmm/CmmSink.hs index 2ff9b98d2a..6dccdabe89 100644 --- a/compiler/cmm/CmmSink.hs +++ b/compiler/cmm/CmmSink.hs @@ -6,6 +6,7 @@ module CmmSink ( import CodeGen.Platform (callerSaves) import Cmm +import CmmOpt import BlockId import CmmLive import CmmUtils @@ -13,8 +14,7 @@ import Hoopl import DynFlags import UniqFM --- import PprCmm () --- import Outputable +import PprCmm () import Data.List (partition) import qualified Data.Set as Set @@ -76,9 +76,11 @@ import qualified Data.Set as Set -- *but*, that will invalidate the liveness analysis, and we'll have -- to re-do it. --- TODO: things that we aren't optimising very well yet. +-- ----------------------------------------------------------------------------- +-- things that we aren't optimising very well yet. -- --- From GHC's FastString.hashStr: +-- ----------- +-- (1) From GHC's FastString.hashStr: -- -- s2ay: -- if ((_s2an::I64 == _s2ao::I64) >= 1) goto c2gn; else goto c2gp; @@ -95,6 +97,26 @@ import qualified Data.Set as Set -- a nice loop, but we didn't eliminate the silly assignment at the end. -- See Note [dependent assignments], which would probably fix this. -- +-- ----------- +-- (2) From stg_atomically_frame in PrimOps.cmm +-- +-- We have a diamond control flow: +-- +-- x = ... +-- | +-- / \ +-- A B +-- \ / +-- | +-- use of x +-- +-- Now x won't be sunk down to its use, because we won't push it into +-- both branches of the conditional. We certainly do have to check +-- that we can sink it past all the code in both A and B, but having +-- discovered that, we could sink it to its use. +-- + +-- ----------------------------------------------------------------------------- type Assignment = (LocalReg, CmmExpr, AbsMem) -- Assignment caches AbsMem, an abstraction of the memory read by @@ -130,7 +152,8 @@ cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks -- Now sink and inline in this block (middle', assigs) = walk dflags ann_middles (mapFindWithDefault [] lbl sunk) - (final_last, assigs') = tryToInline dflags live last assigs + fold_last = constantFold dflags last + (final_last, assigs') = tryToInline dflags live fold_last assigs -- We cannot sink into join points (successors with more than -- one predecessor), so identify the join points and the set @@ -246,13 +269,24 @@ walk dflags nodes assigs = go nodes emptyBlock assigs go [] block as = (block, as) go ((live,node):ns) block as | shouldDiscard node live = go ns block as - | Just a <- shouldSink dflags node1 = go ns block (a : as1) + | Just a <- shouldSink dflags node2 = go ns block (a : as1) | otherwise = go ns block' as' where - (node1, as1) = tryToInline dflags live node as + node1 = constantFold dflags node + + (node2, as1) = tryToInline dflags live node1 as + + (dropped, as') = dropAssignmentsSimple dflags + (\a -> conflicts dflags a node2) as1 + + block' = foldl blockSnoc block dropped `blockSnoc` node2 + - (dropped, as') = dropAssignmentsSimple dflags (\a -> conflicts dflags a node1) as1 - block' = foldl blockSnoc block dropped `blockSnoc` node1 +constantFold :: DynFlags -> CmmNode e x -> CmmNode e x +constantFold dflags node = mapExpDeep f node + where f (CmmMachOp op args) = cmmMachOpFold dflags op args + f (CmmRegOff r 0) = CmmReg r + f e = e -- -- Heuristic to decide whether to pick up and sink an assignment @@ -352,6 +386,8 @@ tryToInline dflags live node assigs = go usages node [] assigs where inline (CmmReg (CmmLocal l')) | l == l' = rhs inline (CmmRegOff (CmmLocal l') off) | l == l' = cmmOffset dflags rhs off + -- re-constant fold after inlining + inline (CmmMachOp op args) = cmmMachOpFold dflags op args inline other = other go usages node skipped (assig@(l,rhs,_) : rest) @@ -416,7 +452,8 @@ conflicts dflags (r, rhs, addr) node | foldRegsUsed (\b r' -> r == r' || b) False node = True -- (2) a store to an address conflicts with a read of the same memory - | CmmStore addr' e <- node, memConflicts addr (loadAddr dflags addr' (cmmExprWidth dflags e)) = True + | CmmStore addr' e <- node + , memConflicts addr (loadAddr dflags addr' (cmmExprWidth dflags e)) = True -- (3) an assignment to Hp/Sp conflicts with a heap/stack read respectively | HeapMem <- addr, CmmAssign (CmmGlobal Hp) _ <- node = True @@ -526,5 +563,6 @@ loadAddr dflags e w = regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem regAddr _ (CmmGlobal Sp) i w = SpMem i (widthInBytes w) regAddr _ (CmmGlobal Hp) _ _ = HeapMem +regAddr _ (CmmGlobal CurrentTSO) _ _ = HeapMem -- important for PrimOps regAddr dflags r _ _ | isGcPtrType (cmmRegType dflags r) = HeapMem -- yay! GCPtr pays for itself regAddr _ _ _ _ = AnyMem diff --git a/compiler/cmm/CmmType.hs b/compiler/cmm/CmmType.hs index d6da5a4022..9a443c1ae2 100644 --- a/compiler/cmm/CmmType.hs +++ b/compiler/cmm/CmmType.hs @@ -15,6 +15,8 @@ module CmmType , rEP_CostCentreStack_mem_alloc , rEP_CostCentreStack_scc_count , rEP_StgEntCounter_allocs + + , ForeignHint(..) ) where @@ -52,7 +54,8 @@ instance Outputable CmmType where instance Outputable CmmCat where ppr FloatCat = ptext $ sLit("F") - ppr _ = ptext $ sLit("I") + ppr GcPtrCat = ptext $ sLit("P") + ppr BitsCat = ptext $ sLit("I") -- Why is CmmType stratified? For native code generation, -- most of the time you just want to know what sort of register @@ -242,6 +245,19 @@ narrowS W64 x = fromIntegral (fromIntegral x :: Int64) narrowS _ _ = panic "narrowTo" ------------------------------------------------------------------------- +-- Hints + +-- Hints are extra type information we attach to the arguments and +-- results of a foreign call, where more type information is sometimes +-- needed by the ABI to make the correct kind of call. + +data ForeignHint + = NoHint | AddrHint | SignedHint + deriving( Eq ) + -- Used to give extra per-argument or per-result + -- information needed by foreign calling conventions + +------------------------------------------------------------------------- -- These don't really belong here, but I don't know where is best to -- put them. diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs index bf93a2f6ff..f420e7d94e 100644 --- a/compiler/cmm/CmmUtils.hs +++ b/compiler/cmm/CmmUtils.hs @@ -22,6 +22,7 @@ module CmmUtils( mkWordCLit, packHalfWordsCLit, mkByteStringCLit, mkDataLits, mkRODataLits, + mkStgWordCLit, -- CmmExpr mkIntExpr, zeroExpr, @@ -120,6 +121,8 @@ typeForeignHint = primRepForeignHint . typePrimRep -- --------------------------------------------------- +-- XXX: should really be Integer, since Int doesn't necessarily cover +-- the full range of target Ints. mkIntCLit :: DynFlags -> Int -> CmmLit mkIntCLit dflags i = CmmInt (toInteger i) (wordWidth dflags) @@ -132,6 +135,9 @@ zeroCLit dflags = CmmInt 0 (wordWidth dflags) zeroExpr :: DynFlags -> CmmExpr zeroExpr dflags = CmmLit (zeroCLit dflags) +mkWordCLit :: DynFlags -> Integer -> CmmLit +mkWordCLit dflags wd = CmmInt wd (wordWidth dflags) + mkByteStringCLit :: Unique -> [Word8] -> (CmmLit, GenCmmDecl CmmStatics info stmt) -- We have to make a top-level decl for the string, -- and return a literal pointing to it @@ -155,8 +161,8 @@ mkRODataLits lbl lits needsRelocation (CmmLabelOff _ _) = True needsRelocation _ = False -mkWordCLit :: DynFlags -> StgWord -> CmmLit -mkWordCLit dflags wd = CmmInt (fromStgWord wd) (wordWidth dflags) +mkStgWordCLit :: DynFlags -> StgWord -> CmmLit +mkStgWordCLit dflags wd = CmmInt (fromStgWord wd) (wordWidth dflags) packHalfWordsCLit :: DynFlags -> StgHalfWord -> StgHalfWord -> CmmLit -- Make a single word literal in which the lower_half_word is @@ -168,8 +174,8 @@ packHalfWordsCLit dflags lower_half_word upper_half_word = if wORDS_BIGENDIAN dflags then mkWordCLit dflags ((l `shiftL` hALF_WORD_SIZE_IN_BITS dflags) .|. u) else mkWordCLit dflags (l .|. (u `shiftL` hALF_WORD_SIZE_IN_BITS dflags)) - where l = toStgWord dflags (fromStgHalfWord lower_half_word) - u = toStgWord dflags (fromStgHalfWord upper_half_word) + where l = fromStgHalfWord lower_half_word + u = fromStgHalfWord upper_half_word --------------------------------------------------- -- @@ -197,6 +203,9 @@ cmmOffset _ e 0 = e cmmOffset _ (CmmReg reg) byte_off = cmmRegOff reg byte_off cmmOffset _ (CmmRegOff reg m) byte_off = cmmRegOff reg (m+byte_off) cmmOffset _ (CmmLit lit) byte_off = CmmLit (cmmOffsetLit lit byte_off) +cmmOffset _ (CmmStackSlot area off) byte_off + = CmmStackSlot area (off - byte_off) + -- note stack area offsets increase towards lower addresses cmmOffset _ (CmmMachOp (MO_Add rep) [expr, CmmLit (CmmInt byte_off1 _rep)]) byte_off2 = CmmMachOp (MO_Add rep) [expr, CmmLit (CmmInt (byte_off1 + toInteger byte_off2) rep)] @@ -207,6 +216,7 @@ cmmOffset dflags expr byte_off -- Smart constructor for CmmRegOff. Same caveats as cmmOffset above. cmmRegOff :: CmmReg -> Int -> CmmExpr +cmmRegOff reg 0 = CmmReg reg cmmRegOff reg byte_off = CmmRegOff reg byte_off cmmOffsetLit :: CmmLit -> Int -> CmmLit diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs index 4ba82cd8f8..1e2ddfadd1 100644 --- a/compiler/cmm/MkGraph.hs +++ b/compiler/cmm/MkGraph.hs @@ -9,9 +9,10 @@ module MkGraph , stackStubExpr , mkNop, mkAssign, mkStore, mkUnsafeCall, mkFinalCall, mkCallReturnsTo , mkJumpReturnsTo - , mkJump, mkDirectJump, mkForeignJump, mkForeignJumpExtra, mkJumpGC + , mkJump, mkJumpExtra, mkDirectJump, mkForeignJump, mkForeignJumpExtra + , mkRawJump , mkCbranch, mkSwitch - , mkReturn, mkComment, mkCallEntry, mkBranch + , mkReturn, mkReturnSimple, mkComment, mkCallEntry, mkBranch , copyInOflow, copyOutOflow , noExtraStack , toCall, Transfer(..) @@ -20,7 +21,7 @@ where import BlockId import Cmm -import CmmCallConv (assignArgumentsPos, ParamLocation(..)) +import CmmCallConv import Compiler.Hoopl hiding (Unique, (<*>), mkFirst, mkMiddle, mkLast, mkLabel, mkBranch, Shape(..)) @@ -161,11 +162,11 @@ outOfLine l g = unitOL (CgFork l g) -- | allocate a fresh label for the entry point lgraphOfAGraph :: CmmAGraph -> UniqSM CmmGraph lgraphOfAGraph g = do u <- getUniqueM - return (flattenCmmAGraph (mkBlockId u) g) + return (labelAGraph (mkBlockId u) g) -- | use the given BlockId as the label of the entry point -labelAGraph :: BlockId -> CmmAGraph -> UniqSM CmmGraph -labelAGraph lbl ag = return (flattenCmmAGraph lbl ag) +labelAGraph :: BlockId -> CmmAGraph -> CmmGraph +labelAGraph lbl ag = flattenCmmAGraph lbl ag ---------- No-ops mkNop :: CmmAGraph @@ -194,16 +195,25 @@ mkJump dflags e actuals updfr_off = lastWithArgs dflags Jump Old NativeNodeCall actuals updfr_off $ toCall e Nothing updfr_off 0 -mkDirectJump :: DynFlags -> CmmExpr -> [CmmActual] -> UpdFrameOffset +-- | A jump where the caller says what the live GlobalRegs are. Used +-- for low-level hand-written Cmm. +mkRawJump :: DynFlags -> CmmExpr -> UpdFrameOffset -> [GlobalReg] -> CmmAGraph -mkDirectJump dflags e actuals updfr_off = - lastWithArgs dflags Jump Old NativeDirectCall actuals updfr_off $ +mkRawJump dflags e updfr_off vols = + lastWithArgs dflags Jump Old NativeNodeCall [] updfr_off $ + \arg_space _ -> toCall e Nothing updfr_off 0 arg_space vols + + +mkJumpExtra :: DynFlags -> CmmExpr -> [CmmActual] -> UpdFrameOffset + -> [CmmActual] -> CmmAGraph +mkJumpExtra dflags e actuals updfr_off extra_stack = + lastWithArgsAndExtraStack dflags Jump Old NativeNodeCall actuals updfr_off extra_stack $ toCall e Nothing updfr_off 0 -mkJumpGC :: DynFlags -> CmmExpr -> [CmmActual] -> UpdFrameOffset +mkDirectJump :: DynFlags -> CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph -mkJumpGC dflags e actuals updfr_off = - lastWithArgs dflags Jump Old GC actuals updfr_off $ +mkDirectJump dflags e actuals updfr_off = + lastWithArgs dflags Jump Old NativeDirectCall actuals updfr_off $ toCall e Nothing updfr_off 0 mkForeignJump :: DynFlags @@ -213,7 +223,7 @@ mkForeignJump dflags conv e actuals updfr_off = mkForeignJumpExtra dflags conv e actuals updfr_off noExtraStack mkForeignJumpExtra :: DynFlags -> Convention -> CmmExpr -> [CmmActual] - -> UpdFrameOffset -> (ByteOff, [(CmmExpr, ByteOff)]) + -> UpdFrameOffset -> [CmmActual] -> CmmAGraph mkForeignJumpExtra dflags conv e actuals updfr_off extra_stack = lastWithArgsAndExtraStack dflags Jump Old conv actuals updfr_off extra_stack $ @@ -231,6 +241,11 @@ mkReturn dflags e actuals updfr_off = lastWithArgs dflags Ret Old NativeReturn actuals updfr_off $ toCall e Nothing updfr_off 0 +mkReturnSimple :: DynFlags -> [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkReturnSimple dflags actuals updfr_off = + mkReturn dflags e actuals updfr_off + where e = CmmLoad (CmmStackSlot Old updfr_off) (gcWord dflags) + mkBranch :: BlockId -> CmmAGraph mkBranch bid = mkLast (CmmBranch bid) @@ -245,7 +260,7 @@ mkCallReturnsTo :: DynFlags -> CmmExpr -> Convention -> [CmmActual] -> BlockId -> ByteOff -> UpdFrameOffset - -> (ByteOff, [(CmmExpr,ByteOff)]) + -> [CmmActual] -> CmmAGraph mkCallReturnsTo dflags f callConv actuals ret_lbl ret_off updfr_off extra_stack = do lastWithArgsAndExtraStack dflags Call (Young ret_lbl) callConv actuals @@ -282,39 +297,40 @@ stackStubExpr :: Width -> CmmExpr stackStubExpr w = CmmLit (CmmInt 0 w) -- When we copy in parameters, we usually want to put overflow --- parameters on the stack, but sometimes we want to pass --- the variables in their spill slots. --- Therefore, for copying arguments and results, we provide different --- functions to pass the arguments in an overflow area and to pass them in spill slots. -copyInOflow :: DynFlags -> Convention -> Area -> [CmmFormal] +-- parameters on the stack, but sometimes we want to pass the +-- variables in their spill slots. Therefore, for copying arguments +-- and results, we provide different functions to pass the arguments +-- in an overflow area and to pass them in spill slots. +copyInOflow :: DynFlags -> Convention -> Area + -> [CmmFormal] + -> [CmmFormal] -> (Int, CmmAGraph) -copyInOflow dflags conv area formals = (offset, catAGraphs $ map mkMiddle nodes) - where (offset, nodes) = copyIn dflags oneCopyOflowI conv area formals - -type SlotCopier = Area -> (LocalReg, ByteOff) -> (ByteOff, [CmmNode O O]) -> - (ByteOff, [CmmNode O O]) -type CopyIn = DynFlags -> SlotCopier -> Convention -> Area -> [CmmFormal] -> (ByteOff, [CmmNode O O]) +copyInOflow dflags conv area formals extra_stk + = (offset, catAGraphs $ map mkMiddle nodes) + where (offset, nodes) = copyIn dflags conv area formals extra_stk -- Return the number of bytes used for copying arguments, as well as the -- instructions to copy the arguments. -copyIn :: CopyIn -copyIn dflags oflow conv area formals = - foldr ci (init_offset, []) args' - where ci (reg, RegisterParam r) (n, ms) = - (n, CmmAssign (CmmLocal reg) (CmmReg $ CmmGlobal r) : ms) - ci (r, StackParam off) (n, ms) = oflow area (r, off) (n, ms) - init_offset = widthInBytes (wordWidth dflags) -- infotable - args = assignArgumentsPos dflags conv localRegType formals - args' = foldl adjust [] args - where adjust rst (v, StackParam off) = (v, StackParam (off + init_offset)) : rst - adjust rst x@(_, RegisterParam _) = x : rst - --- Copy-in one arg, using overflow space if needed. -oneCopyOflowI :: SlotCopier -oneCopyOflowI area (reg, off) (n, ms) = - (max n off, CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty) : ms) - where ty = localRegType reg +copyIn :: DynFlags -> Convention -> Area + -> [CmmFormal] + -> [CmmFormal] + -> (ByteOff, [CmmNode O O]) +copyIn dflags conv area formals extra_stk + = (stk_size, map ci (stk_args ++ args)) + where + ci (reg, RegisterParam r) = + CmmAssign (CmmLocal reg) (CmmReg (CmmGlobal r)) + ci (reg, StackParam off) = + CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty) + where ty = localRegType reg + + init_offset = widthInBytes (wordWidth dflags) -- infotable + + (stk_off, stk_args) = assignStack dflags init_offset localRegType extra_stk + + (stk_size, args) = assignArgumentsPos dflags stk_off conv + localRegType formals -- Factoring out the common parts of the copyout functions yielded something -- more complicated: @@ -323,7 +339,7 @@ data Transfer = Call | JumpRet | Jump | Ret deriving Eq copyOutOflow :: DynFlags -> Convention -> Transfer -> Area -> [CmmActual] -> UpdFrameOffset - -> (ByteOff, [(CmmExpr,ByteOff)]) -- extra stack stuff + -> [CmmActual] -- extra stack args -> (Int, [GlobalReg], CmmAGraph) -- Generate code to move the actual parameters into the locations @@ -335,22 +351,20 @@ copyOutOflow :: DynFlags -> Convention -> Transfer -> Area -> [CmmActual] -- the info table for return and adjust the offsets of the other -- parameters. If this is a call instruction, we adjust the offsets -- of the other parameters. -copyOutOflow dflags conv transfer area actuals updfr_off - (extra_stack_off, extra_stack_stuff) - = foldr co (init_offset, [], mkNop) (args' ++ stack_params) - where - co (v, RegisterParam r) (n, rs, ms) - = (n, r:rs, mkAssign (CmmGlobal r) v <*> ms) - co (v, StackParam off) (n, rs, ms) - = (max n off, rs, mkStore (CmmStackSlot area off) v <*> ms) - - stack_params = [ (e, StackParam (off + init_offset)) - | (e,off) <- extra_stack_stuff ] +copyOutOflow dflags conv transfer area actuals updfr_off extra_stack_stuff + = (stk_size, regs, graph) + where + (regs, graph) = foldr co ([], mkNop) (setRA ++ args ++ stack_params) + + co (v, RegisterParam r) (rs, ms) + = (r:rs, mkAssign (CmmGlobal r) v <*> ms) + co (v, StackParam off) (rs, ms) + = (rs, mkStore (CmmStackSlot area off) v <*> ms) (setRA, init_offset) = case area of - Young id -> id `seq` -- Generate a store instruction for - -- the return address if making a call + Young id -> -- Generate a store instruction for + -- the return address if making a call case transfer of Call -> ([(CmmLit (CmmBlock id), StackParam init_offset)], @@ -362,19 +376,19 @@ copyOutOflow dflags conv transfer area actuals updfr_off ([], 0) Old -> ([], updfr_off) - arg_offset = init_offset + extra_stack_off + (extra_stack_off, stack_params) = + assignStack dflags init_offset (cmmExprType dflags) extra_stack_stuff args :: [(CmmExpr, ParamLocation)] -- The argument and where to put it - args = assignArgumentsPos dflags conv (cmmExprType dflags) actuals - - args' = foldl adjust setRA args - where adjust rst (v, StackParam off) = (v, StackParam (off + arg_offset)) : rst - adjust rst x@(_, RegisterParam _) = x : rst + (stk_size, args) = assignArgumentsPos dflags extra_stack_off conv + (cmmExprType dflags) actuals -mkCallEntry :: DynFlags -> Convention -> [CmmFormal] -> (Int, CmmAGraph) -mkCallEntry dflags conv formals = copyInOflow dflags conv Old formals +mkCallEntry :: DynFlags -> Convention -> [CmmFormal] -> [CmmFormal] + -> (Int, CmmAGraph) +mkCallEntry dflags conv formals extra_stk + = copyInOflow dflags conv Old formals extra_stk lastWithArgs :: DynFlags -> Transfer -> Area -> Convention -> [CmmActual] -> UpdFrameOffset @@ -386,7 +400,7 @@ lastWithArgs dflags transfer area conv actuals updfr_off last = lastWithArgsAndExtraStack :: DynFlags -> Transfer -> Area -> Convention -> [CmmActual] - -> UpdFrameOffset -> (ByteOff, [(CmmExpr,ByteOff)]) + -> UpdFrameOffset -> [CmmActual] -> (ByteOff -> [GlobalReg] -> CmmAGraph) -> CmmAGraph lastWithArgsAndExtraStack dflags transfer area conv actuals updfr_off @@ -397,8 +411,8 @@ lastWithArgsAndExtraStack dflags transfer area conv actuals updfr_off updfr_off extra_stack -noExtraStack :: (ByteOff, [(CmmExpr,ByteOff)]) -noExtraStack = (0,[]) +noExtraStack :: [CmmActual] +noExtraStack = [] toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff -> ByteOff -> [GlobalReg] diff --git a/compiler/cmm/OldCmm.hs b/compiler/cmm/OldCmm.hs index 05aa5fb811..3d0599b7ea 100644 --- a/compiler/cmm/OldCmm.hs +++ b/compiler/cmm/OldCmm.hs @@ -16,7 +16,7 @@ module OldCmm ( GenBasicBlock(..), CmmBasicBlock, blockId, blockStmts, mapBlockStmts, - CmmStmt(..), CmmReturnInfo(..), CmmHinted(..), + CmmStmt(..), New.CmmReturnInfo(..), CmmHinted(..), HintedCmmFormal, HintedCmmActual, CmmSafety(..), CmmCallTarget(..), @@ -120,11 +120,6 @@ cmmTopMapGraph :: (g -> g') -> GenCmmDecl d h g -> GenCmmDecl d h g' cmmTopMapGraph f (CmmProc h l g) = CmmProc h l (f g) cmmTopMapGraph _ (CmmData s ds) = CmmData s ds -data CmmReturnInfo - = CmmMayReturn - | CmmNeverReturns - deriving ( Eq ) - ----------------------------------------------------------------------------- -- CmmStmt -- A "statement". Note that all branches are explicit: there are no @@ -145,7 +140,7 @@ data CmmStmt CmmCallTarget [HintedCmmFormal] -- zero or more results [HintedCmmActual] -- zero or more arguments - CmmReturnInfo + New.CmmReturnInfo -- Some care is necessary when handling the arguments of these, see -- [Register parameter passing] and the hack in cmm/CmmOpt.hs diff --git a/compiler/cmm/OldPprCmm.hs b/compiler/cmm/OldPprCmm.hs index a3857d4e47..dcde86e37c 100644 --- a/compiler/cmm/OldPprCmm.hs +++ b/compiler/cmm/OldPprCmm.hs @@ -111,12 +111,8 @@ pprStmt stmt = case stmt of pp_lhs | null results = empty | otherwise = commafy (map ppr_ar results) <+> equals -- Don't print the hints on a native C-- call - ppr_ar (CmmHinted ar k) = case cconv of - CmmCallConv -> ppr ar - _ -> ppr (ar,k) - pp_conv = case cconv of - CmmCallConv -> empty - _ -> ptext (sLit("foreign")) <+> doubleQuotes (ppr cconv) + ppr_ar (CmmHinted ar k) = ppr (ar,k) + pp_conv = ptext (sLit("foreign")) <+> doubleQuotes (ppr cconv) -- Call a CallishMachOp, like sin or cos that might be implemented as a library call. CmmCall (CmmPrim op _) results args ret -> diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index 1a3eb0d716..a2427df868 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -865,7 +865,6 @@ is_cishCC :: CCallConv -> Bool is_cishCC CCallConv = True is_cishCC CApiConv = True is_cishCC StdCallConv = True -is_cishCC CmmCallConv = False is_cishCC PrimCallConv = False -- --------------------------------------------------------------------- diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs index 423bcd5504..f3e2a02737 100644 --- a/compiler/cmm/PprCmm.hs +++ b/compiler/cmm/PprCmm.hs @@ -75,6 +75,8 @@ instance Outputable ForeignConvention where instance Outputable ForeignTarget where ppr = pprForeignTarget +instance Outputable CmmReturnInfo where + ppr = pprReturnInfo instance Outputable (Block CmmNode C C) where ppr = pprBlock @@ -145,17 +147,18 @@ pprConvention (NativeDirectCall {}) = text "<native-direct-call-convention>" pprConvention (NativeReturn {}) = text "<native-ret-convention>" pprConvention Slow = text "<slow-convention>" pprConvention GC = text "<gc-convention>" -pprConvention PrimOpCall = text "<primop-call-convention>" -pprConvention PrimOpReturn = text "<primop-ret-convention>" pprForeignConvention :: ForeignConvention -> SDoc -pprForeignConvention (ForeignConvention c as rs) = ppr c <> ppr as <> ppr rs +pprForeignConvention (ForeignConvention c args res ret) = + doubleQuotes (ppr c) <+> text "arg hints: " <+> ppr args <+> text " result hints: " <+> ppr res <+> ppr ret + +pprReturnInfo :: CmmReturnInfo -> SDoc +pprReturnInfo CmmMayReturn = empty +pprReturnInfo CmmNeverReturns = ptext (sLit "never returns") pprForeignTarget :: ForeignTarget -> SDoc -pprForeignTarget (ForeignTarget fn c) = ppr_fc c <+> ppr_target fn - where ppr_fc :: ForeignConvention -> SDoc - ppr_fc (ForeignConvention c args res) = - doubleQuotes (ppr c) <+> text "arg hints: " <+> ppr args <+> text " result hints: " <+> ppr res +pprForeignTarget (ForeignTarget fn c) = ppr c <+> ppr_target fn + where ppr_target :: CmmExpr -> SDoc ppr_target t@(CmmLit _) = ppr t ppr_target fn' = parens (ppr fn') diff --git a/compiler/cmm/SMRep.lhs b/compiler/cmm/SMRep.lhs index d9644488fc..ac021df761 100644 --- a/compiler/cmm/SMRep.lhs +++ b/compiler/cmm/SMRep.lhs @@ -30,6 +30,7 @@ module SMRep ( -- ** Predicates isStaticRep, isConRep, isThunkRep, isFunRep, isStaticNoCafCon, + isStackRep, -- ** Size-related things heapClosureSize, @@ -148,7 +149,7 @@ data SMRep Liveness | RTSRep -- The RTS needs to declare info tables with specific - StgHalfWord -- type tags, so this form lets us override the default + Int -- type tags, so this form lets us override the default SMRep -- tag for an SMRep. -- | True <=> This is a static closure. Affects how we garbage-collect it. @@ -166,10 +167,10 @@ data ClosureTypeInfo | ThunkSelector SelectorOffset | BlackHole -type ConstrTag = StgHalfWord +type ConstrTag = Int type ConstrDescription = [Word8] -- result of dataConIdentity -type FunArity = StgHalfWord -type SelectorOffset = StgWord +type FunArity = Int +type SelectorOffset = Int ------------------------- -- We represent liveness bitmaps as a Bitmap (whose internal @@ -188,7 +189,7 @@ type Liveness = [Bool] -- One Bool per word; True <=> non-ptr or dead data ArgDescr = ArgSpec -- Fits one of the standard patterns - !StgHalfWord -- RTS type identifier ARG_P, ARG_N, ... + !Int -- RTS type identifier ARG_P, ARG_N, ... | ArgGen -- General case Liveness -- Details about the arguments @@ -212,7 +213,7 @@ mkHeapRep dflags is_static ptr_wds nonptr_wds cl_type_info hdr_size = closureTypeHdrSize dflags cl_type_info payload_size = ptr_wds + nonptr_wds -mkRTSRep :: StgHalfWord -> SMRep -> SMRep +mkRTSRep :: Int -> SMRep -> SMRep mkRTSRep = RTSRep mkStackRep :: [Bool] -> SMRep @@ -229,6 +230,11 @@ isStaticRep (HeapRep is_static _ _ _) = is_static isStaticRep (StackRep {}) = False isStaticRep (RTSRep _ rep) = isStaticRep rep +isStackRep :: SMRep -> Bool +isStackRep StackRep{} = True +isStackRep (RTSRep _ rep) = isStackRep rep +isStackRep _ = False + isConRep :: SMRep -> Bool isConRep (HeapRep _ _ _ Constr{}) = True isConRep _ = False @@ -314,11 +320,10 @@ closureTypeHdrSize dflags ty = case ty of -- Defines CONSTR, CONSTR_1_0 etc -- | Derives the RTS closure type from an 'SMRep' -rtsClosureType :: DynFlags -> SMRep -> StgHalfWord -rtsClosureType dflags rep - = toStgHalfWord dflags - $ case rep of - RTSRep ty _ -> fromStgHalfWord ty +rtsClosureType :: SMRep -> Int +rtsClosureType rep + = case rep of + RTSRep ty _ -> ty HeapRep False 1 0 Constr{} -> CONSTR_1_0 HeapRep False 0 1 Constr{} -> CONSTR_0_1 @@ -355,11 +360,11 @@ rtsClosureType dflags rep _ -> panic "rtsClosureType" -- We export these ones -rET_SMALL, rET_BIG, aRG_GEN, aRG_GEN_BIG :: DynFlags -> StgHalfWord -rET_SMALL dflags = toStgHalfWord dflags RET_SMALL -rET_BIG dflags = toStgHalfWord dflags RET_BIG -aRG_GEN dflags = toStgHalfWord dflags ARG_GEN -aRG_GEN_BIG dflags = toStgHalfWord dflags ARG_GEN_BIG +rET_SMALL, rET_BIG, aRG_GEN, aRG_GEN_BIG :: Int +rET_SMALL = RET_SMALL +rET_BIG = RET_BIG +aRG_GEN = ARG_GEN +aRG_GEN_BIG = ARG_GEN_BIG \end{code} Note [Static NoCaf constructors] diff --git a/compiler/codeGen/CgCallConv.hs b/compiler/codeGen/CgCallConv.hs index 1f5b711d86..d548741e1f 100644 --- a/compiler/codeGen/CgCallConv.hs +++ b/compiler/codeGen/CgCallConv.hs @@ -70,7 +70,7 @@ mkArgDescr _nm args let arg_bits = argBits dflags arg_reps arg_reps = filter nonVoidArg (map idCgRep args) -- Getting rid of voids eases matching of standard patterns - case stdPattern dflags arg_reps of + case stdPattern arg_reps of Just spec_id -> return (ArgSpec spec_id) Nothing -> return (ArgGen arg_bits) @@ -79,10 +79,9 @@ argBits _ [] = [] argBits dflags (PtrArg : args) = False : argBits dflags args argBits dflags (arg : args) = take (cgRepSizeW dflags arg) (repeat True) ++ argBits dflags args -stdPattern :: DynFlags -> [CgRep] -> Maybe StgHalfWord -stdPattern dflags reps - = fmap (toStgHalfWord dflags) - $ case reps of +stdPattern :: [CgRep] -> Maybe Int +stdPattern reps + = case reps of [] -> Just ARG_NONE -- just void args, probably [PtrArg] -> Just ARG_P diff --git a/compiler/codeGen/CgCon.lhs b/compiler/codeGen/CgCon.lhs index aeb87235e3..858de3a616 100644 --- a/compiler/codeGen/CgCon.lhs +++ b/compiler/codeGen/CgCon.lhs @@ -189,7 +189,7 @@ buildDynCon' dflags platform binder _ con [arg_amode] , (_, CmmLit (CmmInt val _)) <- arg_amode , let val_int = (fromIntegral val) :: Int , val_int <= mAX_INTLIKE dflags && val_int >= mIN_INTLIKE dflags - = do { let intlike_lbl = mkCmmGcPtrLabel rtsPackageId (fsLit "stg_INTLIKE_closure") + = do { let intlike_lbl = mkCmmClosureLabel rtsPackageId (fsLit "stg_INTLIKE") offsetW = (val_int - mIN_INTLIKE dflags) * (fixedHdrSize dflags + 1) -- INTLIKE closures consist of a header and one word payload intlike_amode = CmmLit (cmmLabelOffW dflags intlike_lbl offsetW) @@ -201,7 +201,7 @@ buildDynCon' dflags platform binder _ con [arg_amode] , (_, CmmLit (CmmInt val _)) <- arg_amode , let val_int = (fromIntegral val) :: Int , val_int <= mAX_CHARLIKE dflags && val_int >= mIN_CHARLIKE dflags - = do { let charlike_lbl = mkCmmGcPtrLabel rtsPackageId (fsLit "stg_CHARLIKE_closure") + = do { let charlike_lbl = mkCmmClosureLabel rtsPackageId (fsLit "stg_CHARLIKE") offsetW = (val_int - mIN_CHARLIKE dflags) * (fixedHdrSize dflags + 1) -- CHARLIKE closures consist of a header and one word payload charlike_amode = CmmLit (cmmLabelOffW dflags charlike_lbl offsetW) diff --git a/compiler/codeGen/CgHeapery.lhs b/compiler/codeGen/CgHeapery.lhs index 965abf0db8..8cff77381d 100644 --- a/compiler/codeGen/CgHeapery.lhs +++ b/compiler/codeGen/CgHeapery.lhs @@ -415,7 +415,7 @@ unbxTupleHeapCheck regs ptrs nptrs fail_code code { dflags <- getDynFlags ; let full_fail_code = fail_code `plusStmts` oneStmt assign_liveness assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9 VNonGcPtr)) -- Ho ho ho! - (CmmLit (mkWordCLit dflags liveness)) + (CmmLit (mkStgWordCLit dflags liveness)) liveness = mkRegLiveness dflags regs ptrs nptrs live = Just $ map snd regs rts_label = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit "stg_gc_ut"))) diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs index c124b5f68a..03e01b332a 100644 --- a/compiler/codeGen/CgProf.hs +++ b/compiler/codeGen/CgProf.hs @@ -258,7 +258,7 @@ dynLdvInit :: DynFlags -> CmmExpr dynLdvInit dflags = -- (era << LDV_SHIFT) | LDV_STATE_CREATE CmmMachOp (mo_wordOr dflags) [ CmmMachOp (mo_wordShl dflags) [loadEra dflags, mkIntExpr dflags (lDV_SHIFT dflags)], - CmmLit (mkWordCLit dflags (lDV_STATE_CREATE dflags)) + CmmLit (mkWordCLit dflags (iLDV_STATE_CREATE dflags)) ] -- @@ -289,8 +289,8 @@ ldvEnter cl_ptr = do -- don't forget to substract node's tag ldv_wd = ldvWord dflags cl_ptr new_ldv_wd = cmmOrWord dflags (cmmAndWord dflags (CmmLoad ldv_wd (bWord dflags)) - (CmmLit (mkWordCLit dflags (lDV_CREATE_MASK dflags)))) - (cmmOrWord dflags (loadEra dflags) (CmmLit (mkWordCLit dflags (lDV_STATE_USE dflags)))) + (CmmLit (mkWordCLit dflags (iLDV_CREATE_MASK dflags)))) + (cmmOrWord dflags (loadEra dflags) (CmmLit (mkWordCLit dflags (iLDV_STATE_USE dflags)))) ifProfiling $ -- if (era > 0) { -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | @@ -308,10 +308,3 @@ ldvWord :: DynFlags -> CmmExpr -> CmmExpr ldvWord dflags closure_ptr = cmmOffsetB dflags closure_ptr (oFFSET_StgHeader_ldvw dflags) -lDV_CREATE_MASK :: DynFlags -> StgWord -lDV_CREATE_MASK dflags = toStgWord dflags (iLDV_CREATE_MASK dflags) -lDV_STATE_CREATE :: DynFlags -> StgWord -lDV_STATE_CREATE dflags = toStgWord dflags (iLDV_STATE_CREATE dflags) -lDV_STATE_USE :: DynFlags -> StgWord -lDV_STATE_USE dflags = toStgWord dflags (iLDV_STATE_USE dflags) - diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index 3a106abfb4..9f9a2cfe26 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -800,8 +800,8 @@ getSRTInfo = do let srt_desc_lbl = mkLargeSRTLabel id emitRODataLits "getSRTInfo" srt_desc_lbl ( cmmLabelOffW dflags srt_lbl off - : mkWordCLit dflags (toStgWord dflags (toInteger len)) - : map (mkWordCLit dflags) bmp) + : mkWordCLit dflags (toInteger len) + : map (mkWordCLit dflags . fromStgWord) bmp) return (C_SRT srt_desc_lbl 0 (srt_escape dflags)) | otherwise diff --git a/compiler/codeGen/ClosureInfo.lhs b/compiler/codeGen/ClosureInfo.lhs index 740bfab845..f2cbc21d27 100644 --- a/compiler/codeGen/ClosureInfo.lhs +++ b/compiler/codeGen/ClosureInfo.lhs @@ -480,7 +480,7 @@ mkClosureInfo dflags is_static id lf_info tot_wds ptr_wds srt_info descr -- anything else gets eta expanded. where name = idName id - sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds (lfClosureType dflags lf_info) + sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds (lfClosureType lf_info) nonptr_wds = tot_wds - ptr_wds mkConInfo :: DynFlags @@ -492,7 +492,7 @@ mkConInfo dflags is_static data_con tot_wds ptr_wds = ConInfo { closureSMRep = sm_rep, closureCon = data_con } where - sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds (lfClosureType dflags lf_info) + sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds (lfClosureType lf_info) lf_info = mkConLFInfo data_con nonptr_wds = tot_wds - ptr_wds \end{code} @@ -526,16 +526,16 @@ closureNeedsUpdSpace cl_info = closureUpdReqd cl_info %************************************************************************ \begin{code} -lfClosureType :: DynFlags -> LambdaFormInfo -> ClosureTypeInfo -lfClosureType dflags (LFReEntrant _ arity _ argd) = Fun (toStgHalfWord dflags (toInteger arity)) argd -lfClosureType dflags (LFCon con) = Constr (toStgHalfWord dflags (toInteger (dataConTagZ con))) - (dataConIdentity con) -lfClosureType dflags (LFThunk _ _ _ is_sel _) = thunkClosureType dflags is_sel -lfClosureType _ _ = panic "lfClosureType" - -thunkClosureType :: DynFlags -> StandardFormInfo -> ClosureTypeInfo -thunkClosureType dflags (SelectorThunk off) = ThunkSelector (toStgWord dflags (toInteger off)) -thunkClosureType _ _ = Thunk +lfClosureType :: LambdaFormInfo -> ClosureTypeInfo +lfClosureType (LFReEntrant _ arity _ argd) = Fun arity argd +lfClosureType (LFCon con) = Constr (dataConTagZ con) + (dataConIdentity con) +lfClosureType (LFThunk _ _ _ is_sel _) = thunkClosureType is_sel +lfClosureType _ = panic "lfClosureType" + +thunkClosureType :: StandardFormInfo -> ClosureTypeInfo +thunkClosureType (SelectorThunk off) = ThunkSelector off +thunkClosureType _ = Thunk -- We *do* get non-updatable top-level thunks sometimes. eg. f = g -- gets compiled to a jump to g (if g has non-zero arity), instead of diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs index 37ca5e0d43..67aae3f6c0 100644 --- a/compiler/codeGen/StgCmm.hs +++ b/compiler/codeGen/StgCmm.hs @@ -245,7 +245,7 @@ cgDataCon data_con arg_reps = [(typePrimRep rep_ty, rep_ty) | ty <- dataConRepArgTys data_con, rep_ty <- flattenRepType (repType ty)] -- Dynamic closure code for non-nullary constructors only - ; whenC (not (isNullaryRepDataCon data_con)) + ; when (not (isNullaryRepDataCon data_con)) (emit_info dyn_info_tbl tickyEnterDynCon) -- Dynamic-Closure first, to reduce forward references diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index 89d27dd161..5e46dcfd65 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -10,7 +10,7 @@ module StgCmmBind ( cgTopRhsClosure, cgBind, emitBlackHoleCode, - pushUpdateFrame + pushUpdateFrame, emitUpdateFrame ) where #include "HsVersions.h" @@ -37,7 +37,6 @@ import CLabel import StgSyn import CostCentre import Id -import Control.Monad import Name import Module import ListSetOps @@ -48,6 +47,8 @@ import FastString import Maybes import DynFlags +import Control.Monad + ------------------------------------------------------------------------ -- Top-level bindings ------------------------------------------------------------------------ @@ -460,7 +461,7 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details (CmmMachOp (mo_wordSub dflags) [ CmmReg nodeReg , mkIntExpr dflags (funTag dflags cl_info) ]) - ; whenC node_points (ldvEnterClosure cl_info) + ; when node_points (ldvEnterClosure cl_info) ; granYield arg_regs node_points -- Main payload @@ -525,8 +526,8 @@ thunkCode cl_info fv_details _cc node arity body ; entryHeapCheck cl_info node' arity [] $ do { -- Overwrite with black hole if necessary -- but *after* the heap-overflow check - ; whenC (blackHoleOnEntry cl_info && node_points) - (blackHoleIt cl_info) + ; when (blackHoleOnEntry cl_info && node_points) + (blackHoleIt cl_info node) -- Push update frame ; setupUpdate cl_info node $ @@ -545,13 +546,14 @@ thunkCode cl_info fv_details _cc node arity body -- Update and black-hole wrappers ------------------------------------------------------------------------ -blackHoleIt :: ClosureInfo -> FCode () +blackHoleIt :: ClosureInfo -> LocalReg -> FCode () -- Only called for closures with no args -- Node points to the closure -blackHoleIt closure_info = emitBlackHoleCode (closureSingleEntry closure_info) +blackHoleIt closure_info node + = emitBlackHoleCode (closureSingleEntry closure_info) (CmmReg (CmmLocal node)) -emitBlackHoleCode :: Bool -> FCode () -emitBlackHoleCode is_single_entry = do +emitBlackHoleCode :: Bool -> CmmExpr -> FCode () +emitBlackHoleCode is_single_entry node = do dflags <- getDynFlags -- Eager blackholing is normally disabled, but can be turned on with @@ -578,12 +580,12 @@ emitBlackHoleCode is_single_entry = do -- profiling), so currently eager blackholing doesn't -- work with profiling. - whenC eager_blackholing $ do + when eager_blackholing $ do tickyBlackHole (not is_single_entry) - emitStore (cmmOffsetW dflags (CmmReg nodeReg) (fixedHdrSize dflags)) + emitStore (cmmOffsetW dflags node (fixedHdrSize dflags)) (CmmReg (CmmGlobal CurrentTSO)) emitPrimCall [] MO_WriteBarrier [] - emitStore (CmmReg nodeReg) (CmmReg (CmmGlobal EagerBlackholeInfo)) + emitStore node (CmmReg (CmmGlobal EagerBlackholeInfo)) setupUpdate :: ClosureInfo -> LocalReg -> FCode () -> FCode () -- Nota Bene: this function does not change Node (even if it's a CAF), @@ -634,13 +636,20 @@ pushUpdateFrame lbl updatee body let hdr = fixedHdrSize dflags * wORD_SIZE dflags frame = updfr + hdr + sIZEOF_StgUpdateFrame_NoHdr dflags - off_updatee = hdr + oFFSET_StgUpdateFrame_updatee dflags -- - emitStore (CmmStackSlot Old frame) (mkLblExpr lbl) - emitStore (CmmStackSlot Old (frame - off_updatee)) updatee - initUpdFrameProf frame + emitUpdateFrame dflags (CmmStackSlot Old frame) lbl updatee withUpdFrameOff frame body +emitUpdateFrame :: DynFlags -> CmmExpr -> CLabel -> CmmExpr -> FCode () +emitUpdateFrame dflags frame lbl updatee = do + let + hdr = fixedHdrSize dflags * wORD_SIZE dflags + off_updatee = hdr + oFFSET_StgUpdateFrame_updatee dflags + -- + emitStore frame (mkLblExpr lbl) + emitStore (cmmOffset dflags frame off_updatee) updatee + initUpdFrameProf frame + ----------------------------------------------------------------------------- -- Entering a CAF -- diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs index 4be5bd3d0c..f865c37ad8 100644 --- a/compiler/codeGen/StgCmmClosure.hs +++ b/compiler/codeGen/StgCmmClosure.hs @@ -353,16 +353,16 @@ isLFReEntrant _ = False -- Choosing SM reps ----------------------------------------------------------------------------- -lfClosureType :: DynFlags -> LambdaFormInfo -> ClosureTypeInfo -lfClosureType dflags (LFReEntrant _ arity _ argd) = Fun (toStgHalfWord dflags (toInteger arity)) argd -lfClosureType dflags (LFCon con) = Constr (toStgHalfWord dflags (toInteger (dataConTagZ con))) - (dataConIdentity con) -lfClosureType dflags (LFThunk _ _ _ is_sel _) = thunkClosureType dflags is_sel -lfClosureType _ _ = panic "lfClosureType" +lfClosureType :: LambdaFormInfo -> ClosureTypeInfo +lfClosureType (LFReEntrant _ arity _ argd) = Fun arity argd +lfClosureType (LFCon con) = Constr (dataConTagZ con) + (dataConIdentity con) +lfClosureType (LFThunk _ _ _ is_sel _) = thunkClosureType is_sel +lfClosureType _ = panic "lfClosureType" -thunkClosureType :: DynFlags -> StandardFormInfo -> ClosureTypeInfo -thunkClosureType dflags (SelectorThunk off) = ThunkSelector (toStgWord dflags (toInteger off)) -thunkClosureType _ _ = Thunk +thunkClosureType :: StandardFormInfo -> ClosureTypeInfo +thunkClosureType (SelectorThunk off) = ThunkSelector off +thunkClosureType _ = Thunk -- We *do* get non-updatable top-level thunks sometimes. eg. f = g -- gets compiled to a jump to g (if g has non-zero arity), instead of @@ -373,8 +373,6 @@ thunkClosureType _ _ = Thunk -- nodeMustPointToIt ----------------------------------------------------------------------------- --- Be sure to see the stg-details notes about these... - nodeMustPointToIt :: DynFlags -> LambdaFormInfo -> Bool nodeMustPointToIt _ (LFReEntrant top _ no_fvs _) = not no_fvs || -- Certainly if it has fvs we need to point to it @@ -687,7 +685,7 @@ mkClosureInfo dflags is_static id lf_info tot_wds ptr_wds val_descr closureProf = prof } -- (we don't have an SRT yet) where name = idName id - sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds (lfClosureType dflags lf_info) + sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds (lfClosureType lf_info) prof = mkProfilingInfo dflags id val_descr nonptr_wds = tot_wds - ptr_wds @@ -899,8 +897,7 @@ mkDataConInfoTable dflags data_con is_static ptr_wds nonptr_wds sm_rep = mkHeapRep dflags is_static ptr_wds nonptr_wds cl_type - cl_type = Constr (toStgHalfWord dflags (toInteger (dataConTagZ data_con))) - (dataConIdentity data_con) + cl_type = Constr (dataConTagZ data_con) (dataConIdentity data_con) prof | not (dopt Opt_SccProfilingOn dflags) = NoProfilingInfo | otherwise = ProfilingInfo ty_descr val_descr diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs index c822a64e2c..8e775dec51 100644 --- a/compiler/codeGen/StgCmmCon.hs +++ b/compiler/codeGen/StgCmmCon.hs @@ -185,7 +185,7 @@ buildDynCon' dflags platform binder _cc con [arg] , StgLitArg (MachInt val) <- arg , val <= fromIntegral (mAX_INTLIKE dflags) -- Comparisons at type Integer! , val >= fromIntegral (mIN_INTLIKE dflags) -- ...ditto... - = do { let intlike_lbl = mkCmmGcPtrLabel rtsPackageId (fsLit "stg_INTLIKE_closure") + = do { let intlike_lbl = mkCmmClosureLabel rtsPackageId (fsLit "stg_INTLIKE") val_int = fromIntegral val :: Int offsetW = (val_int - mIN_INTLIKE dflags) * (fixedHdrSize dflags + 1) -- INTLIKE closures consist of a header and one word payload @@ -200,7 +200,7 @@ buildDynCon' dflags platform binder _cc con [arg] , let val_int = ord val :: Int , val_int <= mAX_CHARLIKE dflags , val_int >= mIN_CHARLIKE dflags - = do { let charlike_lbl = mkCmmGcPtrLabel rtsPackageId (fsLit "stg_CHARLIKE_closure") + = do { let charlike_lbl = mkCmmClosureLabel rtsPackageId (fsLit "stg_CHARLIKE") offsetW = (val_int - mIN_CHARLIKE dflags) * (fixedHdrSize dflags + 1) -- CHARLIKE closures consist of a header and one word payload charlike_amode = cmmLabelOffW dflags charlike_lbl offsetW diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index a8ffc12bb0..a0859252ff 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -717,12 +717,12 @@ emitEnter fun = do -- AssignTo res_regs _ -> do { lret <- newLabelC - ; let (off, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs + ; let (off, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs [] ; lcall <- newLabelC ; updfr_off <- getUpdFrameOff ; let area = Young lret ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area - [fun] updfr_off (0,[]) + [fun] updfr_off [] -- refer to fun via nodeReg after the copyout, to avoid having -- both live simultaneously; this sometimes enables fun to be -- inlined in the RHS of the R1 assignment. diff --git a/compiler/codeGen/CgExtCode.hs b/compiler/codeGen/StgCmmExtCode.hs index a651319a49..b0608227ae 100644 --- a/compiler/codeGen/CgExtCode.hs +++ b/compiler/codeGen/StgCmmExtCode.hs @@ -9,44 +9,36 @@ -- to collect declarations as we parse the proc, and feed the environment -- back in circularly (to avoid a two-pass algorithm). -{-# OPTIONS -fno-warn-tabs #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and --- detab the module (please do the detabbing in a separate patch). See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces --- for details - -module CgExtCode ( - ExtFCode(..), - ExtCode, - Named(..), Env, +module StgCmmExtCode ( + CmmParse(..), + Named(..), Env, loopDecls, getEnv, newLocal, - newLabel, + newLabel, + newBlockId, newFunctionName, newImport, lookupLabel, lookupName, code, - code2, - nopEC, - stmtEC, - stmtsEC, - getCgStmtsEC, - getCgStmtsEC', - forkLabelledCodeEC + emit, emitLabel, emitAssign, emitStore, + getCode, getCodeR, + emitOutOfLine, + withUpdFrameOff, getUpdFrameOff ) where -import CgMonad +import qualified StgCmmMonad as F +import StgCmmMonad (FCode, newUnique) +import Cmm import CLabel -import OldCmm hiding( ClosureTypeInfo(..) ) +import MkGraph -- import BasicTypes import BlockId @@ -73,22 +65,22 @@ type Decls = [(FastString,Named)] -- | Does a computation in the FCode monad, with a current environment -- and a list of local declarations. Returns the resulting list of declarations. -newtype ExtFCode a +newtype CmmParse a = EC { unEC :: Env -> Decls -> FCode (Decls, a) } -type ExtCode = ExtFCode () +type ExtCode = CmmParse () -returnExtFC :: a -> ExtFCode a +returnExtFC :: a -> CmmParse a returnExtFC a = EC $ \_ s -> return (s, a) -thenExtFC :: ExtFCode a -> (a -> ExtFCode b) -> ExtFCode b +thenExtFC :: CmmParse a -> (a -> CmmParse b) -> CmmParse b thenExtFC (EC m) k = EC $ \e s -> do (s',r) <- m e s; unEC (k r) e s' -instance Monad ExtFCode where +instance Monad CmmParse where (>>=) = thenExtFC return = returnExtFC -instance HasDynFlags ExtFCode where +instance HasDynFlags CmmParse where getDynFlags = EC (\_ d -> do dflags <- getDynFlags return (d, dflags)) @@ -99,15 +91,15 @@ instance HasDynFlags ExtFCode where -- procedure, and imports that scope over the entire module. -- Discards the local declaration contained within decl' -- -loopDecls :: ExtFCode a -> ExtFCode a +loopDecls :: CmmParse a -> CmmParse a loopDecls (EC fcode) = EC $ \e globalDecls -> do - (_, a) <- fixC (\ ~(decls, _) -> fcode (addListToUFM e (decls ++ globalDecls)) globalDecls) + (_, a) <- F.fixC (\ ~(decls, _) -> fcode (addListToUFM e (decls ++ globalDecls)) globalDecls) return (globalDecls, a) -- | Get the current environment from the monad. -getEnv :: ExtFCode Env +getEnv :: CmmParse Env getEnv = EC $ \e s -> return (s, e) @@ -127,7 +119,7 @@ addLabel name block_id newLocal :: CmmType -- ^ data type -> FastString -- ^ name of variable - -> ExtFCode LocalReg -- ^ register holding the value + -> CmmParse LocalReg -- ^ register holding the value newLocal ty name = do u <- code newUnique @@ -137,12 +129,14 @@ newLocal ty name = do -- | Allocate a fresh label. -newLabel :: FastString -> ExtFCode BlockId +newLabel :: FastString -> CmmParse BlockId newLabel name = do u <- code newUnique addLabel name (mkBlockId u) return (mkBlockId u) +newBlockId :: CmmParse BlockId +newBlockId = code F.newLabelC -- | Add add a local function to the environment. newFunctionName @@ -159,7 +153,7 @@ newFunctionName name pkg -- over the whole module. newImport :: (FastString, CLabel) - -> ExtFCode () + -> CmmParse () newImport (name, cmmLabel) = addVarDecl name (CmmLit (CmmLabel cmmLabel)) @@ -168,7 +162,7 @@ newImport (name, cmmLabel) -- | Lookup the BlockId bound to the label with this name. -- If one hasn't been bound yet, create a fresh one based on the -- Unique of the name. -lookupLabel :: FastString -> ExtFCode BlockId +lookupLabel :: FastString -> CmmParse BlockId lookupLabel name = do env <- getEnv return $ @@ -181,7 +175,7 @@ lookupLabel name = do -- Unknown names are treated as if they had been 'import'ed from the runtime system. -- This saves us a lot of bother in the RTS sources, at the expense of -- deferring some errors to link time. -lookupName :: FastString -> ExtFCode CmmExpr +lookupName :: FastString -> CmmParse CmmExpr lookupName name = do env <- getEnv return $ @@ -191,51 +185,40 @@ lookupName name = do _other -> CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId name)) --- | Lift an FCode computation into the ExtFCode monad -code :: FCode a -> ExtFCode a +-- | Lift an FCode computation into the CmmParse monad +code :: FCode a -> CmmParse a code fc = EC $ \_ s -> do r <- fc return (s, r) +emit :: CmmAGraph -> CmmParse () +emit = code . F.emit -code2 :: (FCode (Decls,b) -> FCode ((Decls,b),c)) -> ExtFCode b -> ExtFCode c -code2 f (EC ec) - = EC $ \e s -> do - ((s', _),c) <- f (ec e s) - return (s',c) +emitLabel :: BlockId -> CmmParse () +emitLabel = code. F.emitLabel +emitAssign :: CmmReg -> CmmExpr -> CmmParse () +emitAssign l r = code (F.emitAssign l r) --- | Do nothing in the ExtFCode monad. -nopEC :: ExtFCode () -nopEC = code nopC +emitStore :: CmmExpr -> CmmExpr -> CmmParse () +emitStore l r = code (F.emitStore l r) +getCode :: CmmParse a -> CmmParse CmmAGraph +getCode (EC ec) = EC $ \e s -> do + ((s',_), gr) <- F.getCodeR (ec e s) + return (s', gr) --- | Accumulate a CmmStmt into the monad state. -stmtEC :: CmmStmt -> ExtFCode () -stmtEC stmt = code (stmtC stmt) +getCodeR :: CmmParse a -> CmmParse (a, CmmAGraph) +getCodeR (EC ec) = EC $ \e s -> do + ((s', r), gr) <- F.getCodeR (ec e s) + return (s', (r,gr)) +emitOutOfLine :: BlockId -> CmmAGraph -> CmmParse () +emitOutOfLine l g = code (F.emitOutOfLine l g) --- | Accumulate some CmmStmts into the monad state. -stmtsEC :: [CmmStmt] -> ExtFCode () -stmtsEC stmts = code (stmtsC stmts) - - --- | Get the generated statements out of the monad state. -getCgStmtsEC :: ExtFCode a -> ExtFCode CgStmts -getCgStmtsEC = code2 getCgStmts' - - --- | Get the generated statements, and the return value out of the monad state. -getCgStmtsEC' :: ExtFCode a -> ExtFCode (a, CgStmts) -getCgStmtsEC' = code2 (\m -> getCgStmts' m >>= f) - where f ((decl, b), c) = return ((decl, b), (b, c)) - - --- | Emit a chunk of code outside the instruction stream, --- and return its block id. -forkLabelledCodeEC :: ExtFCode a -> ExtFCode BlockId -forkLabelledCodeEC ec = do - stmts <- getCgStmtsEC ec - code (forkCgStmts stmts) - +withUpdFrameOff :: UpdFrameOffset -> CmmParse () -> CmmParse () +withUpdFrameOff size inner + = EC $ \e s -> F.withUpdFrameOff size $ (unEC inner) e s +getUpdFrameOff :: CmmParse UpdFrameOffset +getUpdFrameOff = code $ F.getUpdFrameOff diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs index 9e4db9cdaa..1830f7b6d6 100644 --- a/compiler/codeGen/StgCmmForeign.hs +++ b/compiler/codeGen/StgCmmForeign.hs @@ -9,9 +9,10 @@ module StgCmmForeign ( cgForeignCall, loadThreadState, saveThreadState, emitPrimCall, emitCCall, + emitForeignCall, -- For CmmParse emitSaveThreadState, -- will be needed by the Cmm parser emitLoadThreadState, -- ditto - emitOpenNursery, + emitCloseNursery, emitOpenNursery ) where #include "HsVersions.h" @@ -24,10 +25,8 @@ import StgCmmUtils import StgCmmClosure import StgCmmLayout -import BlockId import Cmm import CmmUtils -import OldCmm ( CmmReturnInfo(..) ) import MkGraph import Type import TysPrim @@ -85,7 +84,7 @@ cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty DynamicTarget -> case cmm_args of (fn,_):rest -> (unzip rest, fn) [] -> panic "cgForeignCall []" - fc = ForeignConvention cconv arg_hints res_hints + fc = ForeignConvention cconv arg_hints res_hints CmmMayReturn call_target = ForeignTarget cmm_target fc -- we want to emit code for the call, and then emitReturn. @@ -100,12 +99,10 @@ cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty ; sequel <- getSequel ; case sequel of AssignTo assign_to_these _ -> - emitForeignCall safety assign_to_these call_target - call_args CmmMayReturn + emitForeignCall safety assign_to_these call_target call_args _something_else -> - do { _ <- emitForeignCall safety res_regs call_target - call_args CmmMayReturn + do { _ <- emitForeignCall safety res_regs call_target call_args ; emitReturn (map (CmmReg . CmmLocal) res_regs) } } @@ -183,17 +180,17 @@ emitCCall :: [(CmmFormal,ForeignHint)] -> [(CmmActual,ForeignHint)] -> FCode () emitCCall hinted_results fn hinted_args - = void $ emitForeignCall PlayRisky results target args CmmMayReturn + = void $ emitForeignCall PlayRisky results target args where (args, arg_hints) = unzip hinted_args (results, result_hints) = unzip hinted_results target = ForeignTarget fn fc - fc = ForeignConvention CCallConv arg_hints result_hints + fc = ForeignConvention CCallConv arg_hints result_hints CmmMayReturn emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode () emitPrimCall res op args - = void $ emitForeignCall PlayRisky res (PrimTarget op) args CmmMayReturn + = void $ emitForeignCall PlayRisky res (PrimTarget op) args -- alternative entry point, used by CmmParse emitForeignCall @@ -201,10 +198,8 @@ emitForeignCall -> [CmmFormal] -- where to put the results -> ForeignTarget -- the op -> [CmmActual] -- arguments - -> CmmReturnInfo -- This can say "never returns" - -- only RTS procedures do this -> FCode ReturnKind -emitForeignCall safety results target args _ret +emitForeignCall safety results target args | not (playSafe safety) = do dflags <- getDynFlags let (caller_save, caller_load) = callerSaveVolatileRegs dflags @@ -218,7 +213,7 @@ emitForeignCall safety results target args _ret updfr_off <- getUpdFrameOff temp_target <- load_target_into_temp target k <- newLabelC - let (off, copyout) = copyInOflow dflags NativeReturn (Young k) results + let (off, copyout) = copyInOflow dflags NativeReturn (Young k) results [] -- see Note [safe foreign call convention] emit $ ( mkStore (CmmStackSlot (Young k) (widthInBytes (wordWidth dflags))) @@ -285,17 +280,15 @@ saveThreadState dflags = mkStore (cmmOffset dflags stgCurrentTSO (tso_CCCS dflags)) curCCS else mkNop -emitSaveThreadState :: BlockId -> FCode () -emitSaveThreadState bid = do +emitSaveThreadState :: FCode () +emitSaveThreadState = do dflags <- getDynFlags + emit (saveThreadState dflags) - -- CurrentTSO->stackobj->sp = Sp; - emitStore (cmmOffset dflags (CmmLoad (cmmOffset dflags stgCurrentTSO (tso_stackobj dflags)) (bWord dflags)) (stack_SP dflags)) - (CmmStackSlot (Young bid) (widthInBytes (typeWidth (gcWord dflags)))) - emit $ closeNursery dflags - -- and save the current cost centre stack in the TSO when profiling: - when (dopt Opt_SccProfilingOn dflags) $ - emitStore (cmmOffset dflags stgCurrentTSO (tso_CCCS dflags)) curCCS +emitCloseNursery :: FCode () +emitCloseNursery = do + df <- getDynFlags + emit (closeNursery df) -- CurrentNursery->free = Hp+1; closeNursery :: DynFlags -> CmmAGraph @@ -303,8 +296,6 @@ closeNursery dflags = mkStore (nursery_bdescr_free dflags) (cmmOffsetW dflags st loadThreadState :: DynFlags -> LocalReg -> LocalReg -> CmmAGraph loadThreadState dflags tso stack = do - -- tso <- newTemp (gcWord dflags) -- TODO FIXME NOW - -- stack <- newTemp (gcWord dflags) -- TODO FIXME NOW catAGraphs [ -- tso = CurrentTSO; mkAssign (CmmLocal tso) stgCurrentTSO, @@ -321,9 +312,18 @@ loadThreadState dflags tso stack = do storeCurCCS (CmmLoad (cmmOffset dflags (CmmReg (CmmLocal tso)) (tso_CCCS dflags)) (ccsType dflags)) else mkNop] -emitLoadThreadState :: LocalReg -> LocalReg -> FCode () -emitLoadThreadState tso stack = do dflags <- getDynFlags - emit $ loadThreadState dflags tso stack + +emitLoadThreadState :: FCode () +emitLoadThreadState = do + dflags <- getDynFlags + load_tso <- newTemp (gcWord dflags) + load_stack <- newTemp (gcWord dflags) + emit $ loadThreadState dflags load_tso load_stack + +emitOpenNursery :: FCode () +emitOpenNursery = do + df <- getDynFlags + emit (openNursery df) openNursery :: DynFlags -> CmmAGraph openNursery dflags = catAGraphs [ @@ -345,9 +345,6 @@ openNursery dflags = catAGraphs [ ) ) ] -emitOpenNursery :: FCode () -emitOpenNursery = do dflags <- getDynFlags - emit $ openNursery dflags nursery_bdescr_free, nursery_bdescr_start, nursery_bdescr_blocks :: DynFlags -> CmmExpr nursery_bdescr_free dflags = cmmOffset dflags stgCurrentNursery (oFFSET_bdescr_free dflags) diff --git a/compiler/codeGen/StgCmmGran.hs b/compiler/codeGen/StgCmmGran.hs index 2abca3fe16..fe00d7c384 100644 --- a/compiler/codeGen/StgCmmGran.hs +++ b/compiler/codeGen/StgCmmGran.hs @@ -53,7 +53,7 @@ staticGranHdr = [] doGranAllocate :: CmmExpr -> Code -- macro DO_GRAN_ALLOCATE doGranAllocate hp - | not opt_GranMacros = nopC + | not opt_GranMacros = return () | otherwise = panic "doGranAllocate" @@ -75,7 +75,7 @@ granFetchAndReschedule regs node_reqd = do { fetch ; reschedule liveness node_reqd } | otherwise - = nopC + = return () where liveness = mkRegLiveness regs 0 0 @@ -109,7 +109,7 @@ granYield :: [(Id,GlobalReg)] -- Live registers granYield regs node_reqd | opt_GranMacros && node_reqd = yield liveness - | otherwise = nopC + | otherwise = return () where liveness = mkRegLiveness regs 0 0 diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs index b7cca48f5a..c133ab00d4 100644 --- a/compiler/codeGen/StgCmmHeap.hs +++ b/compiler/codeGen/StgCmmHeap.hs @@ -11,6 +11,8 @@ module StgCmmHeap ( getHpRelOffset, hpRel, entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo, + heapStackCheckGen, + entryHeapCheck', mkVirtHeapOffsets, mkVirtConstrOffsets, mkStaticClosureFields, mkStaticClosure, @@ -47,6 +49,7 @@ import FastString( mkFastString, fsLit ) import Util import Control.Monad (when) +import Data.Maybe (isJust) ----------------------------------------------------------- -- Initialise dynamic heap objects @@ -334,16 +337,28 @@ entryHeapCheck :: ClosureInfo -> FCode () entryHeapCheck cl_info nodeSet arity args code + = entryHeapCheck' is_fastf node arity args code + where + node = case nodeSet of + Just r -> CmmReg (CmmLocal r) + Nothing -> CmmLit (CmmLabel $ staticClosureLabel cl_info) + + is_fastf = case closureFunInfo cl_info of + Just (_, ArgGen _) -> False + _otherwise -> True + +-- | lower-level version for CmmParse +entryHeapCheck' :: Bool -- is a known function pattern + -> CmmExpr -- expression for the closure pointer + -> Int -- Arity -- not same as len args b/c of voids + -> [LocalReg] -- Non-void args (empty for thunk) + -> FCode () + -> FCode () +entryHeapCheck' is_fastf node arity args code = do dflags <- getDynFlags let is_thunk = arity == 0 - is_fastf = case closureFunInfo cl_info of - Just (_, ArgGen _) -> False - _otherwise -> True args' = map (CmmReg . CmmLocal) args - node = case nodeSet of - Just r -> CmmReg (CmmLocal r) - Nothing -> CmmLit (CmmLabel $ staticClosureLabel cl_info) stg_gc_fun = CmmReg (CmmGlobal GCFun) stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1) @@ -373,50 +388,6 @@ entryHeapCheck cl_info nodeSet arity args code emitLabel loop_id heapCheck True True (gc_call updfr_sz <*> mkBranch loop_id) code -{- - -- This code is slightly outdated now and we could easily keep the above - -- GC methods. However, there may be some performance gains to be made by - -- using more specialised GC entry points. Since the semi generic GCFun - -- entry needs to check the node and figure out what registers to save... - -- if we provided and used more specialised GC entry points then these - -- runtime decisions could be turned into compile time decisions. - - args' = case fun of Just f -> f : args - Nothing -> args - arg_exprs = map (CmmReg . CmmLocal) args' - gc_call updfr_sz - | arity == 0 = mkJumpGC (CmmReg (CmmGlobal GCEnter1)) arg_exprs updfr_sz - | otherwise = - case gc_lbl args' of - Just _lbl -> panic "StgCmmHeap.entryHeapCheck: not finished" - -- mkJumpGC (CmmLit (CmmLabel (mkRtsCodeLabel lbl))) - -- arg_exprs updfr_sz - Nothing -> mkCall generic_gc (GC, GC) [] [] updfr_sz - - gc_lbl :: [LocalReg] -> Maybe FastString - gc_lbl [reg] - | isGcPtrType ty = Just (sLit "stg_gc_unpt_r1") -- "stg_gc_fun_1p" - | isFloatType ty = case width of - W32 -> Just (sLit "stg_gc_f1") - W64 -> Just (sLit "stg_gc_d1") - _other -> Nothing - | width == wordWidth dflags = Just (mkGcLabel "stg_gc_unbx_r1") - | width == W64 = Just (mkGcLabel "stg_gc_l1") - | otherwise = Nothing - where - ty = localRegType reg - width = typeWidth ty - - gc_lbl regs = gc_lbl_ptrs (map (isGcPtrType . localRegType) regs) - - gc_lbl_ptrs :: [Bool] -> Maybe FastString - -- JD: TEMPORARY -- UNTIL THESE FUNCTIONS EXIST... - --gc_lbl_ptrs [True,True] = Just (sLit "stg_gc_fun_2p") - --gc_lbl_ptrs [True,True,True] = Just (sLit "stg_gc_fun_3p") - gc_lbl_ptrs _ = Nothing --} - - -- ------------------------------------------------------------ -- A heap/stack check in a case alternative @@ -445,7 +416,7 @@ altOrNoEscapeHeapCheck checkYield regs code = do Nothing -> genericGC checkYield code Just gc -> do lret <- newLabelC - let (off, copyin) = copyInOflow dflags NativeReturn (Young lret) regs + let (off, copyin) = copyInOflow dflags NativeReturn (Young lret) regs [] lcont <- newLabelC emitOutOfLine lret (copyin <*> mkBranch lcont) emitLabel lcont @@ -475,23 +446,29 @@ cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code reg_exprs = map (CmmReg . CmmLocal) regs -- Note [stg_gc arguments] + -- NB. we use the NativeReturn convention for passing arguments + -- to the canned heap-check routines, because we are in a case + -- alternative and hence the [LocalReg] was passed to us in the + -- NativeReturn convention. gc_call dflags label sp - | cont_on_stack = mkJumpReturnsTo dflags label GC reg_exprs lret off sp - | otherwise = mkCallReturnsTo dflags label GC reg_exprs lret off sp (0,[]) + | cont_on_stack + = mkJumpReturnsTo dflags label NativeReturn reg_exprs lret off sp + | otherwise + = mkCallReturnsTo dflags label NativeReturn reg_exprs lret off sp [] genericGC :: Bool -> FCode a -> FCode a genericGC checkYield code = do updfr_sz <- getUpdFrameOff lretry <- newLabelC emitLabel lretry - call <- mkCall generic_gc (GC, GC) [] [] updfr_sz (0,[]) + call <- mkCall generic_gc (GC, GC) [] [] updfr_sz [] heapCheck False checkYield (call <*> mkBranch lretry) code cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr cannedGCEntryPoint dflags regs - = case regs of + = case map localRegType regs of [] -> Just (mkGcLabel "stg_gc_noregs") - [reg] + [ty] | isGcPtrType ty -> Just (mkGcLabel "stg_gc_unpt_r1") | isFloatType ty -> case width of W32 -> Just (mkGcLabel "stg_gc_f1") @@ -502,8 +479,19 @@ cannedGCEntryPoint dflags regs | width == W64 -> Just (mkGcLabel "stg_gc_l1") | otherwise -> Nothing where - ty = localRegType reg width = typeWidth ty + [ty1,ty2] + | isGcPtrType ty1 + && isGcPtrType ty2 -> Just (mkGcLabel "stg_gc_pp") + [ty1,ty2,ty3] + | isGcPtrType ty1 + && isGcPtrType ty2 + && isGcPtrType ty3 -> Just (mkGcLabel "stg_gc_ppp") + [ty1,ty2,ty3,ty4] + | isGcPtrType ty1 + && isGcPtrType ty2 + && isGcPtrType ty3 + && isGcPtrType ty4 -> Just (mkGcLabel "stg_gc_pppp") _otherwise -> Nothing -- Note [stg_gc arguments] @@ -538,51 +526,70 @@ heapCheck checkStack checkYield do_gc code = getHeapUsage $ \ hpHw -> -- Emit heap checks, but be sure to do it lazily so -- that the conditionals on hpHw don't cause a black hole - do { codeOnly $ do_checks checkStack checkYield hpHw do_gc + do { dflags <- getDynFlags + ; let mb_alloc_bytes + | hpHw > 0 = Just (mkIntExpr dflags (hpHw * (wORD_SIZE dflags))) + | otherwise = Nothing + stk_hwm | checkStack = Just (CmmLit CmmHighStackMark) + | otherwise = Nothing + ; codeOnly $ do_checks stk_hwm checkYield mb_alloc_bytes do_gc ; tickyAllocHeap hpHw ; doGranAllocate hpHw ; setRealHp hpHw ; code } -do_checks :: Bool -- Should we check the stack? +heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode () +heapStackCheckGen stk_hwm mb_bytes + = do updfr_sz <- getUpdFrameOff + lretry <- newLabelC + emitLabel lretry + call <- mkCall generic_gc (GC, GC) [] [] updfr_sz [] + do_checks stk_hwm False mb_bytes (call <*> mkBranch lretry) + +do_checks :: Maybe CmmExpr -- Should we check the stack? -> Bool -- Should we check for preemption? - -> WordOff -- Heap headroom + -> Maybe CmmExpr -- Heap headroom (bytes) -> CmmAGraph -- What to do on failure -> FCode () -do_checks checkStack checkYield alloc do_gc = do +do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do dflags <- getDynFlags + gc_id <- newLabelC + let - alloc_lit = mkIntExpr dflags (alloc * wORD_SIZE dflags) -- Bytes - bump_hp = cmmOffsetExprB dflags (CmmReg hpReg) alloc_lit + Just alloc_lit = mb_alloc_lit + + bump_hp = cmmOffsetExprB dflags (CmmReg hpReg) alloc_lit -- Sp overflow if (Sp - CmmHighStack < SpLim) - sp_oflo = CmmMachOp (mo_wordULt dflags) + sp_oflo sp_hwm = + CmmMachOp (mo_wordULt dflags) [CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg))) - [CmmReg spReg, CmmLit CmmHighStackMark], + [CmmReg spReg, sp_hwm], CmmReg spLimReg] -- Hp overflow if (Hp > HpLim) -- (Hp has been incremented by now) -- HpLim points to the LAST WORD of valid allocation space. hp_oflo = CmmMachOp (mo_wordUGt dflags) - [CmmReg hpReg, CmmReg (CmmGlobal HpLim)] - - -- Yielding if HpLim == 0 - yielding = CmmMachOp (mo_wordEq dflags) - [CmmReg (CmmGlobal HpLim), CmmLit (zeroCLit dflags)] + [CmmReg hpReg, CmmReg (CmmGlobal HpLim)] alloc_n = mkAssign (CmmGlobal HpAlloc) alloc_lit - gc_id <- newLabelC - when checkStack $ do - emit =<< mkCmmIfGoto sp_oflo gc_id + case mb_stk_hwm of + Nothing -> return () + Just stk_hwm -> emit =<< mkCmmIfGoto (sp_oflo stk_hwm) gc_id - if (alloc /= 0) + if (isJust mb_alloc_lit) then do - emitAssign hpReg bump_hp - emit =<< mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id) + emitAssign hpReg bump_hp + emit =<< mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id) else do - when (not (dopt Opt_OmitYields dflags) && checkYield) (emit =<< mkCmmIfGoto yielding gc_id) + when (not (dopt Opt_OmitYields dflags) && checkYield) $ do + -- Yielding if HpLim == 0 + let yielding = CmmMachOp (mo_wordEq dflags) + [CmmReg (CmmGlobal HpLim), + CmmLit (zeroCLit dflags)] + emit =<< mkCmmIfGoto yielding gc_id emitOutOfLine gc_id $ do_gc -- this is expected to jump back somewhere diff --git a/compiler/codeGen/StgCmmHpc.hs b/compiler/codeGen/StgCmmHpc.hs index cb60e9dd71..85f4c161ad 100644 --- a/compiler/codeGen/StgCmmHpc.hs +++ b/compiler/codeGen/StgCmmHpc.hs @@ -19,6 +19,8 @@ import StgCmmUtils import HscTypes import DynFlags +import Control.Monad + mkTickBox :: DynFlags -> Module -> Int -> CmmAGraph mkTickBox dflags mod n = mkStore tick_box (CmmMachOp (MO_Add W64) @@ -36,7 +38,7 @@ initHpc _ (NoHpcInfo {}) = return () initHpc this_mod (HpcInfo tickCount _hashNo) = do dflags <- getDynFlags - whenC (dopt Opt_Hpc dflags) $ + when (dopt Opt_Hpc dflags) $ do emitDataLits (mkHpcTicksLabel this_mod) [ (CmmInt 0 W64) | _ <- take tickCount [0 :: Int ..] diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs index 75d8d1c38f..4742332107 100644 --- a/compiler/codeGen/StgCmmLayout.hs +++ b/compiler/codeGen/StgCmmLayout.hs @@ -111,7 +111,7 @@ emitCall convs fun args -- emitCallWithExtraStack :: (Convention, Convention) -> CmmExpr -> [CmmExpr] - -> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode ReturnKind + -> [CmmExpr] -> FCode ReturnKind emitCallWithExtraStack (callConv, retConv) fun args extra_stack = do { dflags <- getDynFlags ; adjustHpBackwards @@ -124,7 +124,7 @@ emitCallWithExtraStack (callConv, retConv) fun args extra_stack AssignTo res_regs _ -> do k <- newLabelC let area = Young k - (off, copyin) = copyInOflow dflags retConv area res_regs + (off, copyin) = copyInOflow dflags retConv area res_regs [] copyout = mkCallReturnsTo dflags fun callConv args k off updfr_off extra_stack emit (copyout <*> mkLabel k <*> copyin) @@ -222,7 +222,7 @@ direct_call caller call_conv lbl arity args emitCallWithExtraStack (call_conv, NativeReturn) target (nonVArgs fast_args) - (mkStkOffsets dflags (stack_args dflags)) + (nonVArgs (stack_args dflags)) where target = CmmLit (CmmLabel lbl) (fast_args, rest_args) = splitAt real_arity args @@ -326,32 +326,7 @@ slowCallPattern [] = (fsLit "stg_ap_0", 0) ------------------------------------------------------------------------- --- Fix the byte-offsets of a bunch of things to push on the stack - --- This is used for pushing slow-call continuations. --- See Note [over-saturated calls]. - -mkStkOffsets - :: DynFlags - -> [(ArgRep, Maybe CmmExpr)] -- things to make offsets for - -> ( ByteOff -- OUTPUTS: Topmost allocated word - , [(CmmExpr, ByteOff)] ) -- things with offsets (voids filtered out) -mkStkOffsets dflags things - = loop 0 [] (reverse things) - where - loop offset offs [] = (offset,offs) - loop offset offs ((_,Nothing):things) = loop offset offs things - -- ignore Void arguments - loop offset offs ((rep,Just thing):things) - = loop thing_off ((thing, thing_off):offs) things - where - thing_off = offset + argRepSizeW dflags rep * wORD_SIZE dflags - -- offset of thing is offset+size, because we're - -- growing the stack *downwards* as the offsets increase. - - -------------------------------------------------------------------------- --- Classifying arguments: ArgRep +-- Classifying arguments: ArgRep ------------------------------------------------------------------------- -- ArgRep is not exported (even abstractly) @@ -472,7 +447,7 @@ mkArgDescr _nm args let arg_bits = argBits dflags arg_reps arg_reps = filter isNonV (map idArgRep args) -- Getting rid of voids eases matching of standard patterns - case stdPattern dflags arg_reps of + case stdPattern arg_reps of Just spec_id -> return (ArgSpec spec_id) Nothing -> return (ArgGen arg_bits) @@ -483,10 +458,9 @@ argBits dflags (arg : args) = take (argRepSizeW dflags arg) (repeat True) ++ argBits dflags args ---------------------- -stdPattern :: DynFlags -> [ArgRep] -> Maybe StgHalfWord -stdPattern dflags reps - = fmap (toStgHalfWord dflags) - $ case reps of +stdPattern :: [ArgRep] -> Maybe Int +stdPattern reps + = case reps of [] -> Just ARG_NONE -- just void args, probably [N] -> Just ARG_N [P] -> Just ARG_P @@ -545,7 +519,7 @@ emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args body ; let args' = if node_points then (node : arg_regs) else arg_regs conv = if nodeMustPointToIt dflags lf_info then NativeNodeCall else NativeDirectCall - (offset, _) = mkCallEntry dflags conv args' + (offset, _) = mkCallEntry dflags conv args' [] ; emitClosureAndInfoTable info_tbl conv args' $ body (offset, node, arg_regs) } diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs index fb290d8e96..b7797bdae6 100644 --- a/compiler/codeGen/StgCmmMonad.hs +++ b/compiler/codeGen/StgCmmMonad.hs @@ -18,15 +18,16 @@ module StgCmmMonad ( FCode, -- type initC, runC, thenC, thenFC, listCs, - returnFC, nopC, whenC, + returnFC, fixC, newUnique, newUniqSupply, newLabelC, emitLabel, - emit, emitDecl, emitProc, emitProcWithConvention, emitSimpleProc, + emit, emitDecl, emitProc, + emitProcWithConvention, emitProcWithStackFrame, emitOutOfLine, emitAssign, emitStore, emitComment, - getCmm, cgStmtsToBlocks, + getCmm, aGraphToGraph, getCodeR, getCode, getHeapUsage, mkCmmIfThenElse, mkCmmIfThen, mkCmmIfGoto, @@ -89,7 +90,30 @@ infixr 9 `thenFC` -------------------------------------------------------- --- The FCode monad and its types +-- The FCode monad and its types +-- +-- FCode is the monad plumbed through the Stg->Cmm code generator, and +-- the Cmm parser. It contains the following things: +-- +-- - A writer monad, collecting: +-- - code for the current function, in the form of a CmmAGraph. +-- The function "emit" appends more code to this. +-- - the top-level CmmDecls accumulated so far +-- +-- - A state monad with: +-- - the local bindings in scope +-- - the current heap usage +-- - a UniqSupply +-- +-- - A reader monad, for CgInfoDownwards, containing +-- - DynFlags, +-- - the current Module +-- - the static top-level environmnet +-- - the update-frame offset +-- - the ticky counter label +-- - the Sequel (the continuation to return to) + + -------------------------------------------------------- newtype FCode a = FCode (CgInfoDownwards -> CgState -> (# a, CgState #)) @@ -120,13 +144,6 @@ thenC (FCode m) (FCode k) = FCode $ \info_down state -> case m info_down state of (# _,new_state #) -> k info_down new_state -nopC :: FCode () -nopC = return () - -whenC :: Bool -> FCode () -> FCode () -whenC True code = code -whenC False _code = nopC - listCs :: [FCode ()] -> FCode () listCs [] = return () listCs (fc:fcs) = do @@ -141,6 +158,15 @@ thenFC (FCode m) k = FCode $ case k m_result of FCode kcode -> kcode info_down new_state +fixC :: (a -> FCode a) -> FCode a +fixC fcode = FCode ( + \info_down state -> + let + (v,s) = doFCode (fcode v) info_down state + in + (# v, s #) + ) + -------------------------------------------------------- -- The code generator environment -------------------------------------------------------- @@ -478,7 +504,7 @@ getSequel = do { info <- getInfoDown -- Note: I'm including the size of the original return address -- in the size of the update frame -- hence the default case on `get'. -withUpdFrameOff :: UpdFrameOffset -> FCode () -> FCode () +withUpdFrameOff :: UpdFrameOffset -> FCode a -> FCode a withUpdFrameOff size code = do { info <- getInfoDown ; withInfoDown code (info {cgd_updfr_off = size }) } @@ -675,31 +701,60 @@ emitDecl decl emitOutOfLine :: BlockId -> CmmAGraph -> FCode () emitOutOfLine l stmts = emitCgStmt (CgFork l stmts) +emitProcWithStackFrame + :: Convention -- entry convention + -> Maybe CmmInfoTable -- info table? + -> CLabel -- label for the proc + -> [CmmFormal] -- stack frame + -> [CmmFormal] -- arguments + -> CmmAGraph -- code + -> Bool -- do stack layout? + -> FCode () + +emitProcWithStackFrame _conv mb_info lbl _stk_args [] blocks False + = do { dflags <- getDynFlags + ; emitProc_ mb_info lbl blocks (widthInBytes (wordWidth dflags)) False + } +emitProcWithStackFrame conv mb_info lbl stk_args args blocks True -- do layout + = do { dflags <- getDynFlags + ; let (offset, entry) = mkCallEntry dflags conv args stk_args + ; emitProc_ mb_info lbl (entry <*> blocks) offset True + } +emitProcWithStackFrame _ _ _ _ _ _ _ = panic "emitProcWithStackFrame" + emitProcWithConvention :: Convention -> Maybe CmmInfoTable -> CLabel - -> [CmmFormal] -> CmmAGraph -> FCode () + -> [CmmFormal] + -> CmmAGraph + -> FCode () emitProcWithConvention conv mb_info lbl args blocks + = emitProcWithStackFrame conv mb_info lbl [] args blocks True + +emitProc :: Maybe CmmInfoTable -> CLabel -> CmmAGraph -> Int -> FCode () +emitProc mb_info lbl blocks offset + = emitProc_ mb_info lbl blocks offset True + +emitProc_ :: Maybe CmmInfoTable -> CLabel -> CmmAGraph -> Int -> Bool + -> FCode () +emitProc_ mb_info lbl blocks offset do_layout = do { dflags <- getDynFlags - ; us <- newUniqSupply - ; let (offset, entry) = mkCallEntry dflags conv args - blks = initUs_ us $ lgraphOfAGraph $ entry <*> blocks - ; let sinfo = StackInfo {arg_space = offset, updfr_space = Just (initUpdFrameOff dflags)} - tinfo = TopInfo {info_tbls = infos, stack_info=sinfo} - proc_block = CmmProc tinfo lbl blks + ; l <- newLabelC + ; let + blks = labelAGraph l blocks - infos | Just info <- mb_info - = mapSingleton (g_entry blks) info - | otherwise - = mapEmpty + infos | Just info <- mb_info = mapSingleton (g_entry blks) info + | otherwise = mapEmpty - ; state <- getState - ; setState $ state { cgs_tops = cgs_tops state `snocOL` proc_block } } + sinfo = StackInfo { arg_space = offset + , updfr_space = Just (initUpdFrameOff dflags) + , do_layout = do_layout } + + tinfo = TopInfo { info_tbls = infos + , stack_info=sinfo} -emitProc :: Maybe CmmInfoTable -> CLabel -> [CmmFormal] -> CmmAGraph -> FCode () -emitProc = emitProcWithConvention NativeNodeCall + proc_block = CmmProc tinfo lbl blks -emitSimpleProc :: CLabel -> CmmAGraph -> FCode () -emitSimpleProc lbl code = - emitProc Nothing lbl [] code + ; state <- getState + ; setState $ state { cgs_tops = cgs_tops state `snocOL` proc_block } } getCmm :: FCode () -> FCode CmmGroup -- Get all the CmmTops (there should be no stmts) @@ -735,29 +790,25 @@ mkCmmIfThen e tbranch = do mkCall :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmActual] - -> UpdFrameOffset -> (ByteOff,[(CmmExpr,ByteOff)]) -> FCode CmmAGraph + -> UpdFrameOffset -> [CmmActual] -> FCode CmmAGraph mkCall f (callConv, retConv) results actuals updfr_off extra_stack = do dflags <- getDynFlags k <- newLabelC let area = Young k - (off, copyin) = copyInOflow dflags retConv area results + (off, copyin) = copyInOflow dflags retConv area results [] copyout = mkCallReturnsTo dflags f callConv actuals k off updfr_off extra_stack return (copyout <*> mkLabel k <*> copyin) mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmActual] -> UpdFrameOffset -> FCode CmmAGraph mkCmmCall f results actuals updfr_off - = mkCall f (NativeDirectCall, NativeReturn) results actuals updfr_off (0,[]) + = mkCall f (NativeDirectCall, NativeReturn) results actuals updfr_off [] -- ---------------------------------------------------------------------------- --- CgStmts - --- These functions deal in terms of CgStmts, which is an abstract type --- representing the code in the current proc. +-- turn CmmAGraph into CmmGraph, for making a new proc. --- turn CgStmts into [CmmBasicBlock], for making a new proc. -cgStmtsToBlocks :: CmmAGraph -> FCode CmmGraph -cgStmtsToBlocks stmts - = do { us <- newUniqSupply - ; return (initUs_ us (lgraphOfAGraph stmts)) } +aGraphToGraph :: CmmAGraph -> FCode CmmGraph +aGraphToGraph stmts + = do { l <- newLabelC + ; return (labelAGraph l stmts) } diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index cbb2aa70bd..97104ce4a2 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -97,7 +97,7 @@ cgOpApp (StgPrimOp primop) args res_ty | primOpOutOfLine primop = do { cmm_args <- getNonVoidArgAmodes args ; let fun = CmmLit (CmmLabel (mkRtsPrimOpLabel primop)) - ; emitCall (PrimOpCall, PrimOpReturn) fun cmm_args } + ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args } | ReturnsPrim VoidRep <- result_info = do cgPrimOp [] primop args @@ -130,7 +130,7 @@ cgOpApp (StgPrimOp primop) args res_ty cgOpApp (StgPrimCallOp primcall) args _res_ty = do { cmm_args <- getNonVoidArgAmodes args ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall)) - ; emitCall (PrimOpCall, PrimOpReturn) fun cmm_args } + ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args } --------------------------------------------------- cgPrimOp :: [LocalReg] -- where to put the results diff --git a/compiler/codeGen/StgCmmProf.hs b/compiler/codeGen/StgCmmProf.hs index b666554403..1b218462e1 100644 --- a/compiler/codeGen/StgCmmProf.hs +++ b/compiler/codeGen/StgCmmProf.hs @@ -82,24 +82,22 @@ costCentreFrom :: DynFlags -> CmmExpr -- The cost centre from that closure costCentreFrom dflags cl = CmmLoad (cmmOffsetB dflags cl (oFFSET_StgHeader_ccs dflags)) (ccsType dflags) +-- | The profiling header words in a static closure staticProfHdr :: DynFlags -> CostCentreStack -> [CmmLit] --- The profiling header words in a static closure --- Was SET_STATIC_PROF_HDR staticProfHdr dflags ccs = ifProfilingL dflags [mkCCostCentreStack ccs, staticLdvInit dflags] +-- | Profiling header words in a dynamic closure dynProfHdr :: DynFlags -> CmmExpr -> [CmmExpr] --- Profiling header words in a dynamic closure dynProfHdr dflags ccs = ifProfilingL dflags [ccs, dynLdvInit dflags] -initUpdFrameProf :: ByteOff -> FCode () --- Initialise the profiling field of an update frame -initUpdFrameProf frame_off +-- | Initialise the profiling field of an update frame +initUpdFrameProf :: CmmExpr -> FCode () +initUpdFrameProf frame = ifProfiling $ -- frame->header.prof.ccs = CCCS do dflags <- getDynFlags - emitStore (CmmStackSlot Old (frame_off - oFFSET_StgHeader_ccs dflags)) - curCCS - -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0) + emitStore (cmmOffset dflags frame (oFFSET_StgHeader_ccs dflags)) curCCS + -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0) -- is unnecessary because it is not used anyhow. --------------------------------------------------------------------------- @@ -200,7 +198,7 @@ ifProfiling code = do dflags <- getDynFlags if dopt Opt_SccProfilingOn dflags then code - else nopC + else return () ifProfilingL :: DynFlags -> [a] -> [a] ifProfilingL dflags xs @@ -216,7 +214,7 @@ initCostCentres :: CollectedCCs -> FCode () -- Emit the declarations initCostCentres (local_CCs, ___extern_CCs, singleton_CCSs) = do dflags <- getDynFlags - whenC (dopt Opt_SccProfilingOn dflags) $ + when (dopt Opt_SccProfilingOn dflags) $ do mapM_ emitCostCentreDecl local_CCs mapM_ emitCostCentreStackDecl singleton_CCSs @@ -283,7 +281,7 @@ emitSetCCC :: CostCentre -> Bool -> Bool -> FCode () emitSetCCC cc tick push = do dflags <- getDynFlags if not (dopt Opt_SccProfilingOn dflags) - then nopC + then return () else do tmp <- newTemp (ccsType dflags) -- TODO FIXME NOW pushCostCentre tmp curCCS cc when tick $ emit (bumpSccCount dflags (CmmReg (CmmLocal tmp))) @@ -321,7 +319,7 @@ dynLdvInit :: DynFlags -> CmmExpr dynLdvInit dflags = -- (era << LDV_SHIFT) | LDV_STATE_CREATE CmmMachOp (mo_wordOr dflags) [ CmmMachOp (mo_wordShl dflags) [loadEra dflags, mkIntExpr dflags (lDV_SHIFT dflags)], - CmmLit (mkWordCLit dflags (lDV_STATE_CREATE dflags)) + CmmLit (mkWordCLit dflags (iLDV_STATE_CREATE dflags)) ] -- @@ -350,8 +348,8 @@ ldvEnter cl_ptr = do let -- don't forget to substract node's tag ldv_wd = ldvWord dflags cl_ptr new_ldv_wd = cmmOrWord dflags (cmmAndWord dflags (CmmLoad ldv_wd (bWord dflags)) - (CmmLit (mkWordCLit dflags (lDV_CREATE_MASK dflags)))) - (cmmOrWord dflags (loadEra dflags) (CmmLit (mkWordCLit dflags (lDV_STATE_USE dflags)))) + (CmmLit (mkWordCLit dflags (iLDV_CREATE_MASK dflags)))) + (cmmOrWord dflags (loadEra dflags) (CmmLit (mkWordCLit dflags (iLDV_STATE_USE dflags)))) ifProfiling $ -- if (era > 0) { -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | @@ -371,10 +369,3 @@ ldvWord :: DynFlags -> CmmExpr -> CmmExpr ldvWord dflags closure_ptr = cmmOffsetB dflags closure_ptr (oFFSET_StgHeader_ldvw dflags) -lDV_CREATE_MASK :: DynFlags -> StgWord -lDV_CREATE_MASK dflags = toStgWord dflags (iLDV_CREATE_MASK dflags) -lDV_STATE_CREATE :: DynFlags -> StgWord -lDV_STATE_CREATE dflags = toStgWord dflags (iLDV_STATE_CREATE dflags) -lDV_STATE_USE :: DynFlags -> StgWord -lDV_STATE_USE dflags = toStgWord dflags (iLDV_STATE_USE dflags) - diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs index 79ad3ff822..01babb212f 100644 --- a/compiler/codeGen/StgCmmTicky.hs +++ b/compiler/codeGen/StgCmmTicky.hs @@ -333,7 +333,7 @@ tickyAllocHeap hp ifTicky :: FCode () -> FCode () ifTicky code = do dflags <- getDynFlags if dopt Opt_Ticky dflags then code - else nopC + else return () -- All the ticky-ticky counters are declared "unsigned long" in C bumpTickyCounter :: FastString -> FCode () diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index 386e7f46d6..138e00ee52 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -36,7 +36,6 @@ module StgCmmUtils ( addToMem, addToMemE, addToMemLbl, mkWordCLit, newStringCLit, newByteStringCLit, - packHalfWordsCLit, blankWord ) where @@ -196,9 +195,9 @@ emitRtsCallGen res pkg fun args safe call updfr_off = if safe then emit =<< mkCmmCall fun_expr res' args' updfr_off - else - emit $ mkUnsafeCall (ForeignTarget fun_expr - (ForeignConvention CCallConv arg_hints res_hints)) res' args' + else do + let conv = ForeignConvention CCallConv arg_hints res_hints CmmMayReturn + emit $ mkUnsafeCall (ForeignTarget fun_expr conv) res' args' (args', arg_hints) = unzip args (res', res_hints) = unzip res fun_expr = mkLblExpr (mkCmmCodeLabel pkg fun) diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index 6d83150eb6..888ff1a0be 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -214,7 +214,6 @@ Library CgClosure CgCon CgExpr - CgExtCode CgForeignCall CgHeapery CgHpc @@ -244,6 +243,7 @@ Library StgCmmProf StgCmmTicky StgCmmUtils + StgCmmExtCode ClosureInfo SMRep CoreArity diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs index 448bd4d94c..207a237b7e 100644 --- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs +++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs @@ -274,7 +274,6 @@ genCall env target res args ret = do CCallConv -> CC_Ccc CApiConv -> CC_Ccc PrimCallConv -> CC_Ccc - CmmCallConv -> panic "CmmCallConv not supported here!" {- Some of the possibilities here are a worry with the use of a custom diff --git a/compiler/main/HscMain.hs b/compiler/main/HscMain.hs index 04f89bf63e..62a472037b 100644 --- a/compiler/main/HscMain.hs +++ b/compiler/main/HscMain.hs @@ -1349,7 +1349,11 @@ hscCompileCmmFile hsc_env filename = runHsc hsc_env $ do let dflags = hsc_dflags hsc_env cmm <- ioMsgMaybe $ parseCmmFile dflags filename liftIO $ do - rawCmms <- cmmToRawCmm dflags (Stream.yield cmm) + us <- mkSplitUniqSupply 'S' + let initTopSRT = initUs_ us emptySRT + dumpIfSet_dyn dflags Opt_D_dump_cmmz "Parsed Cmm" (ppr cmm) + (_, cmmgroup) <- cmmPipeline hsc_env initTopSRT cmm + rawCmms <- cmmToRawCmm dflags (Stream.yield (cmmOfZgraph cmmgroup)) _ <- codeOutput dflags no_mod no_loc NoStubs [] rawCmms return () where diff --git a/compiler/nativeGen/AsmCodeGen.lhs b/compiler/nativeGen/AsmCodeGen.lhs index 47fd96c426..d0e4a17746 100644 --- a/compiler/nativeGen/AsmCodeGen.lhs +++ b/compiler/nativeGen/AsmCodeGen.lhs @@ -971,12 +971,13 @@ cmmStmtConFold stmt cmmExprConFold :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr cmmExprConFold referenceKind expr = do dflags <- getDynFlags - -- Skip constant folding if new code generator is running - -- (this optimization is done in Hoopl) - -- SDM: re-enabled for now, while cmmRewriteAssignments is turned off - let expr' = if False -- dopt Opt_TryNewCodeGen dflags + + -- With -O1 and greater, the cmmSink pass does constant-folding, so + -- we don't need to do it again here. + let expr' = if optLevel dflags >= 1 then expr else cmmExprCon dflags expr + cmmExprNative referenceKind expr' cmmExprCon :: DynFlags -> CmmExpr -> CmmExpr diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs index a15bca07e7..3f1efe5824 100644 --- a/compiler/nativeGen/RegAlloc/Linear/Main.hs +++ b/compiler/nativeGen/RegAlloc/Linear/Main.hs @@ -434,10 +434,21 @@ raInsn block_live new_instrs id (LiveInstr (Instr instr) (Just live)) (uniqSetToList $ liveDieRead live) (uniqSetToList $ liveDieWrite live) - raInsn _ _ _ instr = pprPanic "raInsn" (text "no match for:" <> ppr instr) +-- ToDo: what can we do about +-- +-- R1 = x +-- jump I64[x] // [R1] +-- +-- where x is mapped to the same reg as R1. We want to coalesce x and +-- R1, but the register allocator doesn't know whether x will be +-- assigned to again later, in which case x and R1 should be in +-- different registers. Right now we assume the worst, and the +-- assignment to R1 will clobber x, so we'll spill x into another reg, +-- generating another reg->reg move. + isInReg :: Reg -> RegMap Loc -> Bool isInReg src assig | Just (InReg _) <- lookupUFM assig src = True diff --git a/compiler/prelude/ForeignCall.lhs b/compiler/prelude/ForeignCall.lhs index b3a2ad3ff1..b53ae7cf50 100644 --- a/compiler/prelude/ForeignCall.lhs +++ b/compiler/prelude/ForeignCall.lhs @@ -156,8 +156,7 @@ platforms. See: http://www.programmersheaven.com/2/Calling-conventions \begin{code} -data CCallConv = CCallConv | CApiConv | StdCallConv - | CmmCallConv | PrimCallConv +data CCallConv = CCallConv | CApiConv | StdCallConv | PrimCallConv deriving (Eq, Data, Typeable) {-! derive: Binary !-} @@ -165,7 +164,6 @@ instance Outputable CCallConv where ppr StdCallConv = ptext (sLit "stdcall") ppr CCallConv = ptext (sLit "ccall") ppr CApiConv = ptext (sLit "capi") - ppr CmmCallConv = ptext (sLit "C--") ppr PrimCallConv = ptext (sLit "prim") defaultCCallConv :: CCallConv @@ -175,7 +173,6 @@ ccallConvToInt :: CCallConv -> Int ccallConvToInt StdCallConv = 0 ccallConvToInt CCallConv = 1 ccallConvToInt CApiConv = panic "ccallConvToInt CApiConv" -ccallConvToInt (CmmCallConv {}) = panic "ccallConvToInt CmmCallConv" ccallConvToInt (PrimCallConv {}) = panic "ccallConvToInt PrimCallConv" \end{code} @@ -187,7 +184,6 @@ ccallConvAttribute :: CCallConv -> SDoc ccallConvAttribute StdCallConv = text "__attribute__((__stdcall__))" ccallConvAttribute CCallConv = empty ccallConvAttribute CApiConv = empty -ccallConvAttribute (CmmCallConv {}) = panic "ccallConvAttribute CmmCallConv" ccallConvAttribute (PrimCallConv {}) = panic "ccallConvAttribute PrimCallConv" \end{code} @@ -326,17 +322,14 @@ instance Binary CCallConv where putByte bh 1 put_ bh PrimCallConv = do putByte bh 2 - put_ bh CmmCallConv = do - putByte bh 3 put_ bh CApiConv = do - putByte bh 4 + putByte bh 3 get bh = do h <- getByte bh case h of 0 -> do return CCallConv 1 -> do return StdCallConv 2 -> do return PrimCallConv - 3 -> do return CmmCallConv _ -> do return CApiConv instance Binary CType where diff --git a/compiler/typecheck/TcForeign.lhs b/compiler/typecheck/TcForeign.lhs index 90a174081c..9c8b9807f2 100644 --- a/compiler/typecheck/TcForeign.lhs +++ b/compiler/typecheck/TcForeign.lhs @@ -464,7 +464,6 @@ checkCConv StdCallConv = do dflags <- getDynFlags return CCallConv checkCConv PrimCallConv = do addErrTc (text "The `prim' calling convention can only be used with `foreign import'") return PrimCallConv -checkCConv CmmCallConv = panic "checkCConv CmmCallConv" \end{code} Warnings diff --git a/includes/Cmm.h b/includes/Cmm.h index edcf46e7c0..afe08a26a3 100644 --- a/includes/Cmm.h +++ b/includes/Cmm.h @@ -9,36 +9,6 @@ * * For the syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y. * - * If you're used to the old HC file syntax, here's a quick cheat sheet - * for converting HC code: - * - * - Remove FB_/FE_ - * - Remove all type casts - * - Remove '&' - * - STGFUN(foo) { ... } ==> foo { ... } - * - FN_(foo) { ... } ==> foo { ... } - * - JMP_(e) ==> jump e; - * - Remove EXTFUN(foo) - * - Sp[n] ==> Sp(n) - * - Hp[n] ==> Hp(n) - * - Sp += n ==> Sp_adj(n) - * - Hp += n ==> Hp_adj(n) - * - R1.i ==> R1 (similarly for R1.w, R1.cl etc.) - * - You need to explicitly dereference variables; eg. - * alloc_blocks ==> CInt[alloc_blocks] - * - convert all word offsets into byte offsets: - * - e ==> WDS(e) - * - sizeofW(StgFoo) ==> SIZEOF_StgFoo - * - ENTRY_CODE(e) ==> %ENTRY_CODE(e) - * - get_itbl(c) ==> %GET_STD_INFO(c) - * - Change liveness masks in STK_CHK_GEN, HP_CHK_GEN: - * R1_PTR | R2_PTR ==> R1_PTR & R2_PTR - * (NOTE: | becomes &) - * - Declarations like 'StgPtr p;' become just 'W_ p;' - * - e->payload[n] ==> PAYLOAD(e,n) - * - Be very careful with comparisons: the infix versions (>, >=, etc.) - * are unsigned, so use %lt(a,b) to get signed less-than for example. - * * Accessing fields of structures defined in the RTS header files is * done via automatically-generated macros in DerivedConstants.h. For * example, where previously we used @@ -136,6 +106,8 @@ Misc useful stuff -------------------------------------------------------------------------- */ +#define ccall foreign "C" + #define NULL (0::W_) #define STRING(name,str) \ @@ -210,7 +182,7 @@ #define Sp(n) W_[Sp + WDS(n)] #define Hp(n) W_[Hp + WDS(n)] -#define Sp_adj(n) Sp = Sp + WDS(n) +#define Sp_adj(n) Sp = Sp + WDS(n) /* pronounced "spadge" */ #define Hp_adj(n) Hp = Hp + WDS(n) /* ----------------------------------------------------------------------------- @@ -278,25 +250,37 @@ #define LOAD_INFO \ info = %INFO_PTR(UNTAG(P1)); -#define UNTAG_R1 \ - P1 = UNTAG(P1); +#define MAYBE_UNTAG(x) UNTAG(x); #else -#define LOAD_INFO \ - if (GETTAG(P1) != 0) { \ - jump %ENTRY_CODE(Sp(0)); \ +#define LOAD_INFO(ret,x) \ + if (GETTAG(x) != 0) { \ + ret(x); \ } \ - info = %INFO_PTR(P1); + info = %INFO_PTR(x); -#define UNTAG_R1 /* nothing */ +#define MAYBE_UNTAG(x) (x) /* already untagged */ #endif -#define ENTER() \ +// We need two versions of ENTER(): +// - ENTER(x) takes the closure as an argument and uses return(), +// for use in civilized code where the stack is handled by GHC +// +// - ENTER_NOSTACK() where the closure is in R1, and returns are +// explicit jumps, for use when we are doing the stack management +// ourselves. + +#define ENTER(x) ENTER_(return,x) +#define ENTER_R1() ENTER_(RET_R1,R1) + +#define RET_R1(x) jump %ENTRY_CODE(Sp(0)) [R1] + +#define ENTER_(ret,x) \ again: \ W_ info; \ - LOAD_INFO \ + LOAD_INFO(ret,x) \ switch [INVALID_OBJECT .. N_CLOSURE_TYPES] \ (TO_W_( %INFO_TYPE(%STD_INFO(info)) )) { \ case \ @@ -304,7 +288,7 @@ IND_PERM, \ IND_STATIC: \ { \ - P1 = StgInd_indirectee(P1); \ + x = StgInd_indirectee(x); \ goto again; \ } \ case \ @@ -318,12 +302,12 @@ BCO, \ PAP: \ { \ - jump %ENTRY_CODE(Sp(0)); \ + ret(x); \ } \ default: \ { \ - UNTAG_R1 \ - jump %ENTRY_CODE(info); \ + x = MAYBE_UNTAG(x); \ + jump %ENTRY_CODE(info) (x); \ } \ } @@ -348,7 +332,6 @@ */ #include "stg/RtsMachRegs.h" -#include "rts/storage/Liveness.h" #include "rts/prof/LDV.h" #undef BLOCK_SIZE @@ -359,6 +342,18 @@ #define MyCapability() (BaseReg - OFFSET_Capability_r) /* ------------------------------------------------------------------------- + Info tables + ------------------------------------------------------------------------- */ + +#if defined(PROFILING) +#define PROF_HDR_FIELDS(w_) \ + w_ prof_hdr_1, \ + w_ prof_hdr_2, +#else +#define PROF_HDR_FIELDS(w_) /* nothing */ +#endif + +/* ------------------------------------------------------------------------- Allocation and garbage collection ------------------------------------------------------------------------- */ @@ -371,30 +366,134 @@ * ticky-ticky. It's not clear whether eg. the size field of an array * should be counted as "admin", or the various fields of a BCO. */ -#define ALLOC_PRIM(bytes,liveness,reentry) \ - HP_CHK_GEN_TICKY(bytes,liveness,reentry); \ +#define ALLOC_PRIM(bytes) \ + HP_CHK_GEN_TICKY(bytes); \ TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \ CCCS_ALLOC(bytes); +#define HEAP_CHECK(bytes,failure) \ + Hp = Hp + bytes; \ + if (Hp > HpLim) { HpAlloc = bytes; failure; } \ + TICK_ALLOC_HEAP_NOCTR(bytes); + +#define ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,failure) \ + HEAP_CHECK(bytes,failure) \ + TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \ + CCCS_ALLOC(bytes); + +#define ALLOC_PRIM_P(bytes,fun,arg) \ + ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_P(fun,arg)); + +#define ALLOC_PRIM_N(bytes,fun,arg) \ + ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_N(fun,arg)); + /* CCS_ALLOC wants the size in words, because ccs->mem_alloc is in words */ #define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), CCCS) -#define HP_CHK_GEN_TICKY(alloc,liveness,reentry) \ - HP_CHK_GEN(alloc,liveness,reentry); \ +#define HP_CHK_GEN_TICKY(alloc) \ + HP_CHK_GEN(alloc); \ TICK_ALLOC_HEAP_NOCTR(alloc); +#define HP_CHK_P(bytes, fun, arg) \ + HEAP_CHECK(bytes, GC_PRIM_P(fun,arg)) + +#define ALLOC_P_TICKY(alloc, fun, arg) \ + HP_CHK_P(alloc); \ + TICK_ALLOC_HEAP_NOCTR(alloc); + +#define CHECK_GC() \ + (bdescr_link(CurrentNursery) == NULL || \ + generation_n_new_large_words(W_[g0]) >= TO_W_(CLong[large_alloc_lim])) + // allocate() allocates from the nursery, so we check to see // whether the nursery is nearly empty in any function that uses // allocate() - this includes many of the primops. -#define MAYBE_GC(liveness,reentry) \ - if (bdescr_link(CurrentNursery) == NULL || \ - generation_n_new_large_words(W_[g0]) >= TO_W_(CLong[large_alloc_lim])) { \ - R9 = liveness; \ - R10 = reentry; \ - HpAlloc = 0; \ - jump stg_gc_gen_hp; \ +// +// HACK alert: the __L__ stuff is here to coax the common-block +// eliminator into commoning up the call stg_gc_noregs() with the same +// code that gets generated by a STK_CHK_GEN() in the same proc. We +// also need an if (0) { goto __L__; } so that the __L__ label isn't +// optimised away by the control-flow optimiser prior to common-block +// elimination (it will be optimised away later). +// +// This saves some code in gmp-wrappers.cmm where we have lots of +// MAYBE_GC() in the same proc as STK_CHK_GEN(). +// +#define MAYBE_GC(retry) \ + if (CHECK_GC()) { \ + HpAlloc = 0; \ + goto __L__; \ + __L__: \ + call stg_gc_noregs(); \ + goto retry; \ + } \ + if (0) { goto __L__; } + +#define GC_PRIM(fun) \ + R9 = fun; \ + jump stg_gc_prim(); + +#define GC_PRIM_N(fun,arg) \ + R9 = fun; \ + jump stg_gc_prim_n(arg); + +#define GC_PRIM_P(fun,arg) \ + R9 = fun; \ + jump stg_gc_prim_p(arg); + +#define GC_PRIM_PP(fun,arg1,arg2) \ + R9 = fun; \ + jump stg_gc_prim_pp(arg1,arg2); + +#define MAYBE_GC_(fun) \ + if (CHECK_GC()) { \ + HpAlloc = 0; \ + GC_PRIM(fun) \ + } + +#define MAYBE_GC_N(fun,arg) \ + if (CHECK_GC()) { \ + HpAlloc = 0; \ + GC_PRIM_N(fun,arg) \ + } + +#define MAYBE_GC_P(fun,arg) \ + if (CHECK_GC()) { \ + HpAlloc = 0; \ + GC_PRIM_P(fun,arg) \ } +#define MAYBE_GC_PP(fun,arg1,arg2) \ + if (CHECK_GC()) { \ + HpAlloc = 0; \ + GC_PRIM_PP(fun,arg1,arg2) \ + } + +#define STK_CHK(n, fun) \ + if (Sp - n < SpLim) { \ + GC_PRIM(fun) \ + } + +#define STK_CHK_P(n, fun, arg) \ + if (Sp - n < SpLim) { \ + GC_PRIM_P(fun,arg) \ + } + +#define STK_CHK_PP(n, fun, arg1, arg2) \ + if (Sp - n < SpLim) { \ + GC_PRIM_PP(fun,arg1,arg2) \ + } + +#define STK_CHK_ENTER(n, closure) \ + if (Sp - n < SpLim) { \ + jump __stg_gc_enter_1(closure); \ + } + +// A funky heap check used by AutoApply.cmm + +#define HP_CHK_NP_ASSIGN_SP0(size,f) \ + HEAP_CHECK(size, Sp(0) = f; jump __stg_gc_enter_1 [R1];) + /* ----------------------------------------------------------------------------- Closure headers -------------------------------------------------------------------------- */ @@ -481,23 +580,6 @@ #endif /* ----------------------------------------------------------------------------- - Voluntary Yields/Blocks - - We only have a generic version of this at the moment - if it turns - out to be slowing us down we can make specialised ones. - -------------------------------------------------------------------------- */ - -#define YIELD(liveness,reentry) \ - R9 = liveness; \ - R10 = reentry; \ - jump stg_gen_yield; - -#define BLOCK(liveness,reentry) \ - R9 = liveness; \ - R10 = reentry; \ - jump stg_gen_block; - -/* ----------------------------------------------------------------------------- Ticky macros -------------------------------------------------------------------------- */ @@ -585,6 +667,63 @@ TICK_BUMP_BY(ALLOC_HEAP_tot,n) /* ----------------------------------------------------------------------------- + Saving and restoring STG registers + + STG registers must be saved around a C call, just in case the STG + register is mapped to a caller-saves machine register. Normally we + don't need to worry about this the code generator has already + loaded any live STG registers into variables for us, but in + hand-written low-level Cmm code where we don't know which registers + are live, we might have to save them all. + -------------------------------------------------------------------------- */ + +#define SAVE_STGREGS \ + W_ r1, r2, r3, r4, r5, r6, r7, r8; \ + F_ f1, f2, f3, f4; \ + D_ d1, d2; \ + L_ l1; \ + \ + r1 = R1; \ + r2 = R2; \ + r3 = R3; \ + r4 = R4; \ + r5 = R5; \ + r6 = R6; \ + r7 = R7; \ + r8 = R8; \ + \ + f1 = F1; \ + f2 = F2; \ + f3 = F3; \ + f4 = F4; \ + \ + d1 = D1; \ + d2 = D2; \ + \ + l1 = L1; + + +#define RESTORE_STGREGS \ + R1 = r1; \ + R2 = r2; \ + R3 = r3; \ + R4 = r4; \ + R5 = r5; \ + R6 = r6; \ + R7 = r7; \ + R8 = r8; \ + \ + F1 = f1; \ + F2 = f2; \ + F3 = f3; \ + F4 = f4; \ + \ + D1 = d1; \ + D2 = d2; \ + \ + L1 = l1; + +/* ----------------------------------------------------------------------------- Misc junk -------------------------------------------------------------------------- */ @@ -592,14 +731,14 @@ #define END_TSO_QUEUE stg_END_TSO_QUEUE_closure #define END_INVARIANT_CHECK_QUEUE stg_END_INVARIANT_CHECK_QUEUE_closure -#define recordMutableCap(p, gen, regs) \ +#define recordMutableCap(p, gen) \ W_ __bd; \ W_ mut_list; \ mut_list = Capability_mut_lists(MyCapability()) + WDS(gen); \ __bd = W_[mut_list]; \ if (bdescr_free(__bd) >= bdescr_start(__bd) + BLOCK_SIZE) { \ W_ __new_bd; \ - ("ptr" __new_bd) = foreign "C" allocBlock_lock() [regs]; \ + ("ptr" __new_bd) = foreign "C" allocBlock_lock(); \ bdescr_link(__new_bd) = __bd; \ __bd = __new_bd; \ W_[mut_list] = __bd; \ @@ -609,13 +748,13 @@ W_[free] = p; \ bdescr_free(__bd) = free + WDS(1); -#define recordMutable(p, regs) \ +#define recordMutable(p) \ P_ __p; \ W_ __bd; \ W_ __gen; \ __p = p; \ __bd = Bdescr(__p); \ __gen = TO_W_(bdescr_gen_no(__bd)); \ - if (__gen > 0) { recordMutableCap(__p, __gen, regs); } + if (__gen > 0) { recordMutableCap(__p, __gen); } #endif /* CMM_H */ diff --git a/includes/Rts.h b/includes/Rts.h index c52fe63d78..b31776828f 100644 --- a/includes/Rts.h +++ b/includes/Rts.h @@ -208,7 +208,6 @@ INLINE_HEADER Time fsecondsToTime (double t) #include "rts/storage/FunTypes.h" #include "rts/storage/InfoTables.h" #include "rts/storage/Closures.h" -#include "rts/storage/Liveness.h" #include "rts/storage/ClosureTypes.h" #include "rts/storage/TSO.h" #include "stg/MiscClosures.h" /* InfoTables, closures etc. defined in the RTS */ diff --git a/includes/rts/Constants.h b/includes/rts/Constants.h index cd741be7e0..2fab041c22 100644 --- a/includes/rts/Constants.h +++ b/includes/rts/Constants.h @@ -118,11 +118,6 @@ pushed in one of the heap check fragments in HeapStackCheck.hc (ie. currently the generic heap checks - 3 words for StgRetDyn, 18 words for the saved registers, see StgMacros.h). - - In the event of an unboxed tuple or let-no-escape stack/heap check - failure, there will be other words on the stack which are covered - by the RET_DYN frame. These will have been accounted for by stack - checks however, so we don't need to allow for them here. -------------------------------------------------------------------------- */ #define RESERVED_STACK_WORDS 21 @@ -277,25 +272,6 @@ */ #define TSO_SQUEEZED 128 -/* ----------------------------------------------------------------------------- - RET_DYN stack frames - -------------------------------------------------------------------------- */ - -/* VERY MAGIC CONSTANTS! - * must agree with code in HeapStackCheck.c, stg_gen_chk, and - * RESERVED_STACK_WORDS in Constants.h. - */ -#define RET_DYN_BITMAP_SIZE 8 -#define RET_DYN_NONPTR_REGS_SIZE 10 - -/* Sanity check that RESERVED_STACK_WORDS is reasonable. We can't - * just derive RESERVED_STACK_WORDS because it's used in Haskell code - * too. - */ -#if RESERVED_STACK_WORDS != (3 + RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE) -#error RESERVED_STACK_WORDS may be wrong! -#endif - /* * The number of times we spin in a spin lock before yielding (see * #3758). To tune this value, use the benchmark in #3758: run the diff --git a/includes/rts/storage/ClosureMacros.h b/includes/rts/storage/ClosureMacros.h index 6fdd55727a..dd5f428135 100644 --- a/includes/rts/storage/ClosureMacros.h +++ b/includes/rts/storage/ClosureMacros.h @@ -410,14 +410,6 @@ EXTERN_INLINE StgWord stack_frame_sizeW( StgClosure *frame ) info = get_ret_itbl(frame); switch (info->i.type) { - case RET_DYN: - { - StgRetDyn *dyn = (StgRetDyn *)frame; - return sizeofW(StgRetDyn) + RET_DYN_BITMAP_SIZE + - RET_DYN_NONPTR_REGS_SIZE + - RET_DYN_PTRS(dyn->liveness) + RET_DYN_NONPTRS(dyn->liveness); - } - case RET_FUN: return sizeofW(StgRetFun) + ((StgRetFun *)frame)->size; diff --git a/includes/rts/storage/ClosureTypes.h b/includes/rts/storage/ClosureTypes.h index 75ec08bf18..4e3b1e6a72 100644 --- a/includes/rts/storage/ClosureTypes.h +++ b/includes/rts/storage/ClosureTypes.h @@ -52,33 +52,32 @@ #define RET_BCO 31 #define RET_SMALL 32 #define RET_BIG 33 -#define RET_DYN 34 -#define RET_FUN 35 -#define UPDATE_FRAME 36 -#define CATCH_FRAME 37 -#define UNDERFLOW_FRAME 38 -#define STOP_FRAME 39 -#define BLOCKING_QUEUE 40 -#define BLACKHOLE 41 -#define MVAR_CLEAN 42 -#define MVAR_DIRTY 43 -#define ARR_WORDS 44 -#define MUT_ARR_PTRS_CLEAN 45 -#define MUT_ARR_PTRS_DIRTY 46 -#define MUT_ARR_PTRS_FROZEN0 47 -#define MUT_ARR_PTRS_FROZEN 48 -#define MUT_VAR_CLEAN 49 -#define MUT_VAR_DIRTY 50 -#define WEAK 51 -#define PRIM 52 -#define MUT_PRIM 53 -#define TSO 54 -#define STACK 55 -#define TREC_CHUNK 56 -#define ATOMICALLY_FRAME 57 -#define CATCH_RETRY_FRAME 58 -#define CATCH_STM_FRAME 59 -#define WHITEHOLE 60 -#define N_CLOSURE_TYPES 61 +#define RET_FUN 34 +#define UPDATE_FRAME 35 +#define CATCH_FRAME 36 +#define UNDERFLOW_FRAME 37 +#define STOP_FRAME 38 +#define BLOCKING_QUEUE 39 +#define BLACKHOLE 40 +#define MVAR_CLEAN 41 +#define MVAR_DIRTY 42 +#define ARR_WORDS 43 +#define MUT_ARR_PTRS_CLEAN 44 +#define MUT_ARR_PTRS_DIRTY 45 +#define MUT_ARR_PTRS_FROZEN0 46 +#define MUT_ARR_PTRS_FROZEN 47 +#define MUT_VAR_CLEAN 48 +#define MUT_VAR_DIRTY 49 +#define WEAK 50 +#define PRIM 51 +#define MUT_PRIM 52 +#define TSO 53 +#define STACK 54 +#define TREC_CHUNK 55 +#define ATOMICALLY_FRAME 56 +#define CATCH_RETRY_FRAME 57 +#define CATCH_STM_FRAME 58 +#define WHITEHOLE 59 +#define N_CLOSURE_TYPES 60 #endif /* RTS_STORAGE_CLOSURETYPES_H */ diff --git a/includes/rts/storage/Closures.h b/includes/rts/storage/Closures.h index 5f4f03541f..fcba1ebeb6 100644 --- a/includes/rts/storage/Closures.h +++ b/includes/rts/storage/Closures.h @@ -240,60 +240,6 @@ typedef struct { #define BCO_BITMAP_SIZEW(bco) ((BCO_BITMAP_SIZE(bco) + BITS_IN(StgWord) - 1) \ / BITS_IN(StgWord)) -/* ----------------------------------------------------------------------------- - Dynamic stack frames for generic heap checks. - - These generic heap checks are slow, but have the advantage of being - usable in a variety of situations. - - The one restriction is that any relevant SRTs must already be pointed - to from the stack. The return address doesn't need to have an info - table attached: hence it can be any old code pointer. - - The liveness mask contains a 1 at bit n, if register Rn contains a - non-pointer. The contents of all 8 vanilla registers are always saved - on the stack; the liveness mask tells the GC which ones contain - pointers. - - Good places to use a generic heap check: - - - case alternatives (the return address with an SRT is already - on the stack). - - - primitives (no SRT required). - - The stack frame layout for a RET_DYN is like this: - - some pointers |-- RET_DYN_PTRS(liveness) words - some nonpointers |-- RET_DYN_NONPTRS(liveness) words - - L1 \ - D1-2 |-- RET_DYN_NONPTR_REGS_SIZE words - F1-4 / - - R1-8 |-- RET_DYN_BITMAP_SIZE words - - return address \ - liveness mask |-- StgRetDyn structure - stg_gen_chk_info / - - we assume that the size of a double is always 2 pointers (wasting a - word when it is only one pointer, but avoiding lots of #ifdefs). - - See Liveness.h for the macros (RET_DYN_PTRS() etc.). - - NOTE: if you change the layout of RET_DYN stack frames, then you - might also need to adjust the value of RESERVED_STACK_WORDS in - Constants.h. - -------------------------------------------------------------------------- */ - -typedef struct { - const StgInfoTable* info; - StgWord liveness; - StgWord ret_addr; - StgClosure * payload[FLEXIBLE_ARRAY]; -} StgRetDyn; - /* A function return stack frame: used when saving the state for a * garbage collection at a function entry point. The function * arguments are on the stack, and we also save the function (its @@ -430,7 +376,7 @@ typedef struct { typedef struct { StgHeader header; - StgBool running_alt_code; + StgWord running_alt_code; StgClosure *first_code; StgClosure *alt_code; } StgCatchRetryFrame; diff --git a/includes/rts/storage/Liveness.h b/includes/rts/storage/Liveness.h deleted file mode 100644 index 66c82f3134..0000000000 --- a/includes/rts/storage/Liveness.h +++ /dev/null @@ -1,34 +0,0 @@ -/* ----------------------------------------------------------------------------- - * - * (c) The University of Glasgow 2004 - * - * Building liveness masks for RET_DYN stack frames. - * A few macros that are used in both .cmm and .c sources. - * - * A liveness mask is constructed like so: - * - * R1_PTR & R2_PTR & R3_PTR - * - * -------------------------------------------------------------------------- */ - -#ifndef RTS_STORAGE_LIVENESS_H -#define RTS_STORAGE_LIVENESS_H - -#define NO_PTRS 0xff -#define R1_PTR (NO_PTRS ^ (1<<0)) -#define R2_PTR (NO_PTRS ^ (1<<1)) -#define R3_PTR (NO_PTRS ^ (1<<2)) -#define R4_PTR (NO_PTRS ^ (1<<3)) -#define R5_PTR (NO_PTRS ^ (1<<4)) -#define R6_PTR (NO_PTRS ^ (1<<5)) -#define R7_PTR (NO_PTRS ^ (1<<6)) -#define R8_PTR (NO_PTRS ^ (1<<7)) - -#define N_NONPTRS(n) ((n)<<16) -#define N_PTRS(n) ((n)<<24) - -#define RET_DYN_NONPTRS(l) ((l)>>16 & 0xff) -#define RET_DYN_PTRS(l) ((l)>>24 & 0xff) -#define RET_DYN_LIVENESS(l) ((l) & 0xffff) - -#endif /* RTS_STORAGE_LIVENESS_H */ diff --git a/includes/rts/storage/SMPClosureOps.h b/includes/rts/storage/SMPClosureOps.h index 8dee7cbcf9..cd6a789af4 100644 --- a/includes/rts/storage/SMPClosureOps.h +++ b/includes/rts/storage/SMPClosureOps.h @@ -12,7 +12,7 @@ #ifdef CMINUSMINUS #define unlockClosure(ptr,info) \ - prim %write_barrier() []; \ + prim %write_barrier(); \ StgHeader_info(ptr) = info; #else diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h index c93cc319c0..b7b24a8632 100644 --- a/includes/stg/MiscClosures.h +++ b/includes/stg/MiscClosures.h @@ -169,23 +169,6 @@ RTS_RET(stg_noforceIO); /* standard selector thunks */ -RTS_RET(stg_sel_ret_0_upd); -RTS_RET(stg_sel_ret_1_upd); -RTS_RET(stg_sel_ret_2_upd); -RTS_RET(stg_sel_ret_3_upd); -RTS_RET(stg_sel_ret_4_upd); -RTS_RET(stg_sel_ret_5_upd); -RTS_RET(stg_sel_ret_6_upd); -RTS_RET(stg_sel_ret_7_upd); -RTS_RET(stg_sel_ret_8_upd); -RTS_RET(stg_sel_ret_9_upd); -RTS_RET(stg_sel_ret_10_upd); -RTS_RET(stg_sel_ret_11_upd); -RTS_RET(stg_sel_ret_12_upd); -RTS_RET(stg_sel_ret_13_upd); -RTS_RET(stg_sel_ret_14_upd); -RTS_RET(stg_sel_ret_15_upd); - RTS_ENTRY(stg_sel_0_upd); RTS_ENTRY(stg_sel_1_upd); RTS_ENTRY(stg_sel_2_upd); @@ -267,45 +250,39 @@ RTS_FUN_DECL(stg_PAP_apply); /* standard GC & stack check entry points, all defined in HeapStackCheck.hc */ -RTS_RET(stg_enter); +RTS_FUN_DECL(stg_gc_noregs); + RTS_RET(stg_enter_checkbh); -RTS_RET(stg_gc_void); +RTS_RET(stg_ret_v); +RTS_RET(stg_ret_p); +RTS_RET(stg_ret_n); +RTS_RET(stg_ret_f); +RTS_RET(stg_ret_d); +RTS_RET(stg_ret_l); +RTS_FUN_DECL(stg_gc_prim_p); +RTS_FUN_DECL(stg_gc_prim_pp); +RTS_FUN_DECL(stg_gc_prim_n); + +RTS_RET(stg_enter); RTS_FUN_DECL(__stg_gc_enter_1); -RTS_FUN_DECL(stg_gc_noregs); -RTS_RET(stg_gc_unpt_r1); RTS_FUN_DECL(stg_gc_unpt_r1); - -RTS_RET(stg_gc_unbx_r1); RTS_FUN_DECL(stg_gc_unbx_r1); - -RTS_RET(stg_gc_f1); RTS_FUN_DECL(stg_gc_f1); - -RTS_RET(stg_gc_d1); RTS_FUN_DECL(stg_gc_d1); - -RTS_RET(stg_gc_l1); RTS_FUN_DECL(stg_gc_l1); +RTS_FUN_DECL(stg_gc_pp); +RTS_FUN_DECL(stg_gc_ppp); +RTS_FUN_DECL(stg_gc_pppp); RTS_RET(stg_gc_fun); RTS_FUN_DECL(__stg_gc_fun); -RTS_RET(stg_gc_gen); -RTS_FUN_DECL(stg_gc_gen); - -RTS_RET(stg_ut_1_0_unreg); - -RTS_FUN_DECL(stg_gc_gen_hp); -RTS_FUN_DECL(stg_gc_ut); -RTS_FUN_DECL(stg_gen_yield); RTS_FUN_DECL(stg_yield_noregs); RTS_FUN_DECL(stg_yield_to_interpreter); -RTS_FUN_DECL(stg_gen_block); RTS_FUN_DECL(stg_block_noregs); -RTS_FUN_DECL(stg_block_1); RTS_FUN_DECL(stg_block_blackhole); RTS_FUN_DECL(stg_block_blackhole_finally); RTS_FUN_DECL(stg_block_takemvar); diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h index bf17b7e825..70e93d3234 100644 --- a/includes/stg/Regs.h +++ b/includes/stg/Regs.h @@ -93,10 +93,10 @@ typedef struct { /* * Registers Hp and HpLim are global across the entire system, and are - * copied into the RegTable before executing a thread. + * copied into the RegTable or registers before executing a thread. * - * Registers Sp and SpLim are saved in the TSO for the - * thread, but are copied into the RegTable before executing a thread. + * Registers Sp and SpLim are saved in the TSO for the thread, but are + * copied into the RegTable or registers before executing a thread. * * All other registers are "general purpose", and are used for passing * arguments to functions, and returning values. The code generator @@ -116,45 +116,6 @@ typedef struct { * (pseudo-)registers in those cases. */ -/* - * Locations for saving per-thread registers. - */ - -#define SAVE_Sp (CurrentTSO->sp) -#define SAVE_SpLim (CurrentTSO->splim) - -#define SAVE_Hp (BaseReg->rHp) - -#define SAVE_CurrentTSO (BaseReg->rCurrentTSO) -#define SAVE_CurrentNursery (BaseReg->rCurrentNursery) -#define SAVE_HpAlloc (BaseReg->rHpAlloc) - -/* We sometimes need to save registers across a C-call, eg. if they - * are clobbered in the standard calling convention. We define the - * save locations for all registers in the register table. - */ - -#define SAVE_R1 (BaseReg->rR1) -#define SAVE_R2 (BaseReg->rR2) -#define SAVE_R3 (BaseReg->rR3) -#define SAVE_R4 (BaseReg->rR4) -#define SAVE_R5 (BaseReg->rR5) -#define SAVE_R6 (BaseReg->rR6) -#define SAVE_R7 (BaseReg->rR7) -#define SAVE_R8 (BaseReg->rR8) -#define SAVE_R9 (BaseReg->rR9) -#define SAVE_R10 (BaseReg->rR10) - -#define SAVE_F1 (BaseReg->rF1) -#define SAVE_F2 (BaseReg->rF2) -#define SAVE_F3 (BaseReg->rF3) -#define SAVE_F4 (BaseReg->rF4) - -#define SAVE_D1 (BaseReg->rD1) -#define SAVE_D2 (BaseReg->rD2) - -#define SAVE_L1 (BaseReg->rL1) - /* ----------------------------------------------------------------------------- * Emit the GCC-specific register declarations for each machine * register being used. If any STG register isn't mapped to a machine @@ -163,11 +124,6 @@ typedef struct { * First, the general purpose registers. The idea is, if a particular * general-purpose STG register can't be mapped to a real machine * register, it won't be used at all. Instead, we'll use the stack. - * - * This is an improvement on the way things used to be done, when all - * registers were mapped to locations in the register table, and stuff - * was being shifted from the stack to the register table and back - * again for no good reason (on register-poor architectures). */ /* define NO_REGS to omit register declarations - used in RTS C code @@ -402,287 +358,6 @@ GLOBAL_REG_DECL(bdescr *,HpAlloc,REG_HpAlloc) #define stg_gc_enter_1 (FunReg->stgGCEnter1) #define stg_gc_fun (FunReg->stgGCFun) -/* ----------------------------------------------------------------------------- - For any registers which are denoted "caller-saves" by the C calling - convention, we have to emit code to save and restore them across C - calls. - -------------------------------------------------------------------------- */ - -#ifdef CALLER_SAVES_R1 -#define CALLER_SAVE_R1 SAVE_R1 = R1; -#define CALLER_RESTORE_R1 R1 = SAVE_R1; -#else -#define CALLER_SAVE_R1 /* nothing */ -#define CALLER_RESTORE_R1 /* nothing */ -#endif - -#ifdef CALLER_SAVES_R2 -#define CALLER_SAVE_R2 SAVE_R2 = R2; -#define CALLER_RESTORE_R2 R2 = SAVE_R2; -#else -#define CALLER_SAVE_R2 /* nothing */ -#define CALLER_RESTORE_R2 /* nothing */ -#endif - -#ifdef CALLER_SAVES_R3 -#define CALLER_SAVE_R3 SAVE_R3 = R3; -#define CALLER_RESTORE_R3 R3 = SAVE_R3; -#else -#define CALLER_SAVE_R3 /* nothing */ -#define CALLER_RESTORE_R3 /* nothing */ -#endif - -#ifdef CALLER_SAVES_R4 -#define CALLER_SAVE_R4 SAVE_R4 = R4; -#define CALLER_RESTORE_R4 R4 = SAVE_R4; -#else -#define CALLER_SAVE_R4 /* nothing */ -#define CALLER_RESTORE_R4 /* nothing */ -#endif - -#ifdef CALLER_SAVES_R5 -#define CALLER_SAVE_R5 SAVE_R5 = R5; -#define CALLER_RESTORE_R5 R5 = SAVE_R5; -#else -#define CALLER_SAVE_R5 /* nothing */ -#define CALLER_RESTORE_R5 /* nothing */ -#endif - -#ifdef CALLER_SAVES_R6 -#define CALLER_SAVE_R6 SAVE_R6 = R6; -#define CALLER_RESTORE_R6 R6 = SAVE_R6; -#else -#define CALLER_SAVE_R6 /* nothing */ -#define CALLER_RESTORE_R6 /* nothing */ -#endif - -#ifdef CALLER_SAVES_R7 -#define CALLER_SAVE_R7 SAVE_R7 = R7; -#define CALLER_RESTORE_R7 R7 = SAVE_R7; -#else -#define CALLER_SAVE_R7 /* nothing */ -#define CALLER_RESTORE_R7 /* nothing */ -#endif - -#ifdef CALLER_SAVES_R8 -#define CALLER_SAVE_R8 SAVE_R8 = R8; -#define CALLER_RESTORE_R8 R8 = SAVE_R8; -#else -#define CALLER_SAVE_R8 /* nothing */ -#define CALLER_RESTORE_R8 /* nothing */ -#endif - -#ifdef CALLER_SAVES_R9 -#define CALLER_SAVE_R9 SAVE_R9 = R9; -#define CALLER_RESTORE_R9 R9 = SAVE_R9; -#else -#define CALLER_SAVE_R9 /* nothing */ -#define CALLER_RESTORE_R9 /* nothing */ -#endif - -#ifdef CALLER_SAVES_R10 -#define CALLER_SAVE_R10 SAVE_R10 = R10; -#define CALLER_RESTORE_R10 R10 = SAVE_R10; -#else -#define CALLER_SAVE_R10 /* nothing */ -#define CALLER_RESTORE_R10 /* nothing */ -#endif - -#ifdef CALLER_SAVES_F1 -#define CALLER_SAVE_F1 SAVE_F1 = F1; -#define CALLER_RESTORE_F1 F1 = SAVE_F1; -#else -#define CALLER_SAVE_F1 /* nothing */ -#define CALLER_RESTORE_F1 /* nothing */ -#endif - -#ifdef CALLER_SAVES_F2 -#define CALLER_SAVE_F2 SAVE_F2 = F2; -#define CALLER_RESTORE_F2 F2 = SAVE_F2; -#else -#define CALLER_SAVE_F2 /* nothing */ -#define CALLER_RESTORE_F2 /* nothing */ -#endif - -#ifdef CALLER_SAVES_F3 -#define CALLER_SAVE_F3 SAVE_F3 = F3; -#define CALLER_RESTORE_F3 F3 = SAVE_F3; -#else -#define CALLER_SAVE_F3 /* nothing */ -#define CALLER_RESTORE_F3 /* nothing */ -#endif - -#ifdef CALLER_SAVES_F4 -#define CALLER_SAVE_F4 SAVE_F4 = F4; -#define CALLER_RESTORE_F4 F4 = SAVE_F4; -#else -#define CALLER_SAVE_F4 /* nothing */ -#define CALLER_RESTORE_F4 /* nothing */ -#endif - -#ifdef CALLER_SAVES_D1 -#define CALLER_SAVE_D1 SAVE_D1 = D1; -#define CALLER_RESTORE_D1 D1 = SAVE_D1; -#else -#define CALLER_SAVE_D1 /* nothing */ -#define CALLER_RESTORE_D1 /* nothing */ -#endif - -#ifdef CALLER_SAVES_D2 -#define CALLER_SAVE_D2 SAVE_D2 = D2; -#define CALLER_RESTORE_D2 D2 = SAVE_D2; -#else -#define CALLER_SAVE_D2 /* nothing */ -#define CALLER_RESTORE_D2 /* nothing */ -#endif - -#ifdef CALLER_SAVES_L1 -#define CALLER_SAVE_L1 SAVE_L1 = L1; -#define CALLER_RESTORE_L1 L1 = SAVE_L1; -#else -#define CALLER_SAVE_L1 /* nothing */ -#define CALLER_RESTORE_L1 /* nothing */ -#endif - -#ifdef CALLER_SAVES_Sp -#define CALLER_SAVE_Sp SAVE_Sp = Sp; -#define CALLER_RESTORE_Sp Sp = SAVE_Sp; -#else -#define CALLER_SAVE_Sp /* nothing */ -#define CALLER_RESTORE_Sp /* nothing */ -#endif - -#ifdef CALLER_SAVES_SpLim -#define CALLER_SAVE_SpLim SAVE_SpLim = SpLim; -#define CALLER_RESTORE_SpLim SpLim = SAVE_SpLim; -#else -#define CALLER_SAVE_SpLim /* nothing */ -#define CALLER_RESTORE_SpLim /* nothing */ -#endif - -#ifdef CALLER_SAVES_Hp -#define CALLER_SAVE_Hp SAVE_Hp = Hp; -#define CALLER_RESTORE_Hp Hp = SAVE_Hp; -#else -#define CALLER_SAVE_Hp /* nothing */ -#define CALLER_RESTORE_Hp /* nothing */ -#endif - -#ifdef CALLER_SAVES_Base -#ifdef THREADED_RTS -#error "Can't have caller-saved BaseReg with THREADED_RTS" -#endif -#define CALLER_SAVE_Base /* nothing */ -#define CALLER_RESTORE_Base BaseReg = &MainRegTable; -#else -#define CALLER_SAVE_Base /* nothing */ -#define CALLER_RESTORE_Base /* nothing */ -#endif - -#ifdef CALLER_SAVES_CurrentTSO -#define CALLER_SAVE_CurrentTSO SAVE_CurrentTSO = CurrentTSO; -#define CALLER_RESTORE_CurrentTSO CurrentTSO = SAVE_CurrentTSO; -#else -#define CALLER_SAVE_CurrentTSO /* nothing */ -#define CALLER_RESTORE_CurrentTSO /* nothing */ -#endif - -#ifdef CALLER_SAVES_CurrentNursery -#define CALLER_SAVE_CurrentNursery SAVE_CurrentNursery = CurrentNursery; -#define CALLER_RESTORE_CurrentNursery CurrentNursery = SAVE_CurrentNursery; -#else -#define CALLER_SAVE_CurrentNursery /* nothing */ -#define CALLER_RESTORE_CurrentNursery /* nothing */ -#endif - -#ifdef CALLER_SAVES_HpAlloc -#define CALLER_SAVE_HpAlloc SAVE_HpAlloc = HpAlloc; -#define CALLER_RESTORE_HpAlloc HpAlloc = SAVE_HpAlloc; -#else -#define CALLER_SAVE_HpAlloc /* nothing */ -#define CALLER_RESTORE_HpAlloc /* nothing */ -#endif - #endif /* IN_STG_CODE */ -/* ---------------------------------------------------------------------------- - Handy bunches of saves/restores - ------------------------------------------------------------------------ */ - -#if IN_STG_CODE - -#define CALLER_SAVE_USER \ - CALLER_SAVE_R1 \ - CALLER_SAVE_R2 \ - CALLER_SAVE_R3 \ - CALLER_SAVE_R4 \ - CALLER_SAVE_R5 \ - CALLER_SAVE_R6 \ - CALLER_SAVE_R7 \ - CALLER_SAVE_R8 \ - CALLER_SAVE_R9 \ - CALLER_SAVE_R10 \ - CALLER_SAVE_F1 \ - CALLER_SAVE_F2 \ - CALLER_SAVE_F3 \ - CALLER_SAVE_F4 \ - CALLER_SAVE_D1 \ - CALLER_SAVE_D2 \ - CALLER_SAVE_L1 - - /* Save Base last, since the others may - be addressed relative to it */ -#define CALLER_SAVE_SYSTEM \ - CALLER_SAVE_Sp \ - CALLER_SAVE_SpLim \ - CALLER_SAVE_Hp \ - CALLER_SAVE_CurrentTSO \ - CALLER_SAVE_CurrentNursery \ - CALLER_SAVE_Base - -#define CALLER_RESTORE_USER \ - CALLER_RESTORE_R1 \ - CALLER_RESTORE_R2 \ - CALLER_RESTORE_R3 \ - CALLER_RESTORE_R4 \ - CALLER_RESTORE_R5 \ - CALLER_RESTORE_R6 \ - CALLER_RESTORE_R7 \ - CALLER_RESTORE_R8 \ - CALLER_RESTORE_R9 \ - CALLER_RESTORE_R10 \ - CALLER_RESTORE_F1 \ - CALLER_RESTORE_F2 \ - CALLER_RESTORE_F3 \ - CALLER_RESTORE_F4 \ - CALLER_RESTORE_D1 \ - CALLER_RESTORE_D2 \ - CALLER_RESTORE_L1 - - /* Restore Base first, since the others may - be addressed relative to it */ -#define CALLER_RESTORE_SYSTEM \ - CALLER_RESTORE_Base \ - CALLER_RESTORE_Sp \ - CALLER_RESTORE_SpLim \ - CALLER_RESTORE_Hp \ - CALLER_RESTORE_CurrentTSO \ - CALLER_RESTORE_CurrentNursery - -#else /* not IN_STG_CODE */ - -#define CALLER_SAVE_USER /* nothing */ -#define CALLER_SAVE_SYSTEM /* nothing */ -#define CALLER_RESTORE_USER /* nothing */ -#define CALLER_RESTORE_SYSTEM /* nothing */ - -#endif /* IN_STG_CODE */ -#define CALLER_SAVE_ALL \ - CALLER_SAVE_SYSTEM \ - CALLER_SAVE_USER - -#define CALLER_RESTORE_ALL \ - CALLER_RESTORE_SYSTEM \ - CALLER_RESTORE_USER - #endif /* REGS_H */ diff --git a/rts/Apply.cmm b/rts/Apply.cmm index a2d4a7e123..b89abeaff2 100644 --- a/rts/Apply.cmm +++ b/rts/Apply.cmm @@ -21,18 +21,16 @@ STRING(stg_ap_0_ret_str,"stg_ap_0_ret... ") -stg_ap_0_fast +stg_ap_0_fast ( P_ fun ) { - // fn is in R1, no args on the stack - IF_DEBUG(apply, - foreign "C" debugBelch(stg_ap_0_ret_str) [R1]; - foreign "C" printClosure(R1 "ptr") [R1]); + ccall debugBelch(stg_ap_0_ret_str); + ccall printClosure(R1 "ptr")); IF_DEBUG(sanity, - foreign "C" checkStackFrame(Sp "ptr") [R1]); + ccall checkStackFrame(Sp "ptr")); - ENTER(); + ENTER(fun); } /* ----------------------------------------------------------------------------- @@ -56,9 +54,9 @@ stg_ap_0_fast -------------------------------------------------------------------------- */ INFO_TABLE(stg_PAP,/*special layout*/0,0,PAP,"PAP","PAP") -{ foreign "C" barf("PAP object entered!") never returns; } +{ ccall barf("PAP object entered!") never returns; } -stg_PAP_apply +stg_PAP_apply /* no args => explicit stack */ { W_ Words; W_ pap; @@ -78,7 +76,7 @@ stg_PAP_apply // this before calling stg_PAP_entry. Sp_adj(-1); Sp(0) = R2; - jump stg_gc_unpt_r1; + jump stg_gc_unpt_r1 [R1]; } Sp_adj(-Words); @@ -86,7 +84,7 @@ stg_PAP_apply TICK_ENT_PAP(); LDV_ENTER(pap); #ifdef PROFILING - foreign "C" enterFunCCS(BaseReg "ptr", StgHeader_ccs(pap) "ptr"); + ccall enterFunCCS(BaseReg "ptr", StgHeader_ccs(pap) "ptr"); #endif // Reload the stack @@ -122,26 +120,26 @@ for: TICK_ENT_VIA_NODE(); #ifdef NO_ARG_REGS - jump %GET_ENTRY(UNTAG(R1)); + jump %GET_ENTRY(UNTAG(R1)) [R1]; #else W_ info; info = %GET_FUN_INFO(UNTAG(R1)); W_ type; type = TO_W_(StgFunInfoExtra_fun_type(info)); if (type == ARG_GEN) { - jump StgFunInfoExtra_slow_apply(info); + jump StgFunInfoExtra_slow_apply(info) [R1]; } if (type == ARG_GEN_BIG) { - jump StgFunInfoExtra_slow_apply(info); + jump StgFunInfoExtra_slow_apply(info) [R1]; } if (type == ARG_BCO) { Sp_adj(-2); Sp(1) = R1; Sp(0) = stg_apply_interp_info; - jump stg_yield_to_interpreter; + jump stg_yield_to_interpreter []; } jump W_[stg_ap_stack_entries + - WDS(TO_W_(StgFunInfoExtra_fun_type(info)))]; + WDS(TO_W_(StgFunInfoExtra_fun_type(info)))] [R1]; #endif } @@ -155,6 +153,7 @@ for: -------------------------------------------------------------------------- */ INFO_TABLE(stg_AP,/*special layout*/0,0,AP,"AP","AP") + /* no args => explicit stack */ { W_ Words; W_ ap; @@ -164,12 +163,12 @@ INFO_TABLE(stg_AP,/*special layout*/0,0,AP,"AP","AP") Words = TO_W_(StgAP_n_args(ap)); /* - * Check for stack overflow. IMPORTANT: use a _NP check here, + * Check for stack overflow. IMPORTANT: use a _ENTER check here, * because if the check fails, we might end up blackholing this very * closure, in which case we must enter the blackhole on return rather * than continuing to evaluate the now-defunct closure. */ - STK_CHK_NP(WDS(Words) + SIZEOF_StgUpdateFrame); + STK_CHK_ENTER(WDS(Words) + SIZEOF_StgUpdateFrame, R1); PUSH_UPD_FRAME(Sp - SIZEOF_StgUpdateFrame, R1); Sp = Sp - SIZEOF_StgUpdateFrame - WDS(Words); @@ -197,26 +196,26 @@ for: TICK_ENT_VIA_NODE(); #ifdef NO_ARG_REGS - jump %GET_ENTRY(UNTAG(R1)); + jump %GET_ENTRY(UNTAG(R1)) [R1]; #else W_ info; info = %GET_FUN_INFO(UNTAG(R1)); W_ type; type = TO_W_(StgFunInfoExtra_fun_type(info)); if (type == ARG_GEN) { - jump StgFunInfoExtra_slow_apply(info); + jump StgFunInfoExtra_slow_apply(info) [R1]; } if (type == ARG_GEN_BIG) { - jump StgFunInfoExtra_slow_apply(info); + jump StgFunInfoExtra_slow_apply(info) [R1]; } if (type == ARG_BCO) { Sp_adj(-2); Sp(1) = R1; Sp(0) = stg_apply_interp_info; - jump stg_yield_to_interpreter; + jump stg_yield_to_interpreter []; } jump W_[stg_ap_stack_entries + - WDS(TO_W_(StgFunInfoExtra_fun_type(info)))]; + WDS(TO_W_(StgFunInfoExtra_fun_type(info)))] [R1]; #endif } @@ -225,6 +224,7 @@ for: those generated by the byte-code compiler for inserting breakpoints. */ INFO_TABLE(stg_AP_NOUPD,/*special layout*/0,0,AP,"AP_NOUPD","AP_NOUPD") + /* no args => explicit stack */ { W_ Words; W_ ap; @@ -234,12 +234,12 @@ INFO_TABLE(stg_AP_NOUPD,/*special layout*/0,0,AP,"AP_NOUPD","AP_NOUPD") Words = TO_W_(StgAP_n_args(ap)); /* - * Check for stack overflow. IMPORTANT: use a _NP check here, + * Check for stack overflow. IMPORTANT: use a _ENTER check here, * because if the check fails, we might end up blackholing this very * closure, in which case we must enter the blackhole on return rather * than continuing to evaluate the now-defunct closure. */ - STK_CHK_NP(WDS(Words)); + STK_CHK_ENTER(WDS(Words), R1); Sp = Sp - WDS(Words); TICK_ENT_AP(); @@ -265,26 +265,26 @@ for: TICK_ENT_VIA_NODE(); #ifdef NO_ARG_REGS - jump %GET_ENTRY(UNTAG(R1)); + jump %GET_ENTRY(UNTAG(R1)) [R1]; #else W_ info; info = %GET_FUN_INFO(UNTAG(R1)); W_ type; type = TO_W_(StgFunInfoExtra_fun_type(info)); if (type == ARG_GEN) { - jump StgFunInfoExtra_slow_apply(info); + jump StgFunInfoExtra_slow_apply(info) [R1]; } if (type == ARG_GEN_BIG) { - jump StgFunInfoExtra_slow_apply(info); + jump StgFunInfoExtra_slow_apply(info) [R1]; } if (type == ARG_BCO) { Sp_adj(-2); Sp(1) = R1; Sp(0) = stg_apply_interp_info; - jump stg_yield_to_interpreter; + jump stg_yield_to_interpreter []; } jump W_[stg_ap_stack_entries + - WDS(TO_W_(StgFunInfoExtra_fun_type(info)))]; + WDS(TO_W_(StgFunInfoExtra_fun_type(info)))] [R1]; #endif } @@ -300,6 +300,7 @@ for: -------------------------------------------------------------------------- */ INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK") + /* no args => explicit stack */ { W_ Words; W_ ap; @@ -309,12 +310,12 @@ INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK") Words = StgAP_STACK_size(ap); /* - * Check for stack overflow. IMPORTANT: use a _NP check here, + * Check for stack overflow. IMPORTANT: use a _ENTER check here, * because if the check fails, we might end up blackholing this very * closure, in which case we must enter the blackhole on return rather * than continuing to evaluate the now-defunct closure. */ - STK_CHK_NP(WDS(Words) + SIZEOF_StgUpdateFrame + WDS(AP_STACK_SPLIM)); + STK_CHK_ENTER(WDS(Words) + SIZEOF_StgUpdateFrame + WDS(AP_STACK_SPLIM), R1); /* ensure there is at least AP_STACK_SPLIM words of headroom available * after unpacking the AP_STACK. See bug #1466 */ @@ -343,7 +344,7 @@ for: R1 = StgAP_STACK_fun(ap); - ENTER(); + ENTER_R1(); } /* ----------------------------------------------------------------------------- @@ -352,6 +353,7 @@ for: INFO_TABLE(stg_AP_STACK_NOUPD,/*special layout*/0,0,AP_STACK, "AP_STACK_NOUPD","AP_STACK_NOUPD") + /* no args => explicit stack */ { W_ Words; W_ ap; @@ -366,7 +368,7 @@ INFO_TABLE(stg_AP_STACK_NOUPD,/*special layout*/0,0,AP_STACK, * closure, in which case we must enter the blackhole on return rather * than continuing to evaluate the now-defunct closure. */ - STK_CHK_NP(WDS(Words) + WDS(AP_STACK_SPLIM)); + STK_CHK_ENTER(WDS(Words) + WDS(AP_STACK_SPLIM), R1); /* ensure there is at least AP_STACK_SPLIM words of headroom available * after unpacking the AP_STACK. See bug #1466 */ @@ -394,5 +396,5 @@ for: R1 = StgAP_STACK_fun(ap); - ENTER(); + ENTER_R1(); } diff --git a/rts/AutoApply.h b/rts/AutoApply.h index d0c5c3fe6b..ebb7308875 100644 --- a/rts/AutoApply.h +++ b/rts/AutoApply.h @@ -35,7 +35,7 @@ } \ R1 = pap; \ Sp_adj(1 + n); \ - jump %ENTRY_CODE(Sp(0)); + jump %ENTRY_CODE(Sp(0)) [R1]; // Copy the old PAP, build a new one with the extra arg(s) // ret addr and m arguments taking up n words are on the stack. @@ -74,7 +74,7 @@ } \ R1 = new_pap; \ Sp_adj(n+1); \ - jump %ENTRY_CODE(Sp(0)); + jump %ENTRY_CODE(Sp(0)) [R1]; // Jump to target, saving CCCS and restoring it on return #if defined(PROFILING) @@ -82,9 +82,9 @@ Sp(-1) = CCCS; \ Sp(-2) = stg_restore_cccs_info; \ Sp_adj(-2); \ - jump (target) + jump (target) [*] #else -#define jump_SAVE_CCCS(target) jump (target) +#define jump_SAVE_CCCS(target) jump (target) [*] #endif #endif /* APPLY_H */ diff --git a/rts/ClosureFlags.c b/rts/ClosureFlags.c index 0ab8b45669..a2a140282f 100644 --- a/rts/ClosureFlags.c +++ b/rts/ClosureFlags.c @@ -55,8 +55,7 @@ StgWord16 closure_flags[] = { [RET_BCO] = ( 0 ), [RET_SMALL] = ( _BTM| _SRT ), [RET_BIG] = ( _SRT ), - [RET_DYN] = ( _SRT ), - [RET_FUN] = ( 0 ), + [RET_FUN] = ( 0 ), [UPDATE_FRAME] = ( _BTM ), [CATCH_FRAME] = ( _BTM ), [UNDERFLOW_FRAME] = ( _BTM ), @@ -84,6 +83,6 @@ StgWord16 closure_flags[] = { [WHITEHOLE] = ( 0 ) }; -#if N_CLOSURE_TYPES != 61 +#if N_CLOSURE_TYPES != 60 #error Closure types changed: update ClosureFlags.c! #endif diff --git a/rts/Exception.cmm b/rts/Exception.cmm index 78907c4ba7..8a9f4e62c9 100644 --- a/rts/Exception.cmm +++ b/rts/Exception.cmm @@ -50,7 +50,8 @@ import ghczmprim_GHCziTypes_True_closure; -------------------------------------------------------------------------- */ -INFO_TABLE_RET(stg_unmaskAsyncExceptionszh_ret, RET_SMALL) +INFO_TABLE_RET(stg_unmaskAsyncExceptionszh_ret, RET_SMALL, W_ info_ptr) + /* explicit stack */ { CInt r; @@ -60,7 +61,7 @@ INFO_TABLE_RET(stg_unmaskAsyncExceptionszh_ret, RET_SMALL) /* Eagerly raise a blocked exception, if there is one */ if (StgTSO_blocked_exceptions(CurrentTSO) != END_TSO_QUEUE) { - STK_CHK_GEN( WDS(2), R1_PTR, stg_unmaskAsyncExceptionszh_ret_info); + STK_CHK_P (WDS(2), stg_unmaskAsyncExceptionszh_ret_info, R1); /* * We have to be very careful here, as in killThread#, since * we are about to raise an async exception in the current @@ -68,18 +69,18 @@ INFO_TABLE_RET(stg_unmaskAsyncExceptionszh_ret, RET_SMALL) */ Sp_adj(-2); Sp(1) = R1; - Sp(0) = stg_gc_unpt_r1_info; + Sp(0) = stg_ret_p_info; SAVE_THREAD_STATE(); - (r) = foreign "C" maybePerformBlockedException (MyCapability() "ptr", - CurrentTSO "ptr") [R1]; + (r) = ccall maybePerformBlockedException (MyCapability() "ptr", + CurrentTSO "ptr"); if (r != 0::CInt) { if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) { - jump stg_threadFinished; + jump stg_threadFinished []; } else { LOAD_THREAD_STATE(); ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16); - jump %ENTRY_CODE(Sp(0)); + jump %ENTRY_CODE(Sp(0)) [R1]; } } else { @@ -93,10 +94,11 @@ INFO_TABLE_RET(stg_unmaskAsyncExceptionszh_ret, RET_SMALL) } Sp_adj(1); - jump %ENTRY_CODE(Sp(0)); + jump %ENTRY_CODE(Sp(0)) [R1]; } -INFO_TABLE_RET(stg_maskAsyncExceptionszh_ret, RET_SMALL) +INFO_TABLE_RET(stg_maskAsyncExceptionszh_ret, RET_SMALL, W_ info_ptr) + return (P_ ret) { StgTSO_flags(CurrentTSO) = %lobits32( @@ -104,11 +106,11 @@ INFO_TABLE_RET(stg_maskAsyncExceptionszh_ret, RET_SMALL) | TSO_BLOCKEX | TSO_INTERRUPTIBLE ); - Sp_adj(1); - jump %ENTRY_CODE(Sp(0)); + return (ret); } -INFO_TABLE_RET(stg_maskUninterruptiblezh_ret, RET_SMALL) +INFO_TABLE_RET(stg_maskUninterruptiblezh_ret, RET_SMALL, W_ info_ptr) + return (P_ ret) { StgTSO_flags(CurrentTSO) = %lobits32( @@ -117,14 +119,13 @@ INFO_TABLE_RET(stg_maskUninterruptiblezh_ret, RET_SMALL) & ~TSO_INTERRUPTIBLE ); - Sp_adj(1); - jump %ENTRY_CODE(Sp(0)); + return (ret); } -stg_maskAsyncExceptionszh +stg_maskAsyncExceptionszh /* explicit stack */ { /* Args: R1 :: IO a */ - STK_CHK_GEN( WDS(1)/* worst case */, R1_PTR, stg_maskAsyncExceptionszh); + STK_CHK_P( WDS(1)/* worst case */, stg_maskAsyncExceptionszh, R1); if ((TO_W_(StgTSO_flags(CurrentTSO)) & TSO_BLOCKEX) == 0) { /* avoid growing the stack unnecessarily */ @@ -146,13 +147,13 @@ stg_maskAsyncExceptionszh TICK_UNKNOWN_CALL(); TICK_SLOW_CALL_v(); - jump stg_ap_v_fast; + jump stg_ap_v_fast [R1]; } -stg_maskUninterruptiblezh +stg_maskUninterruptiblezh /* explicit stack */ { /* Args: R1 :: IO a */ - STK_CHK_GEN( WDS(1)/* worst case */, R1_PTR, stg_maskAsyncExceptionszh); + STK_CHK_P( WDS(1)/* worst case */, stg_maskAsyncExceptionszh, R1); if ((TO_W_(StgTSO_flags(CurrentTSO)) & TSO_BLOCKEX) == 0) { /* avoid growing the stack unnecessarily */ @@ -174,16 +175,16 @@ stg_maskUninterruptiblezh TICK_UNKNOWN_CALL(); TICK_SLOW_CALL_v(); - jump stg_ap_v_fast; + jump stg_ap_v_fast [R1]; } -stg_unmaskAsyncExceptionszh +stg_unmaskAsyncExceptionszh /* explicit stack */ { CInt r; W_ level; /* Args: R1 :: IO a */ - STK_CHK_GEN( WDS(4), R1_PTR, stg_unmaskAsyncExceptionszh); + STK_CHK_P (WDS(4), stg_unmaskAsyncExceptionszh, R1); /* 4 words: one for the unblock frame, 3 for setting up the * stack to call maybePerformBlockedException() below. */ @@ -225,16 +226,16 @@ stg_unmaskAsyncExceptionszh Sp(0) = stg_enter_info; SAVE_THREAD_STATE(); - (r) = foreign "C" maybePerformBlockedException (MyCapability() "ptr", - CurrentTSO "ptr") [R1]; + (r) = ccall maybePerformBlockedException (MyCapability() "ptr", + CurrentTSO "ptr"); if (r != 0::CInt) { if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) { - jump stg_threadFinished; + jump stg_threadFinished []; } else { LOAD_THREAD_STATE(); ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16); - jump %ENTRY_CODE(Sp(0)); + jump %ENTRY_CODE(Sp(0)) [R1]; } } else { /* we'll just call R1 directly, below */ @@ -245,11 +246,11 @@ stg_unmaskAsyncExceptionszh } TICK_UNKNOWN_CALL(); TICK_SLOW_CALL_v(); - jump stg_ap_v_fast; + jump stg_ap_v_fast [R1]; } -stg_getMaskingStatezh +stg_getMaskingStatezh () { /* args: none */ /* @@ -257,25 +258,18 @@ stg_getMaskingStatezh 1 == masked, non-interruptible, 2 == masked, interruptible */ - RET_N(((TO_W_(StgTSO_flags(CurrentTSO)) & TSO_BLOCKEX) != 0) + - ((TO_W_(StgTSO_flags(CurrentTSO)) & TSO_INTERRUPTIBLE) != 0)); + return (((TO_W_(StgTSO_flags(CurrentTSO)) & TSO_BLOCKEX) != 0) + + ((TO_W_(StgTSO_flags(CurrentTSO)) & TSO_INTERRUPTIBLE) != 0)); } -stg_killThreadzh +stg_killThreadzh (P_ target, P_ exception) { - /* args: R1 = TSO to kill, R2 = Exception */ - W_ why_blocked; - W_ target; - W_ exception; - - target = R1; - exception = R2; - + /* Needs 3 words because throwToSingleThreaded uses some stack */ - STK_CHK_GEN( WDS(3), R1_PTR & R2_PTR, stg_killThreadzh); + STK_CHK_PP (WDS(3), stg_killThreadzh, target, exception); /* We call allocate in throwTo(), so better check for GC */ - MAYBE_GC(R1_PTR & R2_PTR, stg_killThreadzh); + MAYBE_GC_PP (stg_killThreadzh, target, exception); /* * We might have killed ourselves. In which case, better be *very* @@ -292,58 +286,75 @@ stg_killThreadzh * happens: on resumption, we will just jump to the next frame on * the stack, which is the return point for stg_killThreadzh. */ - SAVE_THREAD_STATE(); - /* ToDo: what if the current thread is blocking exceptions? */ - foreign "C" throwToSingleThreaded(MyCapability() "ptr", - target "ptr", exception "ptr")[R1,R2]; - if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) { - jump stg_threadFinished; - } else { - LOAD_THREAD_STATE(); - ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16); - jump %ENTRY_CODE(Sp(0)); - } + R1 = target; + R2 = exception; + jump stg_killMyself [R1,R2]; } else { - W_ out; - W_ msg; - out = Sp - WDS(1); /* ok to re-use stack space here */ + W_ msg; - (msg) = foreign "C" throwTo(MyCapability() "ptr", + (msg) = ccall throwTo(MyCapability() "ptr", CurrentTSO "ptr", target "ptr", - exception "ptr") [R1,R2]; + exception "ptr"); if (msg == NULL) { - jump %ENTRY_CODE(Sp(0)); - } else { + return (); + } else { StgTSO_why_blocked(CurrentTSO) = BlockedOnMsgThrowTo; StgTSO_block_info(CurrentTSO) = msg; // we must block, and unlock the message before returning - jump stg_block_throwto; + jump stg_block_throwto (target, exception); } } } +/* + * We must switch into low-level Cmm in order to raise an exception in + * the current thread, hence this is in a separate proc with arguments + * passed explicitly in R1 and R2. + */ +stg_killMyself +{ + P_ target, exception; + target = R1; + exception = R2; + + SAVE_THREAD_STATE(); + /* ToDo: what if the current thread is blocking exceptions? */ + ccall throwToSingleThreaded(MyCapability() "ptr", + target "ptr", exception "ptr"); + if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) { + jump stg_threadFinished []; + } else { + LOAD_THREAD_STATE(); + ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16); + jump %ENTRY_CODE(Sp(0)) []; + } +} + /* ----------------------------------------------------------------------------- Catch frames -------------------------------------------------------------------------- */ -#define SP_OFF 0 - /* Catch frames are very similar to update frames, but when entering * one we just pop the frame off the stack and perform the correct * kind of return to the activation record underneath us on the stack. */ +#define CATCH_FRAME_FIELDS(w_,p_,info_ptr,exceptions_blocked,handler) \ + w_ info_ptr, \ + PROF_HDR_FIELDS(w_) \ + w_ exceptions_blocked, \ + p_ handler + + INFO_TABLE_RET(stg_catch_frame, CATCH_FRAME, -#if defined(PROFILING) - W_ unused1, W_ unused2, -#endif - W_ unused3, P_ unused4) - { - Sp = Sp + SIZEOF_StgCatchFrame; - jump %ENTRY_CODE(Sp(SP_OFF)); - } + CATCH_FRAME_FIELDS(W_,P_,info_ptr, + exceptions_blocked,handler)) + return (P_ ret) +{ + return (ret); +} /* ----------------------------------------------------------------------------- * The catch infotable @@ -356,30 +367,30 @@ INFO_TABLE_RET(stg_catch_frame, CATCH_FRAME, * -------------------------------------------------------------------------- */ INFO_TABLE(stg_catch,2,0,FUN,"catch","catch") + (P_ node) { - R2 = StgClosure_payload(R1,1); /* h */ - R1 = StgClosure_payload(R1,0); /* x */ - jump stg_catchzh; + jump stg_catchzh(StgClosure_payload(node,0),StgClosure_payload(node,1)); } -stg_catchzh +stg_catchzh ( P_ io, /* :: IO a */ + P_ handler /* :: Exception -> IO a */ ) { - /* args: R1 = m :: IO a, R2 = handler :: Exception -> IO a */ - STK_CHK_GEN(SIZEOF_StgCatchFrame + WDS(1), R1_PTR & R2_PTR, stg_catchzh); + W_ exceptions_blocked; + + STK_CHK_GEN(); - /* Set up the catch frame */ - Sp = Sp - SIZEOF_StgCatchFrame; - SET_HDR(Sp,stg_catch_frame_info,CCCS); - - StgCatchFrame_handler(Sp) = R2; - StgCatchFrame_exceptions_blocked(Sp) = + exceptions_blocked = TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE); TICK_CATCHF_PUSHED(); /* Apply R1 to the realworld token */ TICK_UNKNOWN_CALL(); TICK_SLOW_CALL_v(); - jump stg_ap_v_fast; + + jump stg_ap_v_fast + (CATCH_FRAME_FIELDS(,,stg_catch_frame_info, + exceptions_blocked, handler)) + (io); } /* ----------------------------------------------------------------------------- @@ -394,28 +405,33 @@ stg_catchzh INFO_TABLE(stg_raise,1,0,THUNK_1_0,"raise","raise") { - R1 = StgThunk_payload(R1,0); - jump stg_raisezh; + jump stg_raisezh(StgThunk_payload(R1,0)); } section "data" { no_break_on_exception: W_[1]; } -INFO_TABLE_RET(stg_raise_ret, RET_SMALL, P_ arg1) +INFO_TABLE_RET(stg_raise_ret, RET_SMALL, W_ info_ptr, P_ exception) + return (P_ ret) { - R1 = Sp(1); - Sp = Sp + WDS(2); - W_[no_break_on_exception] = 1; - jump stg_raisezh; + W_[no_break_on_exception] = 1; + jump stg_raisezh (exception); } -stg_raisezh +stg_raisezh /* explicit stack */ +/* + * args : R1 :: Exception + * + * Here we assume that the NativeNodeCall convention always puts the + * first argument in R1 (which it does). We cannot use high-level cmm + * due to all the LOAD_THREAD_STATE()/SAVE_THREAD_STATE() and stack + * walking that happens in here. + */ { W_ handler; W_ frame_type; W_ exception; - /* args : R1 :: Exception */ exception = R1; @@ -427,16 +443,16 @@ stg_raisezh */ if (RtsFlags_ProfFlags_showCCSOnException(RtsFlags) != 0::I32) { SAVE_THREAD_STATE(); - foreign "C" fprintCCS_stderr(CCCS "ptr", + ccall fprintCCS_stderr(CCCS "ptr", exception "ptr", - CurrentTSO "ptr") []; + CurrentTSO "ptr"); LOAD_THREAD_STATE(); } #endif retry_pop_stack: SAVE_THREAD_STATE(); - (frame_type) = foreign "C" raiseExceptionHelper(BaseReg "ptr", CurrentTSO "ptr", exception "ptr") []; + (frame_type) = ccall raiseExceptionHelper(BaseReg "ptr", CurrentTSO "ptr", exception "ptr"); LOAD_THREAD_STATE(); if (frame_type == ATOMICALLY_FRAME) { /* The exception has reached the edge of a memory transaction. Check that @@ -450,14 +466,14 @@ retry_pop_stack: W_ trec, outer; W_ r; trec = StgTSO_trec(CurrentTSO); - (r) = foreign "C" stmValidateNestOfTransactions(trec "ptr") []; + (r) = ccall stmValidateNestOfTransactions(trec "ptr"); outer = StgTRecHeader_enclosing_trec(trec); - foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") []; - foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") []; + ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr"); + ccall stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr"); if (outer != NO_TREC) { - foreign "C" stmAbortTransaction(MyCapability() "ptr", outer "ptr") []; - foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", outer "ptr") []; + ccall stmAbortTransaction(MyCapability() "ptr", outer "ptr"); + ccall stmFreeAbortedTRec(MyCapability() "ptr", outer "ptr"); } StgTSO_trec(CurrentTSO) = NO_TREC; @@ -468,10 +484,10 @@ retry_pop_stack: } else { // Transaction was not valid: we retry the exception (otherwise continue // with a further call to raiseExceptionHelper) - ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") []; + ("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr"); StgTSO_trec(CurrentTSO) = trec; R1 = StgAtomicallyFrame_code(Sp); - jump stg_ap_v_fast; + jump stg_ap_v_fast [R1]; } } @@ -492,7 +508,7 @@ retry_pop_stack: // for exmplae. Perhaps the stop_on_exception flag should // be per-thread. CInt[rts_stop_on_exception] = 0; - ("ptr" ioAction) = foreign "C" deRefStablePtr (W_[rts_breakpoint_io_action] "ptr") []; + ("ptr" ioAction) = ccall deRefStablePtr (W_[rts_breakpoint_io_action] "ptr"); Sp = Sp - WDS(6); Sp(5) = exception; Sp(4) = stg_raise_ret_info; @@ -500,7 +516,7 @@ retry_pop_stack: Sp(2) = ghczmprim_GHCziTypes_True_closure; // dummy breakpoint info Sp(1) = ghczmprim_GHCziTypes_True_closure; // True <=> a breakpoint R1 = ioAction; - jump RET_LBL(stg_ap_pppv); + jump RET_LBL(stg_ap_pppv) [R1]; } } @@ -519,11 +535,12 @@ retry_pop_stack: StgTSO_what_next(CurrentTSO) = ThreadKilled::I16; SAVE_THREAD_STATE(); /* inline! */ - jump stg_threadFinished; + jump stg_threadFinished []; } - /* Ok, Sp points to the enclosing CATCH_FRAME or CATCH_STM_FRAME. Pop everything - * down to and including this frame, update Su, push R1, and enter the handler. + /* Ok, Sp points to the enclosing CATCH_FRAME or CATCH_STM_FRAME. + * Pop everything down to and including this frame, update Su, + * push R1, and enter the handler. */ if (frame_type == CATCH_FRAME) { handler = StgCatchFrame_handler(Sp); @@ -572,8 +589,8 @@ retry_pop_stack: W_ trec, outer; trec = StgTSO_trec(CurrentTSO); outer = StgTRecHeader_enclosing_trec(trec); - foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") []; - foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") []; + ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr"); + ccall stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr"); StgTSO_trec(CurrentTSO) = outer; Sp = Sp + SIZEOF_StgCatchSTMFrame; } @@ -587,11 +604,10 @@ retry_pop_stack: Sp_adj(-1); TICK_UNKNOWN_CALL(); TICK_SLOW_CALL_pv(); - jump RET_LBL(stg_ap_pv); + jump RET_LBL(stg_ap_pv) [R1]; } -stg_raiseIOzh +stg_raiseIOzh (P_ exception) { - /* Args :: R1 :: Exception */ - jump stg_raisezh; + jump stg_raisezh (exception); } diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm index 90691fa091..08adf45b02 100644 --- a/rts/HeapStackCheck.cmm +++ b/rts/HeapStackCheck.cmm @@ -11,6 +11,7 @@ * ---------------------------------------------------------------------------*/ #include "Cmm.h" +#include "Updates.h" #ifdef __PIC__ import pthread_mutex_unlock; @@ -81,58 +82,66 @@ import LeaveCriticalSection; * ThreadRunGHC thread. */ -#define GC_GENERIC \ - DEBUG_ONLY(foreign "C" heapCheckFail()); \ - if (Hp > HpLim) { \ - Hp = Hp - HpAlloc/*in bytes*/; \ - if (HpLim == 0) { \ - R1 = ThreadYielding; \ - goto sched; \ - } \ - if (HpAlloc <= BLOCK_SIZE \ - && bdescr_link(CurrentNursery) != NULL) { \ - HpAlloc = 0; \ - CLOSE_NURSERY(); \ - CurrentNursery = bdescr_link(CurrentNursery); \ - OPEN_NURSERY(); \ - if (Capability_context_switch(MyCapability()) != 0 :: CInt || \ - Capability_interrupt(MyCapability()) != 0 :: CInt) { \ - R1 = ThreadYielding; \ - goto sched; \ - } else { \ - jump %ENTRY_CODE(Sp(0)); \ - } \ - } else { \ - R1 = HeapOverflow; \ - goto sched; \ - } \ - } else { \ - R1 = StackOverflow; \ - } \ - sched: \ - PRE_RETURN(R1,ThreadRunGHC); \ - jump stg_returnToSched; +stg_gc_noregs +{ + W_ ret; + + DEBUG_ONLY(foreign "C" heapCheckFail()); + if (Hp > HpLim) { + Hp = Hp - HpAlloc/*in bytes*/; + if (HpLim == 0) { + ret = ThreadYielding; + goto sched; + } + if (HpAlloc <= BLOCK_SIZE + && bdescr_link(CurrentNursery) != NULL) { + HpAlloc = 0; + CLOSE_NURSERY(); + CurrentNursery = bdescr_link(CurrentNursery); + OPEN_NURSERY(); + if (Capability_context_switch(MyCapability()) != 0 :: CInt || + Capability_interrupt(MyCapability()) != 0 :: CInt) { + ret = ThreadYielding; + goto sched; + } else { + jump %ENTRY_CODE(Sp(0)) []; + } + } else { + ret = HeapOverflow; + goto sched; + } + } else { + if (CHECK_GC()) { + ret = HeapOverflow; + } else { + ret = StackOverflow; + } + } + sched: + PRE_RETURN(ret,ThreadRunGHC); + jump stg_returnToSched [R1]; +} #define HP_GENERIC \ - PRE_RETURN(HeapOverflow, ThreadRunGHC) \ - jump stg_returnToSched; + PRE_RETURN(HeapOverflow, ThreadRunGHC) \ + jump stg_returnToSched [R1]; #define BLOCK_GENERIC \ - PRE_RETURN(ThreadBlocked, ThreadRunGHC) \ - jump stg_returnToSched; + PRE_RETURN(ThreadBlocked, ThreadRunGHC) \ + jump stg_returnToSched [R1]; #define YIELD_GENERIC \ - PRE_RETURN(ThreadYielding, ThreadRunGHC) \ - jump stg_returnToSched; + PRE_RETURN(ThreadYielding, ThreadRunGHC) \ + jump stg_returnToSched [R1]; #define BLOCK_BUT_FIRST(c) \ - PRE_RETURN(ThreadBlocked, ThreadRunGHC) \ - R2 = c; \ - jump stg_returnToSchedButFirst; + PRE_RETURN(ThreadBlocked, ThreadRunGHC) \ + R2 = c; \ + jump stg_returnToSchedButFirst [R1,R2,R3]; #define YIELD_TO_INTERPRETER \ - PRE_RETURN(ThreadYielding, ThreadInterpret) \ - jump stg_returnToSchedNotPaused; + PRE_RETURN(ThreadYielding, ThreadInterpret) \ + jump stg_returnToSchedNotPaused [R1]; /* ----------------------------------------------------------------------------- Heap checks in thunks/functions. @@ -144,19 +153,55 @@ import LeaveCriticalSection; There are canned sequences for 'n' pointer values in registers. -------------------------------------------------------------------------- */ -INFO_TABLE_RET( stg_enter, RET_SMALL, P_ unused) +INFO_TABLE_RET ( stg_enter, RET_SMALL, W_ info_ptr, P_ closure ) + return (/* no return values */) { - R1 = Sp(1); - Sp_adj(2); - ENTER(); + ENTER(closure); } -__stg_gc_enter_1 +__stg_gc_enter_1 (P_ node) { - Sp_adj(-2); - Sp(1) = R1; - Sp(0) = stg_enter_info; - GC_GENERIC + jump stg_gc_noregs (stg_enter_info, node) (); +} + +/* ----------------------------------------------------------------------------- + Canned heap checks for primitives. + + We can't use stg_gc_fun because primitives are not functions, so + these fragments let us save some boilerplate heap-check-failure + code in a few common cases. + -------------------------------------------------------------------------- */ + +stg_gc_prim () +{ + W_ fun; + fun = R9; + call stg_gc_noregs (); + jump fun(); +} + +stg_gc_prim_p (P_ arg) +{ + W_ fun; + fun = R9; + call stg_gc_noregs (); + jump fun(arg); +} + +stg_gc_prim_pp (P_ arg1, P_ arg2) +{ + W_ fun; + fun = R9; + call stg_gc_noregs (); + jump fun(arg1,arg2); +} + +stg_gc_prim_n (W_ arg) +{ + W_ fun; + fun = R9; + call stg_gc_noregs (); + jump fun(arg); } /* ----------------------------------------------------------------------------- @@ -169,138 +214,121 @@ __stg_gc_enter_1 -------------------------------------------------------------------------- */ /* The stg_enter_checkbh frame has the same shape as an update frame: */ -#if defined(PROFILING) -#define UPD_FRAME_PARAMS W_ unused1, W_ unused2, P_ unused3 -#else -#define UPD_FRAME_PARAMS P_ unused1 -#endif -INFO_TABLE_RET( stg_enter_checkbh, RET_SMALL, UPD_FRAME_PARAMS) +INFO_TABLE_RET ( stg_enter_checkbh, RET_SMALL, + UPDATE_FRAME_FIELDS(W_,P_,info_ptr,updatee)) + return (P_ ret) { - R1 = StgUpdateFrame_updatee(Sp); - Sp = Sp + SIZEOF_StgUpdateFrame; foreign "C" checkBlockingQueues(MyCapability() "ptr", - CurrentTSO) [R1]; - ENTER(); + CurrentTSO); + return (updatee); } /* ----------------------------------------------------------------------------- - Heap checks in Primitive case alternatives - - A primitive case alternative is entered with a value either in - R1, FloatReg1 or D1 depending on the return convention. All the - cases are covered below. + Info tables for returning values of various types. These are used + when we want to push a frame on the stack that will return a value + to the frame underneath it. -------------------------------------------------------------------------- */ -/*-- No Registers live ------------------------------------------------------ */ - -stg_gc_noregs +INFO_TABLE_RET ( stg_ret_v, RET_SMALL, W_ info_ptr ) + return (/* no return values */) { - GC_GENERIC + return (); } -/*-- void return ------------------------------------------------------------ */ - -INFO_TABLE_RET( stg_gc_void, RET_SMALL) +INFO_TABLE_RET ( stg_ret_p, RET_SMALL, W_ info_ptr, P_ ptr ) + return (/* no return values */) { - Sp_adj(1); - jump %ENTRY_CODE(Sp(0)); + return (ptr); } -/*-- R1 is boxed/unpointed -------------------------------------------------- */ - -INFO_TABLE_RET( stg_gc_unpt_r1, RET_SMALL, P_ unused) +INFO_TABLE_RET ( stg_ret_n, RET_SMALL, W_ info_ptr, W_ nptr ) + return (/* no return values */) { - R1 = Sp(1); - Sp_adj(2); - jump %ENTRY_CODE(Sp(0)); + return (nptr); } -stg_gc_unpt_r1 +INFO_TABLE_RET ( stg_ret_f, RET_SMALL, W_ info_ptr, F_ f ) + return (/* no return values */) { - Sp_adj(-2); - Sp(1) = R1; - Sp(0) = stg_gc_unpt_r1_info; - GC_GENERIC + return (f); } -/*-- R1 is unboxed -------------------------------------------------- */ - -/* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */ -INFO_TABLE_RET( stg_gc_unbx_r1, RET_SMALL, W_ unused ) +INFO_TABLE_RET ( stg_ret_d, RET_SMALL, W_ info_ptr, D_ d ) + return (/* no return values */) { - R1 = Sp(1); - Sp_adj(2); - jump %ENTRY_CODE(Sp(0)); + return (d); } -stg_gc_unbx_r1 +INFO_TABLE_RET ( stg_ret_l, RET_SMALL, W_ info_ptr, L_ l ) + return (/* no return values */) { - Sp_adj(-2); - Sp(1) = R1; - Sp(0) = stg_gc_unbx_r1_info; - GC_GENERIC + return (l); } -/*-- F1 contains a float ------------------------------------------------- */ +/* ----------------------------------------------------------------------------- + Canned heap-check failures for case alts, where we have some values + in registers or on the stack according to the NativeReturn + convention. + -------------------------------------------------------------------------- */ + -INFO_TABLE_RET( stg_gc_f1, RET_SMALL, F_ unused ) +/*-- void return ------------------------------------------------------------ */ + +/*-- R1 is a GC pointer, but we don't enter it ----------------------- */ + +stg_gc_unpt_r1 return (P_ ptr) /* NB. return convention */ { - F1 = F_[Sp+WDS(1)]; - Sp_adj(2); - jump %ENTRY_CODE(Sp(0)); + jump stg_gc_noregs (stg_ret_p_info, ptr) (); } -stg_gc_f1 +/*-- R1 is unboxed -------------------------------------------------- */ + +stg_gc_unbx_r1 return (W_ nptr) /* NB. return convention */ { - Sp_adj(-2); - F_[Sp + WDS(1)] = F1; - Sp(0) = stg_gc_f1_info; - GC_GENERIC + jump stg_gc_noregs (stg_ret_n_info, nptr) (); } -/*-- D1 contains a double ------------------------------------------------- */ +/*-- F1 contains a float ------------------------------------------------- */ -INFO_TABLE_RET( stg_gc_d1, RET_SMALL, D_ unused ) +stg_gc_f1 return (F_ f) { - D1 = D_[Sp + WDS(1)]; - Sp = Sp + WDS(1) + SIZEOF_StgDouble; - jump %ENTRY_CODE(Sp(0)); + jump stg_gc_noregs (stg_ret_f_info, f) (); } -stg_gc_d1 +/*-- D1 contains a double ------------------------------------------------- */ + +stg_gc_d1 return (D_ d) { - Sp = Sp - WDS(1) - SIZEOF_StgDouble; - D_[Sp + WDS(1)] = D1; - Sp(0) = stg_gc_d1_info; - GC_GENERIC + jump stg_gc_noregs (stg_ret_d_info, d) (); } /*-- L1 contains an int64 ------------------------------------------------- */ -INFO_TABLE_RET( stg_gc_l1, RET_SMALL, L_ unused ) +stg_gc_l1 return (L_ l) { - L1 = L_[Sp + WDS(1)]; - Sp_adj(1) + SIZEOF_StgWord64; - jump %ENTRY_CODE(Sp(0)); + jump stg_gc_noregs (stg_ret_l_info, l) (); } -stg_gc_l1 +/*-- Unboxed tuples with multiple pointers -------------------------------- */ + +stg_gc_pp return (P_ arg1, P_ arg2) { - Sp_adj(-1) - SIZEOF_StgWord64; - L_[Sp + WDS(1)] = L1; - Sp(0) = stg_gc_l1_info; - GC_GENERIC + call stg_gc_noregs(); + return (arg1,arg2); } -/*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */ +stg_gc_ppp return (P_ arg1, P_ arg2, P_ arg3) +{ + call stg_gc_noregs(); + return (arg1,arg2,arg3); +} -INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, P_ unused ) +stg_gc_pppp return (P_ arg1, P_ arg2, P_ arg3, P_ arg4) { - Sp_adj(1); - // one ptr is on the stack (Sp(0)) - jump %ENTRY_CODE(Sp(1)); + call stg_gc_noregs(); + return (arg1,arg2,arg3,arg4); } /* ----------------------------------------------------------------------------- @@ -333,7 +361,7 @@ INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, P_ unused ) -------------------------------------------------------------------------- */ -__stg_gc_fun +__stg_gc_fun /* explicit stack */ { W_ size; W_ info; @@ -365,7 +393,7 @@ __stg_gc_fun Sp(2) = R1; Sp(1) = size; Sp(0) = stg_gc_fun_info; - GC_GENERIC + jump stg_gc_noregs []; #else W_ type; type = TO_W_(StgFunInfoExtra_fun_type(info)); @@ -377,14 +405,15 @@ __stg_gc_fun Sp(1) = size; Sp(0) = stg_gc_fun_info; // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)");); - GC_GENERIC + jump stg_gc_noregs []; } else { - jump W_[stg_stack_save_entries + WDS(type)]; + jump W_[stg_stack_save_entries + WDS(type)] [*]; // all regs live // jumps to stg_gc_noregs after saving stuff } #endif /* !NO_ARG_REGS */ } + /* ----------------------------------------------------------------------------- Generic Apply (return point) @@ -393,14 +422,15 @@ __stg_gc_fun appropriately. The stack layout is given above. -------------------------------------------------------------------------- */ -INFO_TABLE_RET( stg_gc_fun, RET_FUN ) +INFO_TABLE_RET ( stg_gc_fun, RET_FUN ) + /* explicit stack */ { R1 = Sp(2); Sp_adj(3); #ifdef NO_ARG_REGS // Minor optimisation: there are no argument registers to load up, // so we can just jump straight to the function's entry point. - jump %GET_ENTRY(UNTAG(R1)); + jump %GET_ENTRY(UNTAG(R1)) [R1]; #else W_ info; W_ type; @@ -408,126 +438,25 @@ INFO_TABLE_RET( stg_gc_fun, RET_FUN ) info = %GET_FUN_INFO(UNTAG(R1)); type = TO_W_(StgFunInfoExtra_fun_type(info)); if (type == ARG_GEN || type == ARG_GEN_BIG) { - jump StgFunInfoExtra_slow_apply(info); + jump StgFunInfoExtra_slow_apply(info) [R1]; } else { if (type == ARG_BCO) { // cover this case just to be on the safe side Sp_adj(-2); Sp(1) = R1; Sp(0) = stg_apply_interp_info; - jump stg_yield_to_interpreter; + jump stg_yield_to_interpreter []; } else { - jump W_[stg_ap_stack_entries + WDS(type)]; + jump W_[stg_ap_stack_entries + WDS(type)] [R1]; } } #endif } /* ----------------------------------------------------------------------------- - Generic Heap Check Code. - - Called with Liveness mask in R9, Return address in R10. - Stack must be consistent (containing all necessary info pointers - to relevant SRTs). - - See StgMacros.h for a description of the RET_DYN stack frame. - - We also define an stg_gen_yield here, because it's very similar. - -------------------------------------------------------------------------- */ - -// For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P -// on a 64-bit machine, we'll end up wasting a couple of words, but -// it's not a big deal. - -#define RESTORE_EVERYTHING \ - L1 = L_[Sp + WDS(19)]; \ - D2 = D_[Sp + WDS(17)]; \ - D1 = D_[Sp + WDS(15)]; \ - F4 = F_[Sp + WDS(14)]; \ - F3 = F_[Sp + WDS(13)]; \ - F2 = F_[Sp + WDS(12)]; \ - F1 = F_[Sp + WDS(11)]; \ - R8 = Sp(10); \ - R7 = Sp(9); \ - R6 = Sp(8); \ - R5 = Sp(7); \ - R4 = Sp(6); \ - R3 = Sp(5); \ - R2 = Sp(4); \ - R1 = Sp(3); \ - Sp_adj(21); - -#define RET_OFFSET (-19) - -#define SAVE_EVERYTHING \ - Sp_adj(-21); \ - L_[Sp + WDS(19)] = L1; \ - D_[Sp + WDS(17)] = D2; \ - D_[Sp + WDS(15)] = D1; \ - F_[Sp + WDS(14)] = F4; \ - F_[Sp + WDS(13)] = F3; \ - F_[Sp + WDS(12)] = F2; \ - F_[Sp + WDS(11)] = F1; \ - Sp(10) = R8; \ - Sp(9) = R7; \ - Sp(8) = R6; \ - Sp(7) = R5; \ - Sp(6) = R4; \ - Sp(5) = R3; \ - Sp(4) = R2; \ - Sp(3) = R1; \ - Sp(2) = R10; /* return address */ \ - Sp(1) = R9; /* liveness mask */ \ - Sp(0) = stg_gc_gen_info; - -INFO_TABLE_RET( stg_gc_gen, RET_DYN ) -/* bitmap in the above info table is unused, the real one is on the stack. */ -{ - RESTORE_EVERYTHING; - jump Sp(RET_OFFSET); /* No %ENTRY_CODE( - this is an actual code ptr */ -} - -stg_gc_gen -{ - // Hack; see Note [mvar-heap-check] in PrimOps.cmm - if (R10 == stg_putMVarzh || R10 == stg_takeMVarzh) { - unlockClosure(R1, stg_MVAR_DIRTY_info) - } - SAVE_EVERYTHING; - GC_GENERIC -} - -// A heap check at an unboxed tuple return point. The return address -// is on the stack, and we can find it by using the offsets given -// to us in the liveness mask. -stg_gc_ut -{ - R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9))); - SAVE_EVERYTHING; - GC_GENERIC -} - -/* - * stg_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC - * because we've just failed doYouWantToGC(), not a standard heap - * check. GC_GENERIC would end up returning StackOverflow. - */ -stg_gc_gen_hp -{ - SAVE_EVERYTHING; - HP_GENERIC -} - -/* ----------------------------------------------------------------------------- Yields -------------------------------------------------------------------------- */ -stg_gen_yield -{ - SAVE_EVERYTHING; - YIELD_GENERIC -} - stg_yield_noregs { YIELD_GENERIC; @@ -546,25 +475,11 @@ stg_yield_to_interpreter Blocks -------------------------------------------------------------------------- */ -stg_gen_block -{ - SAVE_EVERYTHING; - BLOCK_GENERIC; -} - stg_block_noregs { BLOCK_GENERIC; } -stg_block_1 -{ - Sp_adj(-2); - Sp(1) = R1; - Sp(0) = stg_enter_info; - BLOCK_GENERIC; -} - /* ----------------------------------------------------------------------------- * takeMVar/putMVar-specific blocks * @@ -585,52 +500,48 @@ stg_block_1 * * -------------------------------------------------------------------------- */ -INFO_TABLE_RET( stg_block_takemvar, RET_SMALL, P_ unused ) +INFO_TABLE_RET ( stg_block_takemvar, RET_SMALL, W_ info_ptr, P_ mvar ) + return () { - R1 = Sp(1); - Sp_adj(2); - jump stg_takeMVarzh; + jump stg_takeMVarzh(mvar); } // code fragment executed just before we return to the scheduler stg_block_takemvar_finally { unlockClosure(R3, stg_MVAR_DIRTY_info); - jump StgReturn; + jump StgReturn [R1]; } -stg_block_takemvar +stg_block_takemvar /* mvar passed in R1 */ { Sp_adj(-2); Sp(1) = R1; Sp(0) = stg_block_takemvar_info; - R3 = R1; + R3 = R1; // mvar communicated to stg_block_takemvar_finally in R3 BLOCK_BUT_FIRST(stg_block_takemvar_finally); } -INFO_TABLE_RET( stg_block_putmvar, RET_SMALL, P_ unused1, P_ unused2 ) +INFO_TABLE_RET( stg_block_putmvar, RET_SMALL, W_ info_ptr, + P_ mvar, P_ val ) + return () { - R2 = Sp(2); - R1 = Sp(1); - Sp_adj(3); - jump stg_putMVarzh; + jump stg_putMVarzh(mvar, val); } // code fragment executed just before we return to the scheduler stg_block_putmvar_finally { unlockClosure(R3, stg_MVAR_DIRTY_info); - jump StgReturn; + jump StgReturn [R1]; } -stg_block_putmvar +stg_block_putmvar (P_ mvar, P_ val) { - Sp_adj(-3); - Sp(2) = R2; - Sp(1) = R1; - Sp(0) = stg_block_putmvar_info; - R3 = R1; - BLOCK_BUT_FIRST(stg_block_putmvar_finally); + push (stg_block_putmvar_info, mvar, val) { + R3 = R1; // mvar communicated to stg_block_putmvar_finally in R3 + BLOCK_BUT_FIRST(stg_block_putmvar_finally); + } } stg_block_blackhole @@ -641,12 +552,11 @@ stg_block_blackhole BLOCK_GENERIC; } -INFO_TABLE_RET( stg_block_throwto, RET_SMALL, P_ unused, P_ unused ) +INFO_TABLE_RET ( stg_block_throwto, RET_SMALL, W_ info_ptr, + P_ tso, P_ exception ) + return () { - R2 = Sp(2); - R1 = Sp(1); - Sp_adj(3); - jump stg_killThreadzh; + jump stg_killThreadzh(tso, exception); } stg_block_throwto_finally @@ -657,30 +567,26 @@ stg_block_throwto_finally if (StgHeader_info(StgTSO_block_info(CurrentTSO)) == stg_WHITEHOLE_info) { unlockClosure(StgTSO_block_info(CurrentTSO), stg_MSG_THROWTO_info); } - jump StgReturn; + jump StgReturn [R1]; } -stg_block_throwto +stg_block_throwto (P_ tso, P_ exception) { - Sp_adj(-3); - Sp(2) = R2; - Sp(1) = R1; - Sp(0) = stg_block_throwto_info; - BLOCK_BUT_FIRST(stg_block_throwto_finally); + push (stg_block_throwto_info, tso, exception) { + BLOCK_BUT_FIRST(stg_block_throwto_finally); + } } #ifdef mingw32_HOST_OS -INFO_TABLE_RET( stg_block_async, RET_SMALL, W_ unused ) +INFO_TABLE_RET ( stg_block_async, RET_SMALL, W_ info_ptr, W_ ares ) + return () { - W_ ares; W_ len, errC; - ares = Sp(1); len = TO_W_(StgAsyncIOResult_len(ares)); errC = TO_W_(StgAsyncIOResult_errCode(ares)); - foreign "C" free(ares "ptr"); - Sp_adj(2); - RET_NN(len, errC); + ccall free(ares "ptr"); + return (len, errC); } stg_block_async @@ -693,14 +599,11 @@ stg_block_async /* Used by threadDelay implementation; it would be desirable to get rid of * this free()'ing void return continuation. */ -INFO_TABLE_RET( stg_block_async_void, RET_SMALL, W_ ares ) +INFO_TABLE_RET ( stg_block_async_void, RET_SMALL, W_ info_ptr, W_ ares ) + return () { - W_ ares; - - ares = Sp(1); - foreign "C" free(ares "ptr"); - Sp_adj(2); - jump %ENTRY_CODE(Sp(0)); + ccall free(ares "ptr"); + return (); } stg_block_async_void @@ -712,14 +615,15 @@ stg_block_async_void #endif + /* ----------------------------------------------------------------------------- STM-specific waiting -------------------------------------------------------------------------- */ stg_block_stmwait_finally { - foreign "C" stmWaitUnlock(MyCapability() "ptr", R3 "ptr"); - jump StgReturn; + ccall stmWaitUnlock(MyCapability() "ptr", R3 "ptr"); + jump StgReturn [R1]; } stg_block_stmwait diff --git a/rts/Interpreter.c b/rts/Interpreter.c index 83973e8c9b..2eb2d0789f 100644 --- a/rts/Interpreter.c +++ b/rts/Interpreter.c @@ -503,7 +503,7 @@ do_return: // | XXXX_info | // +---------------+ // - // where XXXX_info is one of the stg_gc_unbx_r1_info family. + // where XXXX_info is one of the stg_ret_*_info family. // // We're only interested in the case when the real return address // is a BCO; otherwise we'll return to the scheduler. @@ -512,12 +512,12 @@ do_return_unboxed: { int offset; - ASSERT( Sp[0] == (W_)&stg_gc_unbx_r1_info - || Sp[0] == (W_)&stg_gc_unpt_r1_info - || Sp[0] == (W_)&stg_gc_f1_info - || Sp[0] == (W_)&stg_gc_d1_info - || Sp[0] == (W_)&stg_gc_l1_info - || Sp[0] == (W_)&stg_gc_void_info // VoidRep + ASSERT( Sp[0] == (W_)&stg_ret_v_info + || Sp[0] == (W_)&stg_ret_p_info + || Sp[0] == (W_)&stg_ret_n_info + || Sp[0] == (W_)&stg_ret_f_info + || Sp[0] == (W_)&stg_ret_d_info + || Sp[0] == (W_)&stg_ret_l_info ); // get the offset of the stg_ctoi_ret_XXX itbl @@ -1336,27 +1336,27 @@ run_BCO: case bci_RETURN_P: Sp--; - Sp[0] = (W_)&stg_gc_unpt_r1_info; + Sp[0] = (W_)&stg_ret_p_info; goto do_return_unboxed; case bci_RETURN_N: Sp--; - Sp[0] = (W_)&stg_gc_unbx_r1_info; + Sp[0] = (W_)&stg_ret_n_info; goto do_return_unboxed; case bci_RETURN_F: Sp--; - Sp[0] = (W_)&stg_gc_f1_info; + Sp[0] = (W_)&stg_ret_f_info; goto do_return_unboxed; case bci_RETURN_D: Sp--; - Sp[0] = (W_)&stg_gc_d1_info; + Sp[0] = (W_)&stg_ret_d_info; goto do_return_unboxed; case bci_RETURN_L: Sp--; - Sp[0] = (W_)&stg_gc_l1_info; + Sp[0] = (W_)&stg_ret_l_info; goto do_return_unboxed; case bci_RETURN_V: Sp--; - Sp[0] = (W_)&stg_gc_void_info; + Sp[0] = (W_)&stg_ret_v_info; goto do_return_unboxed; case bci_SWIZZLE: { @@ -1372,9 +1372,6 @@ run_BCO: int o_itbl = BCO_GET_LARGE_ARG; int interruptible = BCO_NEXT; void(*marshall_fn)(void*) = (void (*)(void*))BCO_LIT(o_itbl); - int ret_dyn_size = - RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE - + sizeofW(StgRetDyn); /* the stack looks like this: @@ -1405,6 +1402,7 @@ run_BCO: nat nargs = cif->nargs; nat ret_size; nat i; + int j; StgPtr p; W_ ret[2]; // max needed W_ *arguments[stk_offset]; // max needed @@ -1446,17 +1444,19 @@ run_BCO: // // We know how many (non-ptr) words there are before the // next valid stack frame: it is the stk_offset arg to the - // CCALL instruction. So we build a RET_DYN stack frame - // on the stack frame to describe this chunk of stack. - // - Sp -= ret_dyn_size; - ((StgRetDyn *)Sp)->liveness = R1_PTR | N_NONPTRS(stk_offset); - ((StgRetDyn *)Sp)->info = (StgInfoTable *)&stg_gc_gen_info; + // CCALL instruction. So we overwrite this area of the + // stack with empty stack frames (stg_ret_v_info); + // + for (j = 0; j < stk_offset; j++) { + Sp[j] = (W_)&stg_ret_v_info; /* an empty stack frame */ + } // save obj (pointer to the current BCO), since this - // might move during the call. We use the R1 slot in the - // RET_DYN frame for this, hence R1_PTR above. - ((StgRetDyn *)Sp)->payload[0] = (StgClosure *)obj; + // might move during the call. We push an stg_ret_p frame + // for this. + Sp -= 2; + Sp[1] = (W_)obj; + Sp[0] = (W_)&stg_ret_p_info; SAVE_STACK_POINTERS; tok = suspendThread(&cap->r, interruptible ? rtsTrue : rtsFalse); @@ -1464,11 +1464,11 @@ run_BCO: // We already made a copy of the arguments above. ffi_call(cif, fn, ret, argptrs); - // And restart the thread again, popping the RET_DYN frame. + // And restart the thread again, popping the stg_ret_p frame. cap = (Capability *)((void *)((unsigned char*)resumeThread(tok) - STG_FIELD_OFFSET(Capability,r))); LOAD_STACK_POINTERS; - if (Sp[0] != (W_)&stg_gc_gen_info) { + if (Sp[0] != (W_)&stg_ret_p_info) { // the stack is not how we left it. This probably // means that an exception got raised on exit from the // foreign call, so we should just continue with @@ -1476,16 +1476,16 @@ run_BCO: RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding); } - // Re-load the pointer to the BCO from the RET_DYN frame, + // Re-load the pointer to the BCO from the stg_ret_p frame, // it might have moved during the call. Also reload the // pointers to the components of the BCO. - obj = ((StgRetDyn *)Sp)->payload[0]; + obj = (P_)Sp[1]; bco = (StgBCO*)obj; instrs = (StgWord16*)(bco->instrs->payload); literals = (StgWord*)(&bco->literals->payload[0]); ptrs = (StgPtr*)(&bco->ptrs->payload[0]); - Sp += ret_dyn_size; + Sp += 2; // pop the stg_ret_p frame // Save the Haskell thread's current value of errno cap->r.rCurrentTSO->saved_errno = errno; diff --git a/rts/LdvProfile.c b/rts/LdvProfile.c index 7bc032e05d..8ccafef9e2 100644 --- a/rts/LdvProfile.c +++ b/rts/LdvProfile.c @@ -133,7 +133,6 @@ processHeapClosureForDead( StgClosure *c ) case CATCH_FRAME: case UNDERFLOW_FRAME: case STOP_FRAME: - case RET_DYN: case RET_BCO: case RET_SMALL: case RET_BIG: diff --git a/rts/Linker.c b/rts/Linker.c index cf60c528d3..64d60f23d0 100644 --- a/rts/Linker.c +++ b/rts/Linker.c @@ -1011,301 +1011,300 @@ typedef struct _RtsSymbolVal { #endif -#define RTS_SYMBOLS \ - Maybe_Stable_Names \ - RTS_TICKY_SYMBOLS \ - SymI_HasProto(StgReturn) \ - SymI_HasProto(stg_enter_info) \ - SymI_HasProto(stg_gc_void_info) \ - SymI_HasProto(__stg_gc_enter_1) \ - SymI_HasProto(stg_gc_noregs) \ - SymI_HasProto(stg_gc_unpt_r1_info) \ - SymI_HasProto(stg_gc_unpt_r1) \ - SymI_HasProto(stg_gc_unbx_r1_info) \ - SymI_HasProto(stg_gc_unbx_r1) \ - SymI_HasProto(stg_gc_f1_info) \ - SymI_HasProto(stg_gc_f1) \ - SymI_HasProto(stg_gc_d1_info) \ - SymI_HasProto(stg_gc_d1) \ - SymI_HasProto(stg_gc_l1_info) \ - SymI_HasProto(stg_gc_l1) \ - SymI_HasProto(__stg_gc_fun) \ - SymI_HasProto(stg_gc_fun_info) \ - SymI_HasProto(stg_gc_gen) \ - SymI_HasProto(stg_gc_gen_info) \ - SymI_HasProto(stg_gc_gen_hp) \ - SymI_HasProto(stg_gc_ut) \ - SymI_HasProto(stg_gen_yield) \ - SymI_HasProto(stg_yield_noregs) \ - SymI_HasProto(stg_yield_to_interpreter) \ - SymI_HasProto(stg_gen_block) \ - SymI_HasProto(stg_block_noregs) \ - SymI_HasProto(stg_block_1) \ - SymI_HasProto(stg_block_takemvar) \ - SymI_HasProto(stg_block_putmvar) \ - MAIN_CAP_SYM \ - SymI_HasProto(MallocFailHook) \ - SymI_HasProto(OnExitHook) \ - SymI_HasProto(OutOfHeapHook) \ - SymI_HasProto(StackOverflowHook) \ - SymI_HasProto(addDLL) \ - SymI_HasProto(__int_encodeDouble) \ - SymI_HasProto(__word_encodeDouble) \ - SymI_HasProto(__2Int_encodeDouble) \ - SymI_HasProto(__int_encodeFloat) \ - SymI_HasProto(__word_encodeFloat) \ - SymI_HasProto(stg_atomicallyzh) \ - SymI_HasProto(barf) \ - SymI_HasProto(debugBelch) \ - SymI_HasProto(errorBelch) \ - SymI_HasProto(sysErrorBelch) \ - SymI_HasProto(stg_getMaskingStatezh) \ - SymI_HasProto(stg_maskAsyncExceptionszh) \ - SymI_HasProto(stg_maskUninterruptiblezh) \ - SymI_HasProto(stg_catchzh) \ - SymI_HasProto(stg_catchRetryzh) \ - SymI_HasProto(stg_catchSTMzh) \ - SymI_HasProto(stg_checkzh) \ - SymI_HasProto(closure_flags) \ - SymI_HasProto(cmp_thread) \ - SymI_HasProto(createAdjustor) \ - SymI_HasProto(stg_decodeDoublezu2Intzh) \ - SymI_HasProto(stg_decodeFloatzuIntzh) \ - SymI_HasProto(defaultsHook) \ - SymI_HasProto(stg_delayzh) \ - SymI_HasProto(stg_deRefWeakzh) \ - SymI_HasProto(stg_deRefStablePtrzh) \ - SymI_HasProto(dirty_MUT_VAR) \ - SymI_HasProto(stg_forkzh) \ - SymI_HasProto(stg_forkOnzh) \ - SymI_HasProto(forkProcess) \ - SymI_HasProto(forkOS_createThread) \ - SymI_HasProto(freeHaskellFunctionPtr) \ +#define RTS_SYMBOLS \ + Maybe_Stable_Names \ + RTS_TICKY_SYMBOLS \ + SymI_HasProto(StgReturn) \ + SymI_HasProto(stg_gc_noregs) \ + SymI_HasProto(stg_ret_v_info) \ + SymI_HasProto(stg_ret_p_info) \ + SymI_HasProto(stg_ret_n_info) \ + SymI_HasProto(stg_ret_f_info) \ + SymI_HasProto(stg_ret_d_info) \ + SymI_HasProto(stg_ret_l_info) \ + SymI_HasProto(stg_gc_prim_p) \ + SymI_HasProto(stg_gc_prim_pp) \ + SymI_HasProto(stg_gc_prim_n) \ + SymI_HasProto(stg_enter_info) \ + SymI_HasProto(__stg_gc_enter_1) \ + SymI_HasProto(stg_gc_unpt_r1) \ + SymI_HasProto(stg_gc_unbx_r1) \ + SymI_HasProto(stg_gc_f1) \ + SymI_HasProto(stg_gc_d1) \ + SymI_HasProto(stg_gc_l1) \ + SymI_HasProto(stg_gc_pp) \ + SymI_HasProto(stg_gc_ppp) \ + SymI_HasProto(stg_gc_pppp) \ + SymI_HasProto(__stg_gc_fun) \ + SymI_HasProto(stg_gc_fun_info) \ + SymI_HasProto(stg_yield_noregs) \ + SymI_HasProto(stg_yield_to_interpreter) \ + SymI_HasProto(stg_block_noregs) \ + SymI_HasProto(stg_block_takemvar) \ + SymI_HasProto(stg_block_putmvar) \ + MAIN_CAP_SYM \ + SymI_HasProto(MallocFailHook) \ + SymI_HasProto(OnExitHook) \ + SymI_HasProto(OutOfHeapHook) \ + SymI_HasProto(StackOverflowHook) \ + SymI_HasProto(addDLL) \ + SymI_HasProto(__int_encodeDouble) \ + SymI_HasProto(__word_encodeDouble) \ + SymI_HasProto(__2Int_encodeDouble) \ + SymI_HasProto(__int_encodeFloat) \ + SymI_HasProto(__word_encodeFloat) \ + SymI_HasProto(stg_atomicallyzh) \ + SymI_HasProto(barf) \ + SymI_HasProto(debugBelch) \ + SymI_HasProto(errorBelch) \ + SymI_HasProto(sysErrorBelch) \ + SymI_HasProto(stg_getMaskingStatezh) \ + SymI_HasProto(stg_maskAsyncExceptionszh) \ + SymI_HasProto(stg_maskUninterruptiblezh) \ + SymI_HasProto(stg_catchzh) \ + SymI_HasProto(stg_catchRetryzh) \ + SymI_HasProto(stg_catchSTMzh) \ + SymI_HasProto(stg_checkzh) \ + SymI_HasProto(closure_flags) \ + SymI_HasProto(cmp_thread) \ + SymI_HasProto(createAdjustor) \ + SymI_HasProto(stg_decodeDoublezu2Intzh) \ + SymI_HasProto(stg_decodeFloatzuIntzh) \ + SymI_HasProto(defaultsHook) \ + SymI_HasProto(stg_delayzh) \ + SymI_HasProto(stg_deRefWeakzh) \ + SymI_HasProto(stg_deRefStablePtrzh) \ + SymI_HasProto(dirty_MUT_VAR) \ + SymI_HasProto(stg_forkzh) \ + SymI_HasProto(stg_forkOnzh) \ + SymI_HasProto(forkProcess) \ + SymI_HasProto(forkOS_createThread) \ + SymI_HasProto(freeHaskellFunctionPtr) \ SymI_HasProto(getOrSetGHCConcSignalSignalHandlerStore) \ SymI_HasProto(getOrSetGHCConcWindowsPendingDelaysStore) \ SymI_HasProto(getOrSetGHCConcWindowsIOManagerThreadStore) \ SymI_HasProto(getOrSetGHCConcWindowsProddingStore) \ SymI_HasProto(getOrSetSystemEventThreadEventManagerStore) \ SymI_HasProto(getOrSetSystemEventThreadIOManagerThreadStore) \ - SymI_HasProto(getGCStats) \ - SymI_HasProto(getGCStatsEnabled) \ - SymI_HasProto(genSymZh) \ - SymI_HasProto(genericRaise) \ - SymI_HasProto(getProgArgv) \ - SymI_HasProto(getFullProgArgv) \ - SymI_HasProto(getStablePtr) \ - SymI_HasProto(hs_init) \ - SymI_HasProto(hs_exit) \ - SymI_HasProto(hs_set_argv) \ - SymI_HasProto(hs_add_root) \ - SymI_HasProto(hs_perform_gc) \ - SymI_HasProto(hs_free_stable_ptr) \ - SymI_HasProto(hs_free_fun_ptr) \ - SymI_HasProto(hs_hpc_rootModule) \ - SymI_HasProto(hs_hpc_module) \ - SymI_HasProto(initLinker) \ - SymI_HasProto(stg_unpackClosurezh) \ - SymI_HasProto(stg_getApStackValzh) \ - SymI_HasProto(stg_getSparkzh) \ - SymI_HasProto(stg_numSparkszh) \ - SymI_HasProto(stg_isCurrentThreadBoundzh) \ - SymI_HasProto(stg_isEmptyMVarzh) \ - SymI_HasProto(stg_killThreadzh) \ - SymI_HasProto(loadArchive) \ - SymI_HasProto(loadObj) \ - SymI_HasProto(insertStableSymbol) \ - SymI_HasProto(insertSymbol) \ - SymI_HasProto(lookupSymbol) \ - SymI_HasProto(stg_makeStablePtrzh) \ - SymI_HasProto(stg_mkApUpd0zh) \ - SymI_HasProto(stg_myThreadIdzh) \ - SymI_HasProto(stg_labelThreadzh) \ - SymI_HasProto(stg_newArrayzh) \ - SymI_HasProto(stg_newArrayArrayzh) \ - SymI_HasProto(stg_newBCOzh) \ - SymI_HasProto(stg_newByteArrayzh) \ - SymI_HasProto_redirect(newCAF, newDynCAF) \ - SymI_HasProto(stg_newMVarzh) \ - SymI_HasProto(stg_newMutVarzh) \ - SymI_HasProto(stg_newTVarzh) \ - SymI_HasProto(stg_noDuplicatezh) \ - SymI_HasProto(stg_atomicModifyMutVarzh) \ - SymI_HasProto(stg_casMutVarzh) \ - SymI_HasProto(stg_newPinnedByteArrayzh) \ - SymI_HasProto(stg_newAlignedPinnedByteArrayzh) \ - SymI_HasProto(newSpark) \ - SymI_HasProto(performGC) \ - SymI_HasProto(performMajorGC) \ - SymI_HasProto(prog_argc) \ - SymI_HasProto(prog_argv) \ - SymI_HasProto(stg_putMVarzh) \ - SymI_HasProto(stg_raisezh) \ - SymI_HasProto(stg_raiseIOzh) \ - SymI_HasProto(stg_readTVarzh) \ - SymI_HasProto(stg_readTVarIOzh) \ - SymI_HasProto(resumeThread) \ - SymI_HasProto(setNumCapabilities) \ - SymI_HasProto(getNumberOfProcessors) \ - SymI_HasProto(resolveObjs) \ - SymI_HasProto(stg_retryzh) \ - SymI_HasProto(rts_apply) \ - SymI_HasProto(rts_checkSchedStatus) \ - SymI_HasProto(rts_eval) \ - SymI_HasProto(rts_evalIO) \ - SymI_HasProto(rts_evalLazyIO) \ - SymI_HasProto(rts_evalStableIO) \ - SymI_HasProto(rts_eval_) \ - SymI_HasProto(rts_getBool) \ - SymI_HasProto(rts_getChar) \ - SymI_HasProto(rts_getDouble) \ - SymI_HasProto(rts_getFloat) \ - SymI_HasProto(rts_getInt) \ - SymI_HasProto(rts_getInt8) \ - SymI_HasProto(rts_getInt16) \ - SymI_HasProto(rts_getInt32) \ - SymI_HasProto(rts_getInt64) \ - SymI_HasProto(rts_getPtr) \ - SymI_HasProto(rts_getFunPtr) \ - SymI_HasProto(rts_getStablePtr) \ - SymI_HasProto(rts_getThreadId) \ - SymI_HasProto(rts_getWord) \ - SymI_HasProto(rts_getWord8) \ - SymI_HasProto(rts_getWord16) \ - SymI_HasProto(rts_getWord32) \ - SymI_HasProto(rts_getWord64) \ - SymI_HasProto(rts_lock) \ - SymI_HasProto(rts_mkBool) \ - SymI_HasProto(rts_mkChar) \ - SymI_HasProto(rts_mkDouble) \ - SymI_HasProto(rts_mkFloat) \ - SymI_HasProto(rts_mkInt) \ - SymI_HasProto(rts_mkInt8) \ - SymI_HasProto(rts_mkInt16) \ - SymI_HasProto(rts_mkInt32) \ - SymI_HasProto(rts_mkInt64) \ - SymI_HasProto(rts_mkPtr) \ - SymI_HasProto(rts_mkFunPtr) \ - SymI_HasProto(rts_mkStablePtr) \ - SymI_HasProto(rts_mkString) \ - SymI_HasProto(rts_mkWord) \ - SymI_HasProto(rts_mkWord8) \ - SymI_HasProto(rts_mkWord16) \ - SymI_HasProto(rts_mkWord32) \ - SymI_HasProto(rts_mkWord64) \ - SymI_HasProto(rts_unlock) \ - SymI_HasProto(rts_unsafeGetMyCapability) \ - SymI_HasProto(rtsSupportsBoundThreads) \ - SymI_HasProto(rts_isProfiled) \ - SymI_HasProto(setProgArgv) \ - SymI_HasProto(startupHaskell) \ - SymI_HasProto(shutdownHaskell) \ - SymI_HasProto(shutdownHaskellAndExit) \ - SymI_HasProto(stable_ptr_table) \ - SymI_HasProto(stackOverflow) \ - SymI_HasProto(stg_CAF_BLACKHOLE_info) \ - SymI_HasProto(stg_BLACKHOLE_info) \ - SymI_HasProto(__stg_EAGER_BLACKHOLE_info) \ - SymI_HasProto(stg_BLOCKING_QUEUE_CLEAN_info) \ - SymI_HasProto(stg_BLOCKING_QUEUE_DIRTY_info) \ - SymI_HasProto(startTimer) \ - SymI_HasProto(stg_MVAR_CLEAN_info) \ - SymI_HasProto(stg_MVAR_DIRTY_info) \ - SymI_HasProto(stg_IND_STATIC_info) \ - SymI_HasProto(stg_ARR_WORDS_info) \ - SymI_HasProto(stg_MUT_ARR_PTRS_DIRTY_info) \ - SymI_HasProto(stg_MUT_ARR_PTRS_FROZEN_info) \ - SymI_HasProto(stg_MUT_ARR_PTRS_FROZEN0_info) \ - SymI_HasProto(stg_WEAK_info) \ - SymI_HasProto(stg_ap_v_info) \ - SymI_HasProto(stg_ap_f_info) \ - SymI_HasProto(stg_ap_d_info) \ - SymI_HasProto(stg_ap_l_info) \ - SymI_HasProto(stg_ap_n_info) \ - SymI_HasProto(stg_ap_p_info) \ - SymI_HasProto(stg_ap_pv_info) \ - SymI_HasProto(stg_ap_pp_info) \ - SymI_HasProto(stg_ap_ppv_info) \ - SymI_HasProto(stg_ap_ppp_info) \ - SymI_HasProto(stg_ap_pppv_info) \ - SymI_HasProto(stg_ap_pppp_info) \ - SymI_HasProto(stg_ap_ppppp_info) \ - SymI_HasProto(stg_ap_pppppp_info) \ - SymI_HasProto(stg_ap_0_fast) \ - SymI_HasProto(stg_ap_v_fast) \ - SymI_HasProto(stg_ap_f_fast) \ - SymI_HasProto(stg_ap_d_fast) \ - SymI_HasProto(stg_ap_l_fast) \ - SymI_HasProto(stg_ap_n_fast) \ - SymI_HasProto(stg_ap_p_fast) \ - SymI_HasProto(stg_ap_pv_fast) \ - SymI_HasProto(stg_ap_pp_fast) \ - SymI_HasProto(stg_ap_ppv_fast) \ - SymI_HasProto(stg_ap_ppp_fast) \ - SymI_HasProto(stg_ap_pppv_fast) \ - SymI_HasProto(stg_ap_pppp_fast) \ - SymI_HasProto(stg_ap_ppppp_fast) \ - SymI_HasProto(stg_ap_pppppp_fast) \ - SymI_HasProto(stg_ap_1_upd_info) \ - SymI_HasProto(stg_ap_2_upd_info) \ - SymI_HasProto(stg_ap_3_upd_info) \ - SymI_HasProto(stg_ap_4_upd_info) \ - SymI_HasProto(stg_ap_5_upd_info) \ - SymI_HasProto(stg_ap_6_upd_info) \ - SymI_HasProto(stg_ap_7_upd_info) \ - SymI_HasProto(stg_exit) \ - SymI_HasProto(stg_sel_0_upd_info) \ - SymI_HasProto(stg_sel_10_upd_info) \ - SymI_HasProto(stg_sel_11_upd_info) \ - SymI_HasProto(stg_sel_12_upd_info) \ - SymI_HasProto(stg_sel_13_upd_info) \ - SymI_HasProto(stg_sel_14_upd_info) \ - SymI_HasProto(stg_sel_15_upd_info) \ - SymI_HasProto(stg_sel_1_upd_info) \ - SymI_HasProto(stg_sel_2_upd_info) \ - SymI_HasProto(stg_sel_3_upd_info) \ - SymI_HasProto(stg_sel_4_upd_info) \ - SymI_HasProto(stg_sel_5_upd_info) \ - SymI_HasProto(stg_sel_6_upd_info) \ - SymI_HasProto(stg_sel_7_upd_info) \ - SymI_HasProto(stg_sel_8_upd_info) \ - SymI_HasProto(stg_sel_9_upd_info) \ - SymI_HasProto(stg_upd_frame_info) \ - SymI_HasProto(stg_bh_upd_frame_info) \ - SymI_HasProto(suspendThread) \ - SymI_HasProto(stg_takeMVarzh) \ - SymI_HasProto(stg_threadStatuszh) \ - SymI_HasProto(stg_tryPutMVarzh) \ - SymI_HasProto(stg_tryTakeMVarzh) \ - SymI_HasProto(stg_unmaskAsyncExceptionszh) \ - SymI_HasProto(unloadObj) \ - SymI_HasProto(stg_unsafeThawArrayzh) \ - SymI_HasProto(stg_waitReadzh) \ - SymI_HasProto(stg_waitWritezh) \ - SymI_HasProto(stg_writeTVarzh) \ - SymI_HasProto(stg_yieldzh) \ - SymI_NeedsProto(stg_interp_constr_entry) \ - SymI_HasProto(stg_arg_bitmaps) \ - SymI_HasProto(large_alloc_lim) \ - SymI_HasProto(g0) \ - SymI_HasProto(allocate) \ - SymI_HasProto(allocateExec) \ - SymI_HasProto(freeExec) \ - SymI_HasProto(getAllocations) \ - SymI_HasProto(revertCAFs) \ - SymI_HasProto(RtsFlags) \ - SymI_NeedsProto(rts_breakpoint_io_action) \ - SymI_NeedsProto(rts_stop_next_breakpoint) \ - SymI_NeedsProto(rts_stop_on_exception) \ - SymI_HasProto(stopTimer) \ - SymI_HasProto(n_capabilities) \ - SymI_HasProto(stg_traceCcszh) \ - SymI_HasProto(stg_traceEventzh) \ - SymI_HasProto(getMonotonicNSec) \ - SymI_HasProto(lockFile) \ - SymI_HasProto(unlockFile) \ - SymI_HasProto(startProfTimer) \ - SymI_HasProto(stopProfTimer) \ - RTS_USER_SIGNALS_SYMBOLS \ + SymI_HasProto(getGCStats) \ + SymI_HasProto(getGCStatsEnabled) \ + SymI_HasProto(genSymZh) \ + SymI_HasProto(genericRaise) \ + SymI_HasProto(getProgArgv) \ + SymI_HasProto(getFullProgArgv) \ + SymI_HasProto(getStablePtr) \ + SymI_HasProto(hs_init) \ + SymI_HasProto(hs_exit) \ + SymI_HasProto(hs_set_argv) \ + SymI_HasProto(hs_add_root) \ + SymI_HasProto(hs_perform_gc) \ + SymI_HasProto(hs_free_stable_ptr) \ + SymI_HasProto(hs_free_fun_ptr) \ + SymI_HasProto(hs_hpc_rootModule) \ + SymI_HasProto(hs_hpc_module) \ + SymI_HasProto(initLinker) \ + SymI_HasProto(stg_unpackClosurezh) \ + SymI_HasProto(stg_getApStackValzh) \ + SymI_HasProto(stg_getSparkzh) \ + SymI_HasProto(stg_numSparkszh) \ + SymI_HasProto(stg_isCurrentThreadBoundzh) \ + SymI_HasProto(stg_isEmptyMVarzh) \ + SymI_HasProto(stg_killThreadzh) \ + SymI_HasProto(loadArchive) \ + SymI_HasProto(loadObj) \ + SymI_HasProto(insertStableSymbol) \ + SymI_HasProto(insertSymbol) \ + SymI_HasProto(lookupSymbol) \ + SymI_HasProto(stg_makeStablePtrzh) \ + SymI_HasProto(stg_mkApUpd0zh) \ + SymI_HasProto(stg_myThreadIdzh) \ + SymI_HasProto(stg_labelThreadzh) \ + SymI_HasProto(stg_newArrayzh) \ + SymI_HasProto(stg_newArrayArrayzh) \ + SymI_HasProto(stg_newBCOzh) \ + SymI_HasProto(stg_newByteArrayzh) \ + SymI_HasProto_redirect(newCAF, newDynCAF) \ + SymI_HasProto(stg_newMVarzh) \ + SymI_HasProto(stg_newMutVarzh) \ + SymI_HasProto(stg_newTVarzh) \ + SymI_HasProto(stg_noDuplicatezh) \ + SymI_HasProto(stg_atomicModifyMutVarzh) \ + SymI_HasProto(stg_casMutVarzh) \ + SymI_HasProto(stg_newPinnedByteArrayzh) \ + SymI_HasProto(stg_newAlignedPinnedByteArrayzh) \ + SymI_HasProto(newSpark) \ + SymI_HasProto(performGC) \ + SymI_HasProto(performMajorGC) \ + SymI_HasProto(prog_argc) \ + SymI_HasProto(prog_argv) \ + SymI_HasProto(stg_putMVarzh) \ + SymI_HasProto(stg_raisezh) \ + SymI_HasProto(stg_raiseIOzh) \ + SymI_HasProto(stg_readTVarzh) \ + SymI_HasProto(stg_readTVarIOzh) \ + SymI_HasProto(resumeThread) \ + SymI_HasProto(setNumCapabilities) \ + SymI_HasProto(getNumberOfProcessors) \ + SymI_HasProto(resolveObjs) \ + SymI_HasProto(stg_retryzh) \ + SymI_HasProto(rts_apply) \ + SymI_HasProto(rts_checkSchedStatus) \ + SymI_HasProto(rts_eval) \ + SymI_HasProto(rts_evalIO) \ + SymI_HasProto(rts_evalLazyIO) \ + SymI_HasProto(rts_evalStableIO) \ + SymI_HasProto(rts_eval_) \ + SymI_HasProto(rts_getBool) \ + SymI_HasProto(rts_getChar) \ + SymI_HasProto(rts_getDouble) \ + SymI_HasProto(rts_getFloat) \ + SymI_HasProto(rts_getInt) \ + SymI_HasProto(rts_getInt8) \ + SymI_HasProto(rts_getInt16) \ + SymI_HasProto(rts_getInt32) \ + SymI_HasProto(rts_getInt64) \ + SymI_HasProto(rts_getPtr) \ + SymI_HasProto(rts_getFunPtr) \ + SymI_HasProto(rts_getStablePtr) \ + SymI_HasProto(rts_getThreadId) \ + SymI_HasProto(rts_getWord) \ + SymI_HasProto(rts_getWord8) \ + SymI_HasProto(rts_getWord16) \ + SymI_HasProto(rts_getWord32) \ + SymI_HasProto(rts_getWord64) \ + SymI_HasProto(rts_lock) \ + SymI_HasProto(rts_mkBool) \ + SymI_HasProto(rts_mkChar) \ + SymI_HasProto(rts_mkDouble) \ + SymI_HasProto(rts_mkFloat) \ + SymI_HasProto(rts_mkInt) \ + SymI_HasProto(rts_mkInt8) \ + SymI_HasProto(rts_mkInt16) \ + SymI_HasProto(rts_mkInt32) \ + SymI_HasProto(rts_mkInt64) \ + SymI_HasProto(rts_mkPtr) \ + SymI_HasProto(rts_mkFunPtr) \ + SymI_HasProto(rts_mkStablePtr) \ + SymI_HasProto(rts_mkString) \ + SymI_HasProto(rts_mkWord) \ + SymI_HasProto(rts_mkWord8) \ + SymI_HasProto(rts_mkWord16) \ + SymI_HasProto(rts_mkWord32) \ + SymI_HasProto(rts_mkWord64) \ + SymI_HasProto(rts_unlock) \ + SymI_HasProto(rts_unsafeGetMyCapability) \ + SymI_HasProto(rtsSupportsBoundThreads) \ + SymI_HasProto(rts_isProfiled) \ + SymI_HasProto(setProgArgv) \ + SymI_HasProto(startupHaskell) \ + SymI_HasProto(shutdownHaskell) \ + SymI_HasProto(shutdownHaskellAndExit) \ + SymI_HasProto(stable_ptr_table) \ + SymI_HasProto(stackOverflow) \ + SymI_HasProto(stg_CAF_BLACKHOLE_info) \ + SymI_HasProto(stg_BLACKHOLE_info) \ + SymI_HasProto(__stg_EAGER_BLACKHOLE_info) \ + SymI_HasProto(stg_BLOCKING_QUEUE_CLEAN_info) \ + SymI_HasProto(stg_BLOCKING_QUEUE_DIRTY_info) \ + SymI_HasProto(startTimer) \ + SymI_HasProto(stg_MVAR_CLEAN_info) \ + SymI_HasProto(stg_MVAR_DIRTY_info) \ + SymI_HasProto(stg_IND_STATIC_info) \ + SymI_HasProto(stg_ARR_WORDS_info) \ + SymI_HasProto(stg_MUT_ARR_PTRS_DIRTY_info) \ + SymI_HasProto(stg_MUT_ARR_PTRS_FROZEN_info) \ + SymI_HasProto(stg_MUT_ARR_PTRS_FROZEN0_info) \ + SymI_HasProto(stg_WEAK_info) \ + SymI_HasProto(stg_ap_v_info) \ + SymI_HasProto(stg_ap_f_info) \ + SymI_HasProto(stg_ap_d_info) \ + SymI_HasProto(stg_ap_l_info) \ + SymI_HasProto(stg_ap_n_info) \ + SymI_HasProto(stg_ap_p_info) \ + SymI_HasProto(stg_ap_pv_info) \ + SymI_HasProto(stg_ap_pp_info) \ + SymI_HasProto(stg_ap_ppv_info) \ + SymI_HasProto(stg_ap_ppp_info) \ + SymI_HasProto(stg_ap_pppv_info) \ + SymI_HasProto(stg_ap_pppp_info) \ + SymI_HasProto(stg_ap_ppppp_info) \ + SymI_HasProto(stg_ap_pppppp_info) \ + SymI_HasProto(stg_ap_0_fast) \ + SymI_HasProto(stg_ap_v_fast) \ + SymI_HasProto(stg_ap_f_fast) \ + SymI_HasProto(stg_ap_d_fast) \ + SymI_HasProto(stg_ap_l_fast) \ + SymI_HasProto(stg_ap_n_fast) \ + SymI_HasProto(stg_ap_p_fast) \ + SymI_HasProto(stg_ap_pv_fast) \ + SymI_HasProto(stg_ap_pp_fast) \ + SymI_HasProto(stg_ap_ppv_fast) \ + SymI_HasProto(stg_ap_ppp_fast) \ + SymI_HasProto(stg_ap_pppv_fast) \ + SymI_HasProto(stg_ap_pppp_fast) \ + SymI_HasProto(stg_ap_ppppp_fast) \ + SymI_HasProto(stg_ap_pppppp_fast) \ + SymI_HasProto(stg_ap_1_upd_info) \ + SymI_HasProto(stg_ap_2_upd_info) \ + SymI_HasProto(stg_ap_3_upd_info) \ + SymI_HasProto(stg_ap_4_upd_info) \ + SymI_HasProto(stg_ap_5_upd_info) \ + SymI_HasProto(stg_ap_6_upd_info) \ + SymI_HasProto(stg_ap_7_upd_info) \ + SymI_HasProto(stg_exit) \ + SymI_HasProto(stg_sel_0_upd_info) \ + SymI_HasProto(stg_sel_10_upd_info) \ + SymI_HasProto(stg_sel_11_upd_info) \ + SymI_HasProto(stg_sel_12_upd_info) \ + SymI_HasProto(stg_sel_13_upd_info) \ + SymI_HasProto(stg_sel_14_upd_info) \ + SymI_HasProto(stg_sel_15_upd_info) \ + SymI_HasProto(stg_sel_1_upd_info) \ + SymI_HasProto(stg_sel_2_upd_info) \ + SymI_HasProto(stg_sel_3_upd_info) \ + SymI_HasProto(stg_sel_4_upd_info) \ + SymI_HasProto(stg_sel_5_upd_info) \ + SymI_HasProto(stg_sel_6_upd_info) \ + SymI_HasProto(stg_sel_7_upd_info) \ + SymI_HasProto(stg_sel_8_upd_info) \ + SymI_HasProto(stg_sel_9_upd_info) \ + SymI_HasProto(stg_upd_frame_info) \ + SymI_HasProto(stg_bh_upd_frame_info) \ + SymI_HasProto(suspendThread) \ + SymI_HasProto(stg_takeMVarzh) \ + SymI_HasProto(stg_threadStatuszh) \ + SymI_HasProto(stg_tryPutMVarzh) \ + SymI_HasProto(stg_tryTakeMVarzh) \ + SymI_HasProto(stg_unmaskAsyncExceptionszh) \ + SymI_HasProto(unloadObj) \ + SymI_HasProto(stg_unsafeThawArrayzh) \ + SymI_HasProto(stg_waitReadzh) \ + SymI_HasProto(stg_waitWritezh) \ + SymI_HasProto(stg_writeTVarzh) \ + SymI_HasProto(stg_yieldzh) \ + SymI_NeedsProto(stg_interp_constr_entry) \ + SymI_HasProto(stg_arg_bitmaps) \ + SymI_HasProto(large_alloc_lim) \ + SymI_HasProto(g0) \ + SymI_HasProto(allocate) \ + SymI_HasProto(allocateExec) \ + SymI_HasProto(freeExec) \ + SymI_HasProto(getAllocations) \ + SymI_HasProto(revertCAFs) \ + SymI_HasProto(RtsFlags) \ + SymI_NeedsProto(rts_breakpoint_io_action) \ + SymI_NeedsProto(rts_stop_next_breakpoint) \ + SymI_NeedsProto(rts_stop_on_exception) \ + SymI_HasProto(stopTimer) \ + SymI_HasProto(n_capabilities) \ + SymI_HasProto(stg_traceCcszh) \ + SymI_HasProto(stg_traceEventzh) \ + SymI_HasProto(getMonotonicNSec) \ + SymI_HasProto(lockFile) \ + SymI_HasProto(unlockFile) \ + SymI_HasProto(startProfTimer) \ + SymI_HasProto(stopProfTimer) \ + RTS_USER_SIGNALS_SYMBOLS \ RTS_INTCHAR_SYMBOLS diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index 9cedabdca8..1a531b2149 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------------- * - * (c) The GHC Team, 1998-2011 + * (c) The GHC Team, 1998-2012 * * Out-of-line primitive operations * @@ -10,14 +10,9 @@ * this file contains code for most of those with the attribute * out_of_line=True. * - * Entry convention: the entry convention for a primop is that all the - * args are in Stg registers (R1, R2, etc.). This is to make writing - * the primops easier. (see compiler/codeGen/CgCallConv.hs). - * - * Return convention: results from a primop are generally returned - * using the ordinary unboxed tuple return convention. The C-- parser - * implements the RET_xxxx() macros to perform unboxed-tuple returns - * based on the prevailing return convention. + * Entry convention: the entry convention for a primop is the + * NativeNodeCall convention, and the return convention is + * NativeReturn. (see compiler/cmm/CmmCallConv.hs) * * This file is written in a subset of C--, extended with various * features specific to GHC. It is compiled by GHC directly. For the @@ -44,8 +39,6 @@ import sm_mutex; Basically just new*Array - the others are all inline macros. - The size arg is always passed in R1, and the result returned in R1. - The slow entry point is for returning from a heap check, the saved size argument must be re-loaded from the stack. -------------------------------------------------------------------------- */ @@ -54,29 +47,32 @@ import sm_mutex; * round up to the nearest word for the size of the array. */ -stg_newByteArrayzh +stg_newByteArrayzh ( W_ n ) { - W_ words, payload_words, n, p; - MAYBE_GC(NO_PTRS,stg_newByteArrayzh); - n = R1; + W_ words, payload_words; + gcptr p; + + MAYBE_GC_N(stg_newByteArrayzh, n); + payload_words = ROUNDUP_BYTES_TO_WDS(n); words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - ("ptr" p) = foreign "C" allocate(MyCapability() "ptr",words) []; + ("ptr" p) = ccall allocate(MyCapability() "ptr",words); TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); SET_HDR(p, stg_ARR_WORDS_info, CCCS); StgArrWords_bytes(p) = n; - RET_P(p); + return (p); } #define BA_ALIGN 16 #define BA_MASK (BA_ALIGN-1) -stg_newPinnedByteArrayzh +stg_newPinnedByteArrayzh ( W_ n ) { - W_ words, n, bytes, payload_words, p; + W_ words, bytes, payload_words; + gcptr p; + + MAYBE_GC_N(stg_newPinnedByteArrayzh, n); - MAYBE_GC(NO_PTRS,stg_newPinnedByteArrayzh); - n = R1; bytes = n; /* payload_words is what we will tell the profiler we had to allocate */ payload_words = ROUNDUP_BYTES_TO_WDS(bytes); @@ -89,7 +85,7 @@ stg_newPinnedByteArrayzh /* Now we convert to a number of words: */ words = ROUNDUP_BYTES_TO_WDS(bytes); - ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) []; + ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words); TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); /* Now we need to move p forward so that the payload is aligned @@ -98,16 +94,15 @@ stg_newPinnedByteArrayzh SET_HDR(p, stg_ARR_WORDS_info, CCCS); StgArrWords_bytes(p) = n; - RET_P(p); + return (p); } -stg_newAlignedPinnedByteArrayzh +stg_newAlignedPinnedByteArrayzh ( W_ n, W_ alignment ) { - W_ words, n, bytes, payload_words, p, alignment; + W_ words, bytes, payload_words; + gcptr p; - MAYBE_GC(NO_PTRS,stg_newAlignedPinnedByteArrayzh); - n = R1; - alignment = R2; + again: MAYBE_GC(again); /* we always supply at least word-aligned memory, so there's no need to allow extra space for alignment if the requirement is less @@ -128,7 +123,7 @@ stg_newAlignedPinnedByteArrayzh /* Now we convert to a number of words: */ words = ROUNDUP_BYTES_TO_WDS(bytes); - ("ptr" p) = foreign "C" allocatePinned(MyCapability() "ptr", words) []; + ("ptr" p) = ccall allocatePinned(MyCapability() "ptr", words); TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); /* Now we need to move p forward so that the payload is aligned @@ -138,23 +133,22 @@ stg_newAlignedPinnedByteArrayzh SET_HDR(p, stg_ARR_WORDS_info, CCCS); StgArrWords_bytes(p) = n; - RET_P(p); + return (p); } -stg_newArrayzh +stg_newArrayzh ( W_ n /* words */, gcptr init ) { - W_ words, n, init, arr, p, size; - /* Args: R1 = words, R2 = initialisation value */ + W_ words, size; + gcptr p, arr; - n = R1; - MAYBE_GC(R2_PTR,stg_newArrayzh); + again: MAYBE_GC(again); // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words // in the array, making sure we round up, and then rounding up to a whole // number of words. size = n + mutArrPtrsCardWords(n); words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size; - ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) [R2]; + ("ptr" arr) = ccall allocate(MyCapability() "ptr",words); TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0); SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); @@ -162,7 +156,6 @@ stg_newArrayzh StgMutArrPtrs_size(arr) = size; // Initialise all elements of the the array with the value in R2 - init = R2; p = arr + SIZEOF_StgMutArrPtrs; for: if (p < arr + WDS(words)) { @@ -178,10 +171,10 @@ stg_newArrayzh goto for2; } - RET_P(arr); + return (arr); } -stg_unsafeThawArrayzh +stg_unsafeThawArrayzh ( gcptr arr ) { // SUBTLETY TO DO WITH THE OLD GEN MUTABLE LIST // @@ -201,31 +194,30 @@ stg_unsafeThawArrayzh // we put it on the mutable list more than once, but it would get scavenged // multiple times during GC, which would be unnecessarily slow. // - if (StgHeader_info(R1) != stg_MUT_ARR_PTRS_FROZEN0_info) { - SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info); - recordMutable(R1, R1); + if (StgHeader_info(arr) != stg_MUT_ARR_PTRS_FROZEN0_info) { + SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info); + recordMutable(arr); // must be done after SET_INFO, because it ASSERTs closure_MUTABLE() - RET_P(R1); + return (arr); } else { - SET_INFO(R1,stg_MUT_ARR_PTRS_DIRTY_info); - RET_P(R1); + SET_INFO(arr,stg_MUT_ARR_PTRS_DIRTY_info); + return (arr); } } -stg_newArrayArrayzh +stg_newArrayArrayzh ( W_ n /* words */ ) { - W_ words, n, arr, p, size; - /* Args: R1 = words */ + W_ words, size; + gcptr p, arr; - n = R1; - MAYBE_GC(NO_PTRS,stg_newArrayArrayzh); + MAYBE_GC_N(stg_newArrayArrayzh, n); // the mark area contains one byte for each 2^MUT_ARR_PTRS_CARD_BITS words // in the array, making sure we round up, and then rounding up to a whole // number of words. size = n + mutArrPtrsCardWords(n); words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size; - ("ptr" arr) = foreign "C" allocate(MyCapability() "ptr",words) []; + ("ptr" arr) = ccall allocate(MyCapability() "ptr",words); TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(n), 0); SET_HDR(arr, stg_MUT_ARR_PTRS_DIRTY_info, W_[CCCS]); @@ -248,7 +240,7 @@ stg_newArrayArrayzh goto for2; } - RET_P(arr); + return (arr); } @@ -256,46 +248,39 @@ stg_newArrayArrayzh MutVar primitives -------------------------------------------------------------------------- */ -stg_newMutVarzh +stg_newMutVarzh ( gcptr init ) { W_ mv; - /* Args: R1 = initialisation value */ - ALLOC_PRIM( SIZEOF_StgMutVar, R1_PTR, stg_newMutVarzh); + ALLOC_PRIM (SIZEOF_StgMutVar); mv = Hp - SIZEOF_StgMutVar + WDS(1); SET_HDR(mv,stg_MUT_VAR_DIRTY_info,CCCS); - StgMutVar_var(mv) = R1; + StgMutVar_var(mv) = init; - RET_P(mv); + return (mv); } -stg_casMutVarzh +stg_casMutVarzh ( gcptr mv, gcptr old, gcptr new ) /* MutVar# s a -> a -> a -> State# s -> (# State#, Int#, a #) */ { - W_ mv, old, new, h; - - mv = R1; - old = R2; - new = R3; + gcptr h; - (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, - old, new) []; + (h) = ccall cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, + old, new); if (h != old) { - RET_NP(1,h); + return (1,h); } else { if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) { - foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") []; + ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr"); } - RET_NP(0,h); + return (0,h); } } - -stg_atomicModifyMutVarzh +stg_atomicModifyMutVarzh ( gcptr mv, gcptr f ) { - W_ mv, f, z, x, y, r, h; - /* Args: R1 :: MutVar#, R2 :: a -> (a,b) */ + W_ z, x, y, r, h; /* If x is the current contents of the MutVar#, then We want to make the new contents point to @@ -331,10 +316,7 @@ stg_atomicModifyMutVarzh #define SIZE (THUNK_2_SIZE + THUNK_1_SIZE + THUNK_1_SIZE) - HP_CHK_GEN_TICKY(SIZE, R1_PTR & R2_PTR, stg_atomicModifyMutVarzh); - - mv = R1; - f = R2; + HP_CHK_GEN_TICKY(SIZE); TICK_ALLOC_THUNK_2(); CCCS_ALLOC(THUNK_2_SIZE); @@ -361,17 +343,17 @@ stg_atomicModifyMutVarzh x = StgMutVar_var(mv); StgThunk_payload(z,1) = x; #ifdef THREADED_RTS - (h) = foreign "C" cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y) []; + (h) = ccall cas(mv + SIZEOF_StgHeader + OFFSET_StgMutVar_var, x, y); if (h != x) { goto retry; } #else StgMutVar_var(mv) = y; #endif if (GET_INFO(mv) == stg_MUT_VAR_CLEAN_info) { - foreign "C" dirty_MUT_VAR(BaseReg "ptr", mv "ptr") []; + ccall dirty_MUT_VAR(BaseReg "ptr", mv "ptr"); } - RET_P(r); + return (r); } /* ----------------------------------------------------------------------------- @@ -380,15 +362,13 @@ stg_atomicModifyMutVarzh STRING(stg_weak_msg,"New weak pointer at %p\n") -stg_mkWeakzh +stg_mkWeakzh ( gcptr key, + gcptr value, + gcptr finalizer /* or stg_NO_FINALIZER_closure */ ) { - /* R1 = key - R2 = value - R3 = finalizer (or stg_NO_FINALIZER_closure) - */ - W_ w; + gcptr w; - ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR & R3_PTR, stg_mkWeakzh ); + ALLOC_PRIM (SIZEOF_StgWeak) w = Hp - SIZEOF_StgWeak + WDS(1); SET_HDR(w, stg_WEAK_info, CCCS); @@ -397,9 +377,9 @@ stg_mkWeakzh // Should StgWeak_cfinalizer(w) be stg_NO_FINALIZER_closure or // something else? - StgWeak_key(w) = R1; - StgWeak_value(w) = R2; - StgWeak_finalizer(w) = R3; + StgWeak_key(w) = key; + StgWeak_value(w) = value; + StgWeak_finalizer(w) = finalizer; StgWeak_cfinalizer(w) = stg_NO_FINALIZER_closure; ACQUIRE_LOCK(sm_mutex); @@ -407,49 +387,34 @@ stg_mkWeakzh W_[weak_ptr_list] = w; RELEASE_LOCK(sm_mutex); - IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []); + IF_DEBUG(weak, ccall debugBelch(stg_weak_msg,w)); - RET_P(w); + return (w); } -stg_mkWeakNoFinalizzerzh +stg_mkWeakNoFinalizzerzh ( gcptr key, gcptr value ) { - /* R1 = key - R2 = value - */ - R3 = stg_NO_FINALIZER_closure; - - jump stg_mkWeakzh; + jump stg_mkWeakzh (key, value, stg_NO_FINALIZER_closure); } -stg_mkWeakForeignEnvzh +stg_mkWeakForeignEnvzh ( gcptr key, + gcptr val, + W_ fptr, // finalizer + W_ ptr, + W_ flag, // has environment (0 or 1) + W_ eptr ) { - /* R1 = key - R2 = value - R3 = finalizer - R4 = pointer - R5 = has environment (0 or 1) - R6 = environment - */ - W_ w, payload_words, words, p; - - W_ key, val, fptr, ptr, flag, eptr; - - key = R1; - val = R2; - fptr = R3; - ptr = R4; - flag = R5; - eptr = R6; - - ALLOC_PRIM( SIZEOF_StgWeak, R1_PTR & R2_PTR, stg_mkWeakForeignEnvzh ); + W_ payload_words, words; + gcptr w, p; + + ALLOC_PRIM (SIZEOF_StgWeak); w = Hp - SIZEOF_StgWeak + WDS(1); SET_HDR(w, stg_WEAK_info, CCCS); payload_words = 4; words = BYTES_TO_WDS(SIZEOF_StgArrWords) + payload_words; - ("ptr" p) = foreign "C" allocate(MyCapability() "ptr", words) []; + ("ptr" p) = ccall allocate(MyCapability() "ptr", words); TICK_ALLOC_PRIM(SIZEOF_StgArrWords,WDS(payload_words),0); SET_HDR(p, stg_ARR_WORDS_info, CCCS); @@ -473,22 +438,18 @@ stg_mkWeakForeignEnvzh W_[weak_ptr_list] = w; RELEASE_LOCK(sm_mutex); - IF_DEBUG(weak, foreign "C" debugBelch(stg_weak_msg,w) []); + IF_DEBUG(weak, ccall debugBelch(stg_weak_msg,w)); - RET_P(w); + return (w); } -stg_finalizzeWeakzh +stg_finalizzeWeakzh ( gcptr w ) { - /* R1 = weak ptr - */ - W_ w, f, arr; - - w = R1; + gcptr f, arr; // already dead? if (GET_INFO(w) == stg_DEAD_WEAK_info) { - RET_NP(0,stg_NO_FINALIZER_closure); + return (0,stg_NO_FINALIZER_closure); } // kill it @@ -516,26 +477,25 @@ stg_finalizzeWeakzh StgDeadWeak_link(w) = StgWeak_link(w); if (arr != stg_NO_FINALIZER_closure) { - foreign "C" runCFinalizer(StgArrWords_payload(arr,0), + ccall runCFinalizer(StgArrWords_payload(arr,0), StgArrWords_payload(arr,1), StgArrWords_payload(arr,2), - StgArrWords_payload(arr,3)) []; + StgArrWords_payload(arr,3)); } /* return the finalizer */ if (f == stg_NO_FINALIZER_closure) { - RET_NP(0,stg_NO_FINALIZER_closure); + return (0,stg_NO_FINALIZER_closure); } else { - RET_NP(1,f); + return (1,f); } } -stg_deRefWeakzh +stg_deRefWeakzh ( gcptr w ) { - /* R1 = weak ptr */ - W_ w, code, val; + W_ code; + gcptr val; - w = R1; if (GET_INFO(w) == stg_WEAK_info) { code = 1; val = StgWeak_value(w); @@ -543,171 +503,144 @@ stg_deRefWeakzh code = 0; val = w; } - RET_NP(code,val); + return (code,val); } /* ----------------------------------------------------------------------------- Floating point operations. -------------------------------------------------------------------------- */ -stg_decodeFloatzuIntzh +stg_decodeFloatzuIntzh ( F_ arg ) { W_ p; - F_ arg; W_ mp_tmp1; W_ mp_tmp_w; - STK_CHK_GEN( WDS(2), NO_PTRS, stg_decodeFloatzuIntzh ); + STK_CHK_GEN_N (WDS(2)); mp_tmp1 = Sp - WDS(1); mp_tmp_w = Sp - WDS(2); - /* arguments: F1 = Float# */ - arg = F1; - /* Perform the operation */ - foreign "C" __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg) []; + ccall __decodeFloat_Int(mp_tmp1 "ptr", mp_tmp_w "ptr", arg); /* returns: (Int# (mantissa), Int# (exponent)) */ - RET_NN(W_[mp_tmp1], W_[mp_tmp_w]); + return (W_[mp_tmp1], W_[mp_tmp_w]); } -stg_decodeDoublezu2Intzh +stg_decodeDoublezu2Intzh ( D_ arg ) { - D_ arg; W_ p; W_ mp_tmp1; W_ mp_tmp2; W_ mp_result1; W_ mp_result2; - STK_CHK_GEN( WDS(4), NO_PTRS, stg_decodeDoublezu2Intzh ); + STK_CHK_GEN_N (WDS(4)); mp_tmp1 = Sp - WDS(1); mp_tmp2 = Sp - WDS(2); mp_result1 = Sp - WDS(3); mp_result2 = Sp - WDS(4); - /* arguments: D1 = Double# */ - arg = D1; - /* Perform the operation */ - foreign "C" __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr", + ccall __decodeDouble_2Int(mp_tmp1 "ptr", mp_tmp2 "ptr", mp_result1 "ptr", mp_result2 "ptr", - arg) []; + arg); /* returns: (Int# (mant sign), Word# (mant high), Word# (mant low), Int# (expn)) */ - RET_NNNN(W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]); + return (W_[mp_tmp1], W_[mp_tmp2], W_[mp_result1], W_[mp_result2]); } /* ----------------------------------------------------------------------------- * Concurrency primitives * -------------------------------------------------------------------------- */ -stg_forkzh +stg_forkzh ( gcptr closure ) { - /* args: R1 = closure to spark */ - - MAYBE_GC(R1_PTR, stg_forkzh); + MAYBE_GC_P(stg_forkzh, closure); - W_ closure; - W_ threadid; - closure = R1; + gcptr threadid; - ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", + ("ptr" threadid) = ccall createIOThread( MyCapability() "ptr", RtsFlags_GcFlags_initialStkSize(RtsFlags), - closure "ptr") []; + closure "ptr"); /* start blocked if the current thread is blocked */ StgTSO_flags(threadid) = %lobits16( TO_W_(StgTSO_flags(threadid)) | TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE)); - foreign "C" scheduleThread(MyCapability() "ptr", threadid "ptr") []; + ccall scheduleThread(MyCapability() "ptr", threadid "ptr"); // context switch soon, but not immediately: we don't want every // forkIO to force a context-switch. Capability_context_switch(MyCapability()) = 1 :: CInt; - RET_P(threadid); + return (threadid); } -stg_forkOnzh +stg_forkOnzh ( W_ cpu, gcptr closure ) { - /* args: R1 = cpu, R2 = closure to spark */ +again: MAYBE_GC(again); - MAYBE_GC(R2_PTR, stg_forkOnzh); + gcptr threadid; - W_ cpu; - W_ closure; - W_ threadid; - cpu = R1; - closure = R2; - - ("ptr" threadid) = foreign "C" createIOThread( MyCapability() "ptr", + ("ptr" threadid) = ccall createIOThread( MyCapability() "ptr", RtsFlags_GcFlags_initialStkSize(RtsFlags), - closure "ptr") []; + closure "ptr"); /* start blocked if the current thread is blocked */ StgTSO_flags(threadid) = %lobits16( TO_W_(StgTSO_flags(threadid)) | TO_W_(StgTSO_flags(CurrentTSO)) & (TSO_BLOCKEX | TSO_INTERRUPTIBLE)); - foreign "C" scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr") []; + ccall scheduleThreadOn(MyCapability() "ptr", cpu, threadid "ptr"); // context switch soon, but not immediately: we don't want every // forkIO to force a context-switch. Capability_context_switch(MyCapability()) = 1 :: CInt; - RET_P(threadid); + return (threadid); } -stg_yieldzh +stg_yieldzh () { // when we yield to the scheduler, we have to tell it to put the // current thread to the back of the queue by setting the // context_switch flag. If we don't do this, it will run the same // thread again. Capability_context_switch(MyCapability()) = 1 :: CInt; - jump stg_yield_noregs; + jump stg_yield_noregs(); } -stg_myThreadIdzh +stg_myThreadIdzh () { - /* no args. */ - RET_P(CurrentTSO); + return (CurrentTSO); } -stg_labelThreadzh +stg_labelThreadzh ( gcptr threadid, W_ addr ) { - /* args: - R1 = ThreadId# - R2 = Addr# */ #if defined(DEBUG) || defined(TRACING) || defined(DTRACE) - foreign "C" labelThread(MyCapability() "ptr", R1 "ptr", R2 "ptr") []; + ccall labelThread(MyCapability() "ptr", threadid "ptr", addr "ptr"); #endif - jump %ENTRY_CODE(Sp(0)); + return (); } -stg_isCurrentThreadBoundzh +stg_isCurrentThreadBoundzh (/* no args */) { - /* no args */ W_ r; - (r) = foreign "C" isThreadBound(CurrentTSO) []; - RET_N(r); + (r) = ccall isThreadBound(CurrentTSO); + return (r); } -stg_threadStatuszh +stg_threadStatuszh ( gcptr tso ) { - /* args: R1 :: ThreadId# */ - W_ tso; W_ why_blocked; W_ what_next; W_ ret, cap, locked; - tso = R1; - what_next = TO_W_(StgTSO_what_next(tso)); why_blocked = TO_W_(StgTSO_why_blocked(tso)); // Note: these two reads are not atomic, so they might end up @@ -733,214 +666,250 @@ stg_threadStatuszh locked = 0; } - RET_NNN(ret,cap,locked); + return (ret,cap,locked); } /* ----------------------------------------------------------------------------- * TVar primitives * -------------------------------------------------------------------------- */ -#define SP_OFF 0 +// Catch retry frame ----------------------------------------------------------- + +#define CATCH_RETRY_FRAME_FIELDS(w_,p_,info_ptr, \ + running_alt_code, \ + first_code, \ + alt_code) \ + w_ info_ptr, \ + PROF_HDR_FIELDS(w_) \ + w_ running_alt_code, \ + p_ first_code, \ + p_ alt_code -// Catch retry frame ------------------------------------------------------------ INFO_TABLE_RET(stg_catch_retry_frame, CATCH_RETRY_FRAME, -#if defined(PROFILING) - W_ unused1, W_ unused2, -#endif - W_ unused3, P_ unused4, P_ unused5) + CATCH_RETRY_FRAME_FIELDS(W_,P_, + info_ptr, + running_alt_code, + first_code, + alt_code)) + return (P_ ret) { - W_ r, frame, trec, outer; - - frame = Sp; - trec = StgTSO_trec(CurrentTSO); - outer = StgTRecHeader_enclosing_trec(trec); - (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") []; - if (r != 0) { - /* Succeeded (either first branch or second branch) */ - StgTSO_trec(CurrentTSO) = outer; - Sp = Sp + SIZEOF_StgCatchRetryFrame; - jump %ENTRY_CODE(Sp(SP_OFF)); - } else { - /* Did not commit: re-execute */ - W_ new_trec; - ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") []; - StgTSO_trec(CurrentTSO) = new_trec; - if (StgCatchRetryFrame_running_alt_code(frame) != 0::I32) { - R1 = StgCatchRetryFrame_alt_code(frame); - } else { - R1 = StgCatchRetryFrame_first_code(frame); - } - jump stg_ap_v_fast; - } -} + W_ r; + gcptr trec, outer, arg; + trec = StgTSO_trec(CurrentTSO); + outer = StgTRecHeader_enclosing_trec(trec); + (r) = ccall stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr"); + if (r != 0) { + // Succeeded (either first branch or second branch) + StgTSO_trec(CurrentTSO) = outer; + return (ret); + } else { + // Did not commit: re-execute + P_ new_trec; + ("ptr" new_trec) = ccall stmStartTransaction(MyCapability() "ptr", + outer "ptr"); + StgTSO_trec(CurrentTSO) = new_trec; + if (running_alt_code != 0) { + jump stg_ap_v_fast + (CATCH_RETRY_FRAME_FIELDS(,,info_ptr, + running_alt_code, + first_code, + alt_code)) + (alt_code); + } else { + jump stg_ap_v_fast + (CATCH_RETRY_FRAME_FIELDS(,,info_ptr, + running_alt_code, + first_code, + alt_code)) + (first_code); + } + } +} // Atomically frame ------------------------------------------------------------ +// This must match StgAtomicallyFrame in Closures.h +#define ATOMICALLY_FRAME_FIELDS(w_,p_,info_ptr,code,next,result) \ + w_ info_ptr, \ + PROF_HDR_FIELDS(w_) \ + p_ code, \ + p_ next, \ + p_ result + + INFO_TABLE_RET(stg_atomically_frame, ATOMICALLY_FRAME, -#if defined(PROFILING) - W_ unused1, W_ unused2, -#endif - P_ code, P_ next_invariant_to_check, P_ result) + // layout of the frame, and bind the field names + ATOMICALLY_FRAME_FIELDS(W_,P_, + info_ptr, + code, + next_invariant, + frame_result)) + return (P_ result) // value returned to the frame { - W_ frame, trec, valid, next_invariant, q, outer; + W_ valid; + gcptr trec, outer, next_invariant, q; - frame = Sp; trec = StgTSO_trec(CurrentTSO); - result = R1; outer = StgTRecHeader_enclosing_trec(trec); if (outer == NO_TREC) { /* First time back at the atomically frame -- pick up invariants */ - ("ptr" q) = foreign "C" stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr") []; - StgAtomicallyFrame_next_invariant_to_check(frame) = q; - StgAtomicallyFrame_result(frame) = result; + ("ptr" next_invariant) = + ccall stmGetInvariantsToCheck(MyCapability() "ptr", trec "ptr"); + frame_result = result; } else { /* Second/subsequent time back at the atomically frame -- abort the * tx that's checking the invariant and move on to the next one */ StgTSO_trec(CurrentTSO) = outer; - q = StgAtomicallyFrame_next_invariant_to_check(frame); - StgInvariantCheckQueue_my_execution(q) = trec; - foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") []; + StgInvariantCheckQueue_my_execution(next_invariant) = trec; + ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr"); /* Don't free trec -- it's linked from q and will be stashed in the * invariant if we eventually commit. */ - q = StgInvariantCheckQueue_next_queue_entry(q); - StgAtomicallyFrame_next_invariant_to_check(frame) = q; + next_invariant = + StgInvariantCheckQueue_next_queue_entry(next_invariant); trec = outer; } - q = StgAtomicallyFrame_next_invariant_to_check(frame); - - if (q != END_INVARIANT_CHECK_QUEUE) { + if (next_invariant != END_INVARIANT_CHECK_QUEUE) { /* We can't commit yet: another invariant to check */ - ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") []; + ("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr", trec "ptr"); StgTSO_trec(CurrentTSO) = trec; - - next_invariant = StgInvariantCheckQueue_invariant(q); - R1 = StgAtomicInvariant_code(next_invariant); - jump stg_ap_v_fast; + q = StgInvariantCheckQueue_invariant(next_invariant); + jump stg_ap_v_fast + (ATOMICALLY_FRAME_FIELDS(,,info_ptr,code,next_invariant,frame_result)) + (StgAtomicInvariant_code(q)); } else { /* We've got no more invariants to check, try to commit */ - (valid) = foreign "C" stmCommitTransaction(MyCapability() "ptr", trec "ptr") []; + (valid) = ccall stmCommitTransaction(MyCapability() "ptr", trec "ptr"); if (valid != 0) { /* Transaction was valid: commit succeeded */ StgTSO_trec(CurrentTSO) = NO_TREC; - R1 = StgAtomicallyFrame_result(frame); - Sp = Sp + SIZEOF_StgAtomicallyFrame; - jump %ENTRY_CODE(Sp(SP_OFF)); + return (frame_result); } else { /* Transaction was not valid: try again */ - ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") []; + ("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr"); StgTSO_trec(CurrentTSO) = trec; - StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE; - R1 = StgAtomicallyFrame_code(frame); - jump stg_ap_v_fast; + next_invariant = END_INVARIANT_CHECK_QUEUE; + + jump stg_ap_v_fast + // push the StgAtomicallyFrame again: the code generator is + // clever enough to only assign the fields that have changed. + (ATOMICALLY_FRAME_FIELDS(,,info_ptr,code,next_invariant,frame_result)) + (code); } } } + INFO_TABLE_RET(stg_atomically_waiting_frame, ATOMICALLY_FRAME, -#if defined(PROFILING) - W_ unused1, W_ unused2, -#endif - P_ code, P_ next_invariant_to_check, P_ result) + // layout of the frame, and bind the field names + ATOMICALLY_FRAME_FIELDS(W_,P_, + info_ptr, + code, + next_invariant, + frame_result)) + return (/* no return values */) { - W_ frame, trec, valid; - - frame = Sp; + W_ trec, valid; /* The TSO is currently waiting: should we stop waiting? */ - (valid) = foreign "C" stmReWait(MyCapability() "ptr", CurrentTSO "ptr") []; + (valid) = ccall stmReWait(MyCapability() "ptr", CurrentTSO "ptr"); if (valid != 0) { - /* Previous attempt is still valid: no point trying again yet */ - jump stg_block_noregs; + /* Previous attempt is still valid: no point trying again yet */ + jump stg_block_noregs + (ATOMICALLY_FRAME_FIELDS(,,info_ptr, + code,next_invariant,frame_result)) + (); } else { /* Previous attempt is no longer valid: try again */ - ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr") []; + ("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr", NO_TREC "ptr"); StgTSO_trec(CurrentTSO) = trec; - StgHeader_info(frame) = stg_atomically_frame_info; - R1 = StgAtomicallyFrame_code(frame); - jump stg_ap_v_fast; + + // change the frame header to stg_atomically_frame_info + jump stg_ap_v_fast + (ATOMICALLY_FRAME_FIELDS(,,stg_atomically_frame_info, + code,next_invariant,frame_result)) + (code); } } -// STM catch frame -------------------------------------------------------------- - -#define SP_OFF 0 +// STM catch frame ------------------------------------------------------------- /* Catch frames are very similar to update frames, but when entering * one we just pop the frame off the stack and perform the correct * kind of return to the activation record underneath us on the stack. */ +#define CATCH_STM_FRAME_FIELDS(w_,p_,info_ptr,code,handler) \ + w_ info_ptr, \ + PROF_HDR_FIELDS(w_) \ + p_ code, \ + p_ handler + INFO_TABLE_RET(stg_catch_stm_frame, CATCH_STM_FRAME, -#if defined(PROFILING) - W_ unused1, W_ unused2, -#endif - P_ unused3, P_ unused4) - { - W_ r, frame, trec, outer; - frame = Sp; - trec = StgTSO_trec(CurrentTSO); - outer = StgTRecHeader_enclosing_trec(trec); - (r) = foreign "C" stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr") []; - if (r != 0) { + // layout of the frame, and bind the field names + CATCH_STM_FRAME_FIELDS(W_,P_,info_ptr,code,handler)) + return (P_ ret) +{ + W_ r, trec, outer; + + trec = StgTSO_trec(CurrentTSO); + outer = StgTRecHeader_enclosing_trec(trec); + (r) = ccall stmCommitNestedTransaction(MyCapability() "ptr", trec "ptr"); + if (r != 0) { /* Commit succeeded */ StgTSO_trec(CurrentTSO) = outer; - Sp = Sp + SIZEOF_StgCatchSTMFrame; - jump %ENTRY_CODE(Sp(SP_OFF)); - } else { + return (ret); + } else { /* Commit failed */ W_ new_trec; - ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") []; + ("ptr" new_trec) = ccall stmStartTransaction(MyCapability() "ptr", outer "ptr"); StgTSO_trec(CurrentTSO) = new_trec; - R1 = StgCatchSTMFrame_code(frame); - jump stg_ap_v_fast; - } - } + + jump stg_ap_v_fast + (CATCH_STM_FRAME_FIELDS(,,info_ptr,code,handler)) + (code); + } +} -// Primop definition ------------------------------------------------------------ +// Primop definition ----------------------------------------------------------- -stg_atomicallyzh +stg_atomicallyzh (P_ stm) { - W_ frame; W_ old_trec; W_ new_trec; - + W_ code, next_invariant, frame_result; + // stmStartTransaction may allocate - MAYBE_GC (R1_PTR, stg_atomicallyzh); + MAYBE_GC_P(stg_atomicallyzh, stm); - /* Args: R1 = m :: STM a */ - STK_CHK_GEN(SIZEOF_StgAtomicallyFrame + WDS(1), R1_PTR, stg_atomicallyzh); + STK_CHK_GEN(); old_trec = StgTSO_trec(CurrentTSO); /* Nested transactions are not allowed; raise an exception */ if (old_trec != NO_TREC) { - R1 = base_ControlziExceptionziBase_nestedAtomically_closure; - jump stg_raisezh; + jump stg_raisezh(base_ControlziExceptionziBase_nestedAtomically_closure); } - /* Set up the atomically frame */ - Sp = Sp - SIZEOF_StgAtomicallyFrame; - frame = Sp; - - SET_HDR(frame,stg_atomically_frame_info, CCCS); - StgAtomicallyFrame_code(frame) = R1; - StgAtomicallyFrame_result(frame) = NO_TREC; - StgAtomicallyFrame_next_invariant_to_check(frame) = END_INVARIANT_CHECK_QUEUE; + code = stm; + next_invariant = END_INVARIANT_CHECK_QUEUE; + frame_result = NO_TREC; /* Start the memory transcation */ - ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", old_trec "ptr") [R1]; + ("ptr" new_trec) = ccall stmStartTransaction(MyCapability() "ptr", old_trec "ptr"); StgTSO_trec(CurrentTSO) = new_trec; - /* Apply R1 to the realworld token */ - jump stg_ap_v_fast; + jump stg_ap_v_fast + (ATOMICALLY_FRAME_FIELDS(,,stg_atomically_frame_info, + code,next_invariant,frame_result)) + (stm); } // A closure representing "atomically x". This is used when a thread @@ -948,73 +917,57 @@ stg_atomicallyzh // It is somewhat similar to the stg_raise closure. // INFO_TABLE(stg_atomically,1,0,THUNK_1_0,"atomically","atomically") + (P_ thunk) { - R1 = StgThunk_payload(R1,0); - jump stg_atomicallyzh; + jump stg_atomicallyzh(StgThunk_payload(thunk,0)); } -stg_catchSTMzh +stg_catchSTMzh (P_ code /* :: STM a */, + P_ handler /* :: Exception -> STM a */) { - W_ frame; - - /* Args: R1 :: STM a */ - /* Args: R2 :: Exception -> STM a */ - STK_CHK_GEN(SIZEOF_StgCatchSTMFrame + WDS(1), R1_PTR & R2_PTR, stg_catchSTMzh); - - /* Set up the catch frame */ - Sp = Sp - SIZEOF_StgCatchSTMFrame; - frame = Sp; - - SET_HDR(frame, stg_catch_stm_frame_info, CCCS); - StgCatchSTMFrame_handler(frame) = R2; - StgCatchSTMFrame_code(frame) = R1; - - /* Start a nested transaction to run the body of the try block in */ - W_ cur_trec; - W_ new_trec; - cur_trec = StgTSO_trec(CurrentTSO); - ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", cur_trec "ptr"); - StgTSO_trec(CurrentTSO) = new_trec; - - /* Apply R1 to the realworld token */ - jump stg_ap_v_fast; + STK_CHK_GEN(); + + /* Start a nested transaction to run the body of the try block in */ + W_ cur_trec; + W_ new_trec; + cur_trec = StgTSO_trec(CurrentTSO); + ("ptr" new_trec) = ccall stmStartTransaction(MyCapability() "ptr", + cur_trec "ptr"); + StgTSO_trec(CurrentTSO) = new_trec; + + jump stg_ap_v_fast + (CATCH_STM_FRAME_FIELDS(,,stg_catch_stm_frame_info, code, handler)) + (code); } -stg_catchRetryzh +stg_catchRetryzh (P_ first_code, /* :: STM a */ + P_ alt_code /* :: STM a */) { - W_ frame; W_ new_trec; - W_ trec; // stmStartTransaction may allocate - MAYBE_GC (R1_PTR & R2_PTR, stg_catchRetryzh); + MAYBE_GC_PP (stg_catchRetryzh, first_code, alt_code); - /* Args: R1 :: STM a */ - /* Args: R2 :: STM a */ - STK_CHK_GEN(SIZEOF_StgCatchRetryFrame + WDS(1), R1_PTR & R2_PTR, stg_catchRetryzh); + STK_CHK_GEN(); /* Start a nested transaction within which to run the first code */ - trec = StgTSO_trec(CurrentTSO); - ("ptr" new_trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", trec "ptr") [R1,R2]; + ("ptr" new_trec) = ccall stmStartTransaction(MyCapability() "ptr", + StgTSO_trec(CurrentTSO) "ptr"); StgTSO_trec(CurrentTSO) = new_trec; - /* Set up the catch-retry frame */ - Sp = Sp - SIZEOF_StgCatchRetryFrame; - frame = Sp; - - SET_HDR(frame, stg_catch_retry_frame_info, CCCS); - StgCatchRetryFrame_running_alt_code(frame) = 0 :: CInt; // false; - StgCatchRetryFrame_first_code(frame) = R1; - StgCatchRetryFrame_alt_code(frame) = R2; - - /* Apply R1 to the realworld token */ - jump stg_ap_v_fast; + // push the CATCH_RETRY stack frame, and apply first_code to realWorld# + jump stg_ap_v_fast + (CATCH_RETRY_FRAME_FIELDS(,, stg_catch_retry_frame_info, + 0, /* not running_alt_code */ + first_code, + alt_code)) + (first_code); } -stg_retryzh +stg_retryzh /* no arg list: explicit stack layout */ { W_ frame_type; W_ frame; @@ -1022,12 +975,14 @@ stg_retryzh W_ outer; W_ r; - MAYBE_GC (NO_PTRS, stg_retryzh); // STM operations may allocate + // STM operations may allocate + MAYBE_GC_ (stg_retryzh); // NB. not MAYBE_GC(), we cannot make a + // function call in an explicit-stack proc // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME retry_pop_stack: SAVE_THREAD_STATE(); - (frame_type) = foreign "C" findRetryFrameHelper(MyCapability(), CurrentTSO "ptr") []; + (frame_type) = ccall findRetryFrameHelper(MyCapability(), CurrentTSO "ptr"); LOAD_THREAD_STATE(); frame = Sp; trec = StgTSO_trec(CurrentTSO); @@ -1037,15 +992,15 @@ retry_pop_stack: // The retry reaches a CATCH_RETRY_FRAME before the atomic frame ASSERT(outer != NO_TREC); // Abort the transaction attempting the current branch - foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") []; - foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") []; - if (!StgCatchRetryFrame_running_alt_code(frame) != 0::I32) { + ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr"); + ccall stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr"); + if (!StgCatchRetryFrame_running_alt_code(frame) != 0) { // Retry in the first branch: try the alternative - ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") []; + ("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr", outer "ptr"); StgTSO_trec(CurrentTSO) = trec; StgCatchRetryFrame_running_alt_code(frame) = 1 :: CInt; // true; R1 = StgCatchRetryFrame_alt_code(frame); - jump stg_ap_v_fast; + jump stg_ap_v_fast [R1]; } else { // Retry in the alternative code: propagate the retry StgTSO_trec(CurrentTSO) = outer; @@ -1060,108 +1015,93 @@ retry_pop_stack: // We called retry while checking invariants, so abort the current // invariant check (merging its TVar accesses into the parents read // set so we'll wait on them) - foreign "C" stmAbortTransaction(MyCapability() "ptr", trec "ptr") []; - foreign "C" stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr") []; + ccall stmAbortTransaction(MyCapability() "ptr", trec "ptr"); + ccall stmFreeAbortedTRec(MyCapability() "ptr", trec "ptr"); trec = outer; StgTSO_trec(CurrentTSO) = trec; outer = StgTRecHeader_enclosing_trec(trec); } ASSERT(outer == NO_TREC); - (r) = foreign "C" stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr") []; + (r) = ccall stmWait(MyCapability() "ptr", CurrentTSO "ptr", trec "ptr"); if (r != 0) { // Transaction was valid: stmWait put us on the TVars' queues, we now block StgHeader_info(frame) = stg_atomically_waiting_frame_info; Sp = frame; - // Fix up the stack in the unregisterised case: the return convention is different. R3 = trec; // passing to stmWaitUnblock() - jump stg_block_stmwait; + jump stg_block_stmwait [R3]; } else { // Transaction was not valid: retry immediately - ("ptr" trec) = foreign "C" stmStartTransaction(MyCapability() "ptr", outer "ptr") []; + ("ptr" trec) = ccall stmStartTransaction(MyCapability() "ptr", outer "ptr"); StgTSO_trec(CurrentTSO) = trec; - R1 = StgAtomicallyFrame_code(frame); Sp = frame; - jump stg_ap_v_fast; + R1 = StgAtomicallyFrame_code(frame); + jump stg_ap_v_fast [R1]; } } - -stg_checkzh +stg_checkzh (P_ closure /* STM a */) { - W_ trec, closure; - - /* Args: R1 = invariant closure */ - MAYBE_GC (R1_PTR, stg_checkzh); + W_ trec; - trec = StgTSO_trec(CurrentTSO); - closure = R1; - foreign "C" stmAddInvariantToCheck(MyCapability() "ptr", - trec "ptr", - closure "ptr") []; + MAYBE_GC_P (stg_checkzh, closure); - jump %ENTRY_CODE(Sp(0)); + trec = StgTSO_trec(CurrentTSO); + ccall stmAddInvariantToCheck(MyCapability() "ptr", + trec "ptr", + closure "ptr"); + return (); } -stg_newTVarzh +stg_newTVarzh (P_ init) { - W_ tv; - W_ new_value; + W_ tv; - /* Args: R1 = initialisation value */ - - MAYBE_GC (R1_PTR, stg_newTVarzh); - new_value = R1; - ("ptr" tv) = foreign "C" stmNewTVar(MyCapability() "ptr", new_value "ptr") []; - RET_P(tv); + MAYBE_GC_P (stg_newTVarzh, init); + ("ptr" tv) = ccall stmNewTVar(MyCapability() "ptr", init "ptr"); + return (tv); } -stg_readTVarzh +stg_readTVarzh (P_ tvar) { W_ trec; - W_ tvar; W_ result; - /* Args: R1 = TVar closure */ + // Call to stmReadTVar may allocate + MAYBE_GC_P (stg_readTVarzh, tvar); - MAYBE_GC (R1_PTR, stg_readTVarzh); // Call to stmReadTVar may allocate trec = StgTSO_trec(CurrentTSO); - tvar = R1; - ("ptr" result) = foreign "C" stmReadTVar(MyCapability() "ptr", trec "ptr", tvar "ptr") []; - - RET_P(result); + ("ptr" result) = ccall stmReadTVar(MyCapability() "ptr", trec "ptr", + tvar "ptr"); + return (result); } -stg_readTVarIOzh +stg_readTVarIOzh ( P_ tvar /* :: TVar a */ ) { W_ result; again: - result = StgTVar_current_value(R1); + result = StgTVar_current_value(tvar); if (%INFO_PTR(result) == stg_TREC_HEADER_info) { goto again; } - RET_P(result); + return (result); } -stg_writeTVarzh +stg_writeTVarzh (P_ tvar, /* :: TVar a */ + P_ new_value /* :: a */) { - W_ trec; - W_ tvar; - W_ new_value; - - /* Args: R1 = TVar closure */ - /* R2 = New value */ + W_ trec; - MAYBE_GC (R1_PTR & R2_PTR, stg_writeTVarzh); // Call to stmWriteTVar may allocate - trec = StgTSO_trec(CurrentTSO); - tvar = R1; - new_value = R2; - foreign "C" stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", new_value "ptr") []; + // Call to stmWriteTVar may allocate + MAYBE_GC_PP (stg_writeTVarzh, tvar, new_value); - jump %ENTRY_CODE(Sp(0)); + trec = StgTSO_trec(CurrentTSO); + ccall stmWriteTVar(MyCapability() "ptr", trec "ptr", tvar "ptr", + new_value "ptr"); + return (); } @@ -1197,23 +1137,20 @@ stg_writeTVarzh * * -------------------------------------------------------------------------- */ -stg_isEmptyMVarzh +stg_isEmptyMVarzh ( P_ mvar /* :: MVar a */ ) { - /* args: R1 = MVar closure */ - - if (StgMVar_value(R1) == stg_END_TSO_QUEUE_closure) { - RET_N(1); + if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) { + return (1); } else { - RET_N(0); + return (0); } } -stg_newMVarzh +stg_newMVarzh () { - /* args: none */ W_ mvar; - ALLOC_PRIM ( SIZEOF_StgMVar, NO_PTRS, stg_newMVarzh ); + ALLOC_PRIM (SIZEOF_StgMVar); mvar = Hp - SIZEOF_StgMVar + WDS(1); SET_HDR(mvar,stg_MVAR_DIRTY_info,CCCS); @@ -1221,7 +1158,7 @@ stg_newMVarzh StgMVar_head(mvar) = stg_END_TSO_QUEUE_closure; StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; - RET_P(mvar); + return (mvar); } @@ -1229,7 +1166,7 @@ stg_newMVarzh W_ sp; \ sp = StgStack_sp(stack); \ W_[sp + WDS(1)] = value; \ - W_[sp + WDS(0)] = stg_gc_unpt_r1_info; + W_[sp + WDS(0)] = stg_ret_p_info; #define PerformPut(stack,lval) \ W_ sp; \ @@ -1237,21 +1174,19 @@ stg_newMVarzh StgStack_sp(stack) = sp; \ lval = W_[sp - WDS(1)]; -stg_takeMVarzh -{ - W_ mvar, val, info, tso, q; - /* args: R1 = MVar closure */ - mvar = R1; +stg_takeMVarzh ( P_ mvar /* :: MVar a */ ) +{ + W_ val, info, tso, q; #if defined(THREADED_RTS) - ("ptr" info) = foreign "C" lockClosure(mvar "ptr") []; + ("ptr" info) = ccall lockClosure(mvar "ptr"); #else info = GET_INFO(mvar); #endif if (info == stg_MVAR_CLEAN_info) { - foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") []; + ccall dirty_MVAR(BaseReg "ptr", mvar "ptr"); } /* If the MVar is empty, put ourselves on its blocking queue, @@ -1259,16 +1194,13 @@ stg_takeMVarzh */ if (StgMVar_value(mvar) == stg_END_TSO_QUEUE_closure) { - // Note [mvar-heap-check] We want to do the heap check in the - // branch here, to avoid the conditional in the common case. - // However, we've already locked the MVar above, so we better - // be careful to unlock it again if the the heap check fails. - // Unfortunately we don't have an easy way to inject any code - // into the heap check generated by the code generator, so we - // have to do it in stg_gc_gen (see HeapStackCheck.cmm). - HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR, stg_takeMVarzh); - TICK_ALLOC_PRIM(SIZEOF_StgMVarTSOQueue, 0, 0); - CCCS_ALLOC(SIZEOF_StgMVarTSOQueue); + // We want to put the heap check down here in the slow path, + // but be careful to unlock the closure before returning to + // the RTS if the check fails. + ALLOC_PRIM_WITH_CUSTOM_FAILURE + (SIZEOF_StgMVarTSOQueue, + unlockClosure(mvar, stg_MVAR_DIRTY_info); + GC_PRIM_P(stg_takeMVarzh, mvar)); q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1); @@ -1280,16 +1212,15 @@ stg_takeMVarzh StgMVar_head(mvar) = q; } else { StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q; - foreign "C" recordClosureMutated(MyCapability() "ptr", - StgMVar_tail(mvar)) []; + ccall recordClosureMutated(MyCapability() "ptr", + StgMVar_tail(mvar)); } StgTSO__link(CurrentTSO) = q; StgTSO_block_info(CurrentTSO) = mvar; StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; StgMVar_tail(mvar) = q; - R1 = mvar; - jump stg_block_takemvar; + jump stg_block_takemvar(mvar); } /* we got the value... */ @@ -1301,14 +1232,14 @@ loop: /* No further putMVars, MVar is now empty */ StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; unlockClosure(mvar, stg_MVAR_DIRTY_info); - RET_P(val); + return (val); } if (StgHeader_info(q) == stg_IND_info || StgHeader_info(q) == stg_MSG_NULL_info) { q = StgInd_indirectee(q); goto loop; } - + // There are putMVar(s) waiting... wake up the first thread on the queue tso = StgMVarTSOQueue_tso(q); @@ -1330,22 +1261,18 @@ loop: // no need to mark the TSO dirty, we have only written END_TSO_QUEUE. - foreign "C" tryWakeupThread(MyCapability() "ptr", tso) []; + ccall tryWakeupThread(MyCapability() "ptr", tso); unlockClosure(mvar, stg_MVAR_DIRTY_info); - RET_P(val); + return (val); } - -stg_tryTakeMVarzh +stg_tryTakeMVarzh ( P_ mvar /* :: MVar a */ ) { - W_ mvar, val, info, tso, q; - - /* args: R1 = MVar closure */ - mvar = R1; + W_ val, info, tso, q; #if defined(THREADED_RTS) - ("ptr" info) = foreign "C" lockClosure(mvar "ptr") []; + ("ptr" info) = ccall lockClosure(mvar "ptr"); #else info = GET_INFO(mvar); #endif @@ -1360,11 +1287,11 @@ stg_tryTakeMVarzh /* HACK: we need a pointer to pass back, * so we abuse NO_FINALIZER_closure */ - RET_NP(0, stg_NO_FINALIZER_closure); + return (0, stg_NO_FINALIZER_closure); } if (info == stg_MVAR_CLEAN_info) { - foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr") []; + ccall dirty_MVAR(BaseReg "ptr", mvar "ptr"); } /* we got the value... */ @@ -1376,7 +1303,7 @@ loop: /* No further putMVars, MVar is now empty */ StgMVar_value(mvar) = stg_END_TSO_QUEUE_closure; unlockClosure(mvar, stg_MVAR_DIRTY_info); - RET_NP(1, val); + return (1, val); } if (StgHeader_info(q) == stg_IND_info || StgHeader_info(q) == stg_MSG_NULL_info) { @@ -1405,37 +1332,36 @@ loop: // no need to mark the TSO dirty, we have only written END_TSO_QUEUE. - foreign "C" tryWakeupThread(MyCapability() "ptr", tso) []; + ccall tryWakeupThread(MyCapability() "ptr", tso); unlockClosure(mvar, stg_MVAR_DIRTY_info); - RET_NP(1,val); + return (1,val); } - -stg_putMVarzh +stg_putMVarzh ( P_ mvar, /* :: MVar a */ + P_ val, /* :: a */ ) { - W_ mvar, val, info, tso, q; - - /* args: R1 = MVar, R2 = value */ - mvar = R1; - val = R2; + W_ info, tso, q; #if defined(THREADED_RTS) - ("ptr" info) = foreign "C" lockClosure(mvar "ptr") []; + ("ptr" info) = ccall lockClosure(mvar "ptr"); #else info = GET_INFO(mvar); #endif if (info == stg_MVAR_CLEAN_info) { - foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr"); + ccall dirty_MVAR(BaseReg "ptr", mvar "ptr"); } if (StgMVar_value(mvar) != stg_END_TSO_QUEUE_closure) { - // see Note [mvar-heap-check] above - HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR & R2_PTR, stg_putMVarzh); - TICK_ALLOC_PRIM(SIZEOF_StgMVarTSOQueue, 0, 0); - CCCS_ALLOC(SIZEOF_StgMVarTSOQueue); + // We want to put the heap check down here in the slow path, + // but be careful to unlock the closure before returning to + // the RTS if the check fails. + ALLOC_PRIM_WITH_CUSTOM_FAILURE + (SIZEOF_StgMVarTSOQueue, + unlockClosure(mvar, stg_MVAR_DIRTY_info); + GC_PRIM_P(stg_putMVarzh, mvar)); q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1); @@ -1447,17 +1373,15 @@ stg_putMVarzh StgMVar_head(mvar) = q; } else { StgMVarTSOQueue_link(StgMVar_tail(mvar)) = q; - foreign "C" recordClosureMutated(MyCapability() "ptr", - StgMVar_tail(mvar)) []; + ccall recordClosureMutated(MyCapability() "ptr", + StgMVar_tail(mvar)); } StgTSO__link(CurrentTSO) = q; StgTSO_block_info(CurrentTSO) = mvar; StgTSO_why_blocked(CurrentTSO) = BlockedOnMVar::I16; StgMVar_tail(mvar) = q; - R1 = mvar; - R2 = val; - jump stg_block_putmvar; + jump stg_block_putmvar(mvar,val); } q = StgMVar_head(mvar); @@ -1466,7 +1390,7 @@ loop: /* No further takes, the MVar is now full. */ StgMVar_value(mvar) = val; unlockClosure(mvar, stg_MVAR_DIRTY_info); - jump %ENTRY_CODE(Sp(0)); + return (); } if (StgHeader_info(q) == stg_IND_info || StgHeader_info(q) == stg_MSG_NULL_info) { @@ -1494,26 +1418,23 @@ loop: StgTSO__link(tso) = stg_END_TSO_QUEUE_closure; if (TO_W_(StgStack_dirty(stack)) == 0) { - foreign "C" dirty_STACK(MyCapability() "ptr", stack "ptr") []; + ccall dirty_STACK(MyCapability() "ptr", stack "ptr"); } - foreign "C" tryWakeupThread(MyCapability() "ptr", tso) []; + ccall tryWakeupThread(MyCapability() "ptr", tso); unlockClosure(mvar, stg_MVAR_DIRTY_info); - jump %ENTRY_CODE(Sp(0)); + return (); } -stg_tryPutMVarzh +stg_tryPutMVarzh ( P_ mvar, /* :: MVar a */ + P_ val, /* :: a */ ) { - W_ mvar, val, info, tso, q; - - /* args: R1 = MVar, R2 = value */ - mvar = R1; - val = R2; + W_ info, tso, q; #if defined(THREADED_RTS) - ("ptr" info) = foreign "C" lockClosure(mvar "ptr") []; + ("ptr" info) = ccall lockClosure(mvar "ptr"); #else info = GET_INFO(mvar); #endif @@ -1522,11 +1443,11 @@ stg_tryPutMVarzh #if defined(THREADED_RTS) unlockClosure(mvar, info); #endif - RET_N(0); + return (0); } if (info == stg_MVAR_CLEAN_info) { - foreign "C" dirty_MVAR(BaseReg "ptr", mvar "ptr"); + ccall dirty_MVAR(BaseReg "ptr", mvar "ptr"); } q = StgMVar_head(mvar); @@ -1535,7 +1456,7 @@ loop: /* No further takes, the MVar is now full. */ StgMVar_value(mvar) = val; unlockClosure(mvar, stg_MVAR_DIRTY_info); - RET_N(1); + return (1); } if (StgHeader_info(q) == stg_IND_info || StgHeader_info(q) == stg_MSG_NULL_info) { @@ -1563,13 +1484,13 @@ loop: StgTSO__link(tso) = stg_END_TSO_QUEUE_closure; if (TO_W_(StgStack_dirty(stack)) == 0) { - foreign "C" dirty_STACK(MyCapability() "ptr", stack "ptr") []; + ccall dirty_STACK(MyCapability() "ptr", stack "ptr"); } - foreign "C" tryWakeupThread(MyCapability() "ptr", tso) []; + ccall tryWakeupThread(MyCapability() "ptr", tso); unlockClosure(mvar, stg_MVAR_DIRTY_info); - RET_N(1); + return (1); } @@ -1577,13 +1498,13 @@ loop: Stable pointer primitives ------------------------------------------------------------------------- */ -stg_makeStableNamezh +stg_makeStableNamezh ( P_ obj ) { W_ index, sn_obj; - ALLOC_PRIM( SIZEOF_StgStableName, R1_PTR, stg_makeStableNamezh ); + ALLOC_PRIM_P (SIZEOF_StgStableName, stg_makeStableNamezh, obj); - (index) = foreign "C" lookupStableName(R1 "ptr") []; + (index) = ccall lookupStableName(obj "ptr"); /* Is there already a StableName for this heap object? * stable_ptr_table is a pointer to an array of snEntry structs. @@ -1597,56 +1518,48 @@ stg_makeStableNamezh sn_obj = snEntry_sn_obj(W_[stable_ptr_table] + index*SIZEOF_snEntry); } - RET_P(sn_obj); + return (sn_obj); } - -stg_makeStablePtrzh +stg_makeStablePtrzh ( P_ obj ) { - /* Args: R1 = a */ W_ sp; - MAYBE_GC(R1_PTR, stg_makeStablePtrzh); - ("ptr" sp) = foreign "C" getStablePtr(R1 "ptr") []; - RET_N(sp); + + ("ptr" sp) = ccall getStablePtr(obj "ptr"); + return (sp); } -stg_deRefStablePtrzh +stg_deRefStablePtrzh ( P_ sp ) { - /* Args: R1 = the stable ptr */ - W_ r, sp; - sp = R1; + W_ r; r = snEntry_addr(W_[stable_ptr_table] + sp*SIZEOF_snEntry); - RET_P(r); + return (r); } /* ----------------------------------------------------------------------------- Bytecode object primitives ------------------------------------------------------------------------- */ -stg_newBCOzh +stg_newBCOzh ( P_ instrs, + P_ literals, + P_ ptrs, + W_ arity, + P_ bitmap_arr ) { - /* R1 = instrs - R2 = literals - R3 = ptrs - R4 = arity - R5 = bitmap array - */ - W_ bco, bitmap_arr, bytes, words; - - bitmap_arr = R5; + W_ bco, bytes, words; words = BYTES_TO_WDS(SIZEOF_StgBCO) + BYTE_ARR_WDS(bitmap_arr); bytes = WDS(words); - ALLOC_PRIM( bytes, R1_PTR&R2_PTR&R3_PTR&R5_PTR, stg_newBCOzh ); + ALLOC_PRIM (bytes); bco = Hp - bytes + WDS(1); SET_HDR(bco, stg_BCO_info, CCCS); - StgBCO_instrs(bco) = R1; - StgBCO_literals(bco) = R2; - StgBCO_ptrs(bco) = R3; - StgBCO_arity(bco) = HALF_W_(R4); + StgBCO_instrs(bco) = instrs; + StgBCO_literals(bco) = literals; + StgBCO_ptrs(bco) = ptrs; + StgBCO_arity(bco) = HALF_W_(arity); StgBCO_size(bco) = HALF_W_(words); // Copy the arity/bitmap info into the BCO @@ -1659,23 +1572,20 @@ for: goto for; } - RET_P(bco); + return (bco); } - -stg_mkApUpd0zh +stg_mkApUpd0zh ( P_ bco ) { - // R1 = the BCO# for the AP - // W_ ap; // This function is *only* used to wrap zero-arity BCOs in an // updatable wrapper (see ByteCodeLink.lhs). An AP thunk is always // saturated and always points directly to a FUN or BCO. - ASSERT(%INFO_TYPE(%GET_STD_INFO(R1)) == HALF_W_(BCO) && - StgBCO_arity(R1) == HALF_W_(0)); + ASSERT(%INFO_TYPE(%GET_STD_INFO(bco)) == HALF_W_(BCO) && + StgBCO_arity(bco) == HALF_W_(0)); - HP_CHK_GEN_TICKY(SIZEOF_StgAP, R1_PTR, stg_mkApUpd0zh); + HP_CHK_P(SIZEOF_StgAP, stg_mkApUpd0zh, bco); TICK_ALLOC_UP_THK(0, 0); CCCS_ALLOC(SIZEOF_StgAP); @@ -1683,18 +1593,17 @@ stg_mkApUpd0zh SET_HDR(ap, stg_AP_info, CCCS); StgAP_n_args(ap) = HALF_W_(0); - StgAP_fun(ap) = R1; + StgAP_fun(ap) = bco; - RET_P(ap); + return (ap); } -stg_unpackClosurezh +stg_unpackClosurezh ( P_ closure ) { -/* args: R1 = closure to analyze */ // TODO: Consider the absence of ptrs or nonptrs as a special case ? W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr; - info = %GET_STD_INFO(UNTAG(R1)); + info = %GET_STD_INFO(UNTAG(closure)); // Some closures have non-standard layout, so we omit those here. W_ type; @@ -1723,10 +1632,10 @@ out: ptrs_arr_cards = mutArrPtrsCardWords(ptrs); ptrs_arr_sz = SIZEOF_StgMutArrPtrs + WDS(ptrs) + WDS(ptrs_arr_cards); - ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, stg_unpackClosurezh); + ALLOC_PRIM_P (ptrs_arr_sz + nptrs_arr_sz, stg_unpackClosurezh, closure); W_ clos; - clos = UNTAG(R1); + clos = UNTAG(closure); ptrs_arr = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1); nptrs_arr = Hp - nptrs_arr_sz + WDS(1); @@ -1755,7 +1664,7 @@ for2: p = p + 1; goto for2; } - RET_NPP(info, ptrs_arr, nptrs_arr); + return (info, ptrs_arr, nptrs_arr); } /* ----------------------------------------------------------------------------- @@ -1770,47 +1679,45 @@ for2: if (W_[blocked_queue_hd] == END_TSO_QUEUE) { \ W_[blocked_queue_hd] = tso; \ } else { \ - foreign "C" setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso) []; \ + ccall setTSOLink(MyCapability() "ptr", W_[blocked_queue_tl] "ptr", tso); \ } \ W_[blocked_queue_tl] = tso; -stg_waitReadzh +stg_waitReadzh ( W_ fd ) { - /* args: R1 */ #ifdef THREADED_RTS - foreign "C" barf("waitRead# on threaded RTS") never returns; + ccall barf("waitRead# on threaded RTS") never returns; #else ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16); StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16; - StgTSO_block_info(CurrentTSO) = R1; + StgTSO_block_info(CurrentTSO) = fd; // No locking - we're not going to use this interface in the // threaded RTS anyway. APPEND_TO_BLOCKED_QUEUE(CurrentTSO); - jump stg_block_noregs; + jump stg_block_noregs(); #endif } -stg_waitWritezh +stg_waitWritezh ( W_ fd ) { - /* args: R1 */ #ifdef THREADED_RTS - foreign "C" barf("waitWrite# on threaded RTS") never returns; + ccall barf("waitWrite# on threaded RTS") never returns; #else ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16); StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16; - StgTSO_block_info(CurrentTSO) = R1; + StgTSO_block_info(CurrentTSO) = fd; // No locking - we're not going to use this interface in the // threaded RTS anyway. APPEND_TO_BLOCKED_QUEUE(CurrentTSO); - jump stg_block_noregs; + jump stg_block_noregs(); #endif } STRING(stg_delayzh_malloc_str, "stg_delayzh") -stg_delayzh +stg_delayzh ( W_ us_delay ) { #ifdef mingw32_HOST_OS W_ ares; @@ -1820,19 +1727,18 @@ stg_delayzh #endif #ifdef THREADED_RTS - foreign "C" barf("delay# on threaded RTS") never returns; + ccall barf("delay# on threaded RTS") never returns; #else - /* args: R1 (microsecond delay amount) */ ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16); StgTSO_why_blocked(CurrentTSO) = BlockedOnDelay::I16; #ifdef mingw32_HOST_OS /* could probably allocate this on the heap instead */ - ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult, + ("ptr" ares) = ccall stgMallocBytes(SIZEOF_StgAsyncIOResult, stg_delayzh_malloc_str); - (reqID) = foreign "C" addDelayRequest(R1); + (reqID) = ccall addDelayRequest(us_delay); StgAsyncIOResult_reqID(ares) = reqID; StgAsyncIOResult_len(ares) = 0; StgAsyncIOResult_errCode(ares) = 0; @@ -1844,12 +1750,12 @@ stg_delayzh */ StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16; APPEND_TO_BLOCKED_QUEUE(CurrentTSO); - jump stg_block_async_void; + jump stg_block_async_void(); #else - (target) = foreign "C" getDelayTarget(R1) [R1]; + (target) = ccall getDelayTarget(us_delay); StgTSO_block_info(CurrentTSO) = target; @@ -1867,9 +1773,9 @@ while: if (prev == NULL) { W_[sleeping_queue] = CurrentTSO; } else { - foreign "C" setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO) []; + ccall setTSOLink(MyCapability() "ptr", prev "ptr", CurrentTSO); } - jump stg_block_noregs; + jump stg_block_noregs(); #endif #endif /* !THREADED_RTS */ } @@ -1877,86 +1783,80 @@ while: #ifdef mingw32_HOST_OS STRING(stg_asyncReadzh_malloc_str, "stg_asyncReadzh") -stg_asyncReadzh +stg_asyncReadzh ( W_ fd, W_ is_sock, W_ len, W_ buf ) { W_ ares; CInt reqID; #ifdef THREADED_RTS - foreign "C" barf("asyncRead# on threaded RTS") never returns; + ccall barf("asyncRead# on threaded RTS") never returns; #else - /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */ ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16); StgTSO_why_blocked(CurrentTSO) = BlockedOnRead::I16; /* could probably allocate this on the heap instead */ - ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult, - stg_asyncReadzh_malloc_str) - [R1,R2,R3,R4]; - (reqID) = foreign "C" addIORequest(R1, 0/*FALSE*/,R2,R3,R4 "ptr") []; + ("ptr" ares) = ccall stgMallocBytes(SIZEOF_StgAsyncIOResult, + stg_asyncReadzh_malloc_str); + (reqID) = ccall addIORequest(fd, 0/*FALSE*/,is_sock,len,buf "ptr"); StgAsyncIOResult_reqID(ares) = reqID; StgAsyncIOResult_len(ares) = 0; StgAsyncIOResult_errCode(ares) = 0; StgTSO_block_info(CurrentTSO) = ares; APPEND_TO_BLOCKED_QUEUE(CurrentTSO); - jump stg_block_async; + jump stg_block_async(); #endif } STRING(stg_asyncWritezh_malloc_str, "stg_asyncWritezh") -stg_asyncWritezh +stg_asyncWritezh ( W_ fd, W_ is_sock, W_ len, W_ buf ) { W_ ares; CInt reqID; #ifdef THREADED_RTS - foreign "C" barf("asyncWrite# on threaded RTS") never returns; + ccall barf("asyncWrite# on threaded RTS") never returns; #else - /* args: R1 = fd, R2 = isSock, R3 = len, R4 = buf */ ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16); StgTSO_why_blocked(CurrentTSO) = BlockedOnWrite::I16; - ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult, - stg_asyncWritezh_malloc_str) - [R1,R2,R3,R4]; - (reqID) = foreign "C" addIORequest(R1, 1/*TRUE*/,R2,R3,R4 "ptr") []; + ("ptr" ares) = ccall stgMallocBytes(SIZEOF_StgAsyncIOResult, + stg_asyncWritezh_malloc_str); + (reqID) = ccall addIORequest(fd, 1/*TRUE*/,is_sock,len,buf "ptr"); StgAsyncIOResult_reqID(ares) = reqID; StgAsyncIOResult_len(ares) = 0; StgAsyncIOResult_errCode(ares) = 0; StgTSO_block_info(CurrentTSO) = ares; APPEND_TO_BLOCKED_QUEUE(CurrentTSO); - jump stg_block_async; + jump stg_block_async(); #endif } STRING(stg_asyncDoProczh_malloc_str, "stg_asyncDoProczh") -stg_asyncDoProczh +stg_asyncDoProczh ( W_ proc, W_ param ) { W_ ares; CInt reqID; #ifdef THREADED_RTS - foreign "C" barf("asyncDoProc# on threaded RTS") never returns; + ccall barf("asyncDoProc# on threaded RTS") never returns; #else - /* args: R1 = proc, R2 = param */ ASSERT(StgTSO_why_blocked(CurrentTSO) == NotBlocked::I16); StgTSO_why_blocked(CurrentTSO) = BlockedOnDoProc::I16; /* could probably allocate this on the heap instead */ - ("ptr" ares) = foreign "C" stgMallocBytes(SIZEOF_StgAsyncIOResult, - stg_asyncDoProczh_malloc_str) - [R1,R2]; - (reqID) = foreign "C" addDoProcRequest(R1 "ptr",R2 "ptr") []; + ("ptr" ares) = ccall stgMallocBytes(SIZEOF_StgAsyncIOResult, + stg_asyncDoProczh_malloc_str); + (reqID) = ccall addDoProcRequest(proc "ptr",param "ptr"); StgAsyncIOResult_reqID(ares) = reqID; StgAsyncIOResult_len(ares) = 0; StgAsyncIOResult_errCode(ares) = 0; StgTSO_block_info(CurrentTSO) = ares; APPEND_TO_BLOCKED_QUEUE(CurrentTSO); - jump stg_block_async; + jump stg_block_async(); #endif } #endif @@ -2012,15 +1912,16 @@ stg_asyncDoProczh * only manifests occasionally (once very 10 runs or so). * -------------------------------------------------------------------------- */ -INFO_TABLE_RET(stg_noDuplicate, RET_SMALL) +INFO_TABLE_RET(stg_noDuplicate, RET_SMALL, W_ info_ptr) + return (/* no return values */) { - Sp_adj(1); - jump stg_noDuplicatezh; + jump stg_noDuplicatezh(); } -stg_noDuplicatezh +stg_noDuplicatezh /* no arg list: explicit stack layout */ { - STK_CHK_GEN( WDS(1), NO_PTRS, stg_noDuplicatezh ); + STK_CHK(WDS(1), stg_noDuplicatezh); + // leave noDuplicate frame in case the current // computation is suspended and restarted (see above). Sp_adj(-1); @@ -2028,10 +1929,10 @@ stg_noDuplicatezh SAVE_THREAD_STATE(); ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16); - foreign "C" threadPaused (MyCapability() "ptr", CurrentTSO "ptr") []; + ccall threadPaused (MyCapability() "ptr", CurrentTSO "ptr"); if (StgTSO_what_next(CurrentTSO) == ThreadKilled::I16) { - jump stg_threadFinished; + jump stg_threadFinished []; } else { LOAD_THREAD_STATE(); ASSERT(StgTSO_what_next(CurrentTSO) == ThreadRunGHC::I16); @@ -2039,7 +1940,7 @@ stg_noDuplicatezh if (Sp(0) == stg_noDuplicate_info) { Sp_adj(1); } - jump %ENTRY_CODE(Sp(0)); + jump %ENTRY_CODE(Sp(0)) []; } } @@ -2047,75 +1948,62 @@ stg_noDuplicatezh Misc. primitives -------------------------------------------------------------------------- */ -stg_getApStackValzh +stg_getApStackValzh ( P_ ap_stack, W_ offset ) { - W_ ap_stack, offset, val, ok; - - /* args: R1 = AP_STACK, R2 = offset */ - ap_stack = R1; - offset = R2; - if (%INFO_PTR(ap_stack) == stg_AP_STACK_info) { - ok = 1; - val = StgAP_STACK_payload(ap_stack,offset); + return (1,StgAP_STACK_payload(ap_stack,offset)); } else { - ok = 0; - val = R1; + return (0,ap_stack); } - RET_NP(ok,val); } // Write the cost center stack of the first argument on stderr; return // the second. Possibly only makes sense for already evaluated // things? -stg_traceCcszh +stg_traceCcszh ( P_ obj, P_ ret ) { W_ ccs; #ifdef PROFILING - ccs = StgHeader_ccs(UNTAG(R1)); - foreign "C" fprintCCS_stderr(ccs "ptr") [R2]; + ccs = StgHeader_ccs(UNTAG(obj)); + ccall fprintCCS_stderr(ccs "ptr"); #endif - R1 = R2; - ENTER(); + jump stg_ap_0_fast(ret); } -stg_getSparkzh +stg_getSparkzh () { W_ spark; #ifndef THREADED_RTS - RET_NP(0,ghczmprim_GHCziTypes_False_closure); + return (0,ghczmprim_GHCziTypes_False_closure); #else - (spark) = foreign "C" findSpark(MyCapability()); + (spark) = ccall findSpark(MyCapability()); if (spark != 0) { - RET_NP(1,spark); + return (1,spark); } else { - RET_NP(0,ghczmprim_GHCziTypes_False_closure); + return (0,ghczmprim_GHCziTypes_False_closure); } #endif } -stg_numSparkszh +stg_numSparkszh () { W_ n; #ifdef THREADED_RTS - (n) = foreign "C" dequeElements(Capability_sparks(MyCapability())); + (n) = ccall dequeElements(Capability_sparks(MyCapability())); #else n = 0; #endif - RET_N(n); + return (n); } -stg_traceEventzh +stg_traceEventzh ( W_ msg ) { - W_ msg; - msg = R1; - #if defined(TRACING) || defined(DEBUG) - foreign "C" traceUserMsg(MyCapability() "ptr", msg "ptr") []; + ccall traceUserMsg(MyCapability() "ptr", msg "ptr"); #elif defined(DTRACE) @@ -2125,7 +2013,7 @@ stg_traceEventzh // RtsProbes.h, but that header file includes unistd.h, which doesn't // work in Cmm #if !defined(solaris2_TARGET_OS) - (enabled) = foreign "C" __dtrace_isenabled$HaskellEvent$user__msg$v1() []; + (enabled) = ccall __dtrace_isenabled$HaskellEvent$user__msg$v1(); #else // Solaris' DTrace can't handle the // __dtrace_isenabled$HaskellEvent$user__msg$v1 @@ -2139,9 +2027,10 @@ stg_traceEventzh enabled = 1; #endif if (enabled != 0) { - foreign "C" dtraceUserMsgWrapper(MyCapability() "ptr", msg "ptr") []; + ccall dtraceUserMsgWrapper(MyCapability() "ptr", msg "ptr"); } #endif - jump %ENTRY_CODE(Sp(0)); + return (); } + diff --git a/rts/Printer.c b/rts/Printer.c index fb00401f59..4f9f83db52 100644 --- a/rts/Printer.c +++ b/rts/Printer.c @@ -251,7 +251,6 @@ printClosure( StgClosure *obj ) case RET_BCO: case RET_SMALL: case RET_BIG: - case RET_DYN: case RET_FUN: */ @@ -478,38 +477,7 @@ printStackChunk( StgPtr sp, StgPtr spBottom ) printObj((StgClosure*)sp); continue; - case RET_DYN: - { - StgRetDyn* r; - StgPtr p; - StgWord dyn; - nat size; - - r = (StgRetDyn *)sp; - dyn = r->liveness; - debugBelch("RET_DYN (%p)\n", r); - - p = (P_)(r->payload); - printSmallBitmap(spBottom, sp, - RET_DYN_LIVENESS(r->liveness), - RET_DYN_BITMAP_SIZE); - p += RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE; - - for (size = RET_DYN_NONPTRS(dyn); size > 0; size--) { - debugBelch(" stk[%ld] (%p) = ", (long)(spBottom-p), p); - debugBelch("Word# %ld\n", (long)*p); - p++; - } - - for (size = RET_DYN_PTRS(dyn); size > 0; size--) { - debugBelch(" stk[%ld] (%p) = ", (long)(spBottom-p), p); - printPtr(p); - p++; - } - continue; - } - - case RET_SMALL: + case RET_SMALL: debugBelch("RET_SMALL (%p)\n", info); bitmap = info->layout.bitmap; printSmallBitmap(spBottom, sp+1, @@ -1112,7 +1080,6 @@ char *closure_type_names[] = { [RET_BCO] = "RET_BCO", [RET_SMALL] = "RET_SMALL", [RET_BIG] = "RET_BIG", - [RET_DYN] = "RET_DYN", [RET_FUN] = "RET_FUN", [UPDATE_FRAME] = "UPDATE_FRAME", [CATCH_FRAME] = "CATCH_FRAME", diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c index 47d88068bf..f5669cb8ec 100644 --- a/rts/RaiseAsync.c +++ b/rts/RaiseAsync.c @@ -959,7 +959,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, // transactions, but I don't fully understand the // interaction with STM invariants. stack->sp[1] = (W_)&stg_NO_TREC_closure; - stack->sp[0] = (W_)&stg_gc_unpt_r1_info; + stack->sp[0] = (W_)&stg_ret_p_info; tso->what_next = ThreadRunGHC; goto done; } diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c index c07dff76e4..5f9164b77b 100644 --- a/rts/RetainerProfile.c +++ b/rts/RetainerProfile.c @@ -605,7 +605,6 @@ push( StgClosure *c, retainer c_child_r, StgClosure **first_child ) case CATCH_FRAME: case UNDERFLOW_FRAME: case STOP_FRAME: - case RET_DYN: case RET_BCO: case RET_SMALL: case RET_BIG: @@ -931,8 +930,7 @@ pop( StgClosure **c, StgClosure **cp, retainer *r ) case IND_STATIC: case CONSTR_NOCAF_STATIC: // stack objects - case RET_DYN: - case UPDATE_FRAME: + case UPDATE_FRAME: case CATCH_FRAME: case UNDERFLOW_FRAME: case STOP_FRAME: @@ -1087,7 +1085,6 @@ isRetainer( StgClosure *c ) case CATCH_FRAME: case UNDERFLOW_FRAME: case STOP_FRAME: - case RET_DYN: case RET_BCO: case RET_SMALL: case RET_BIG: @@ -1349,29 +1346,7 @@ retainStack( StgClosure *c, retainer c_child_r, // and don't forget to follow the SRT goto follow_srt; - // Dynamic bitmap: the mask is stored on the stack - case RET_DYN: { - StgWord dyn; - dyn = ((StgRetDyn *)p)->liveness; - - // traverse the bitmap first - bitmap = RET_DYN_LIVENESS(dyn); - p = (P_)&((StgRetDyn *)p)->payload[0]; - size = RET_DYN_BITMAP_SIZE; - p = retain_small_bitmap(p, size, bitmap, c, c_child_r); - - // skip over the non-ptr words - p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE; - - // follow the ptr words - for (size = RET_DYN_PTRS(dyn); size > 0; size--) { - retainClosure((StgClosure *)*p, c, c_child_r); - p++; - } - continue; - } - - case RET_FUN: { + case RET_FUN: { StgRetFun *ret_fun = (StgRetFun *)p; StgFunInfoTable *fun_info; diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm index b99126187a..2985982d64 100644 --- a/rts/StgMiscClosures.cmm +++ b/rts/StgMiscClosures.cmm @@ -22,30 +22,36 @@ import LeaveCriticalSection; Stack underflow ------------------------------------------------------------------------- */ -INFO_TABLE_RET (stg_stack_underflow_frame, UNDERFLOW_FRAME, P_ unused) +INFO_TABLE_RET (stg_stack_underflow_frame, UNDERFLOW_FRAME, + W_ info_ptr, P_ unused) + /* no args => explicit stack */ { W_ new_tso; W_ ret_off; + SAVE_STGREGS + SAVE_THREAD_STATE(); ("ptr" ret_off) = foreign "C" threadStackUnderflow(MyCapability(), CurrentTSO); LOAD_THREAD_STATE(); - jump %ENTRY_CODE(Sp(ret_off)); + RESTORE_STGREGS + + jump %ENTRY_CODE(Sp(ret_off)) [*]; // NB. all registers live! } /* ---------------------------------------------------------------------------- Restore a saved cost centre ------------------------------------------------------------------------- */ -INFO_TABLE_RET (stg_restore_cccs, RET_SMALL, W_ cccs) +INFO_TABLE_RET (stg_restore_cccs, RET_SMALL, W_ info_ptr, W_ cccs) { #if defined(PROFILING) CCCS = Sp(1); #endif Sp_adj(2); - jump %ENTRY_CODE(Sp(0)); + jump %ENTRY_CODE(Sp(0)) [*]; // NB. all registers live! } /* ---------------------------------------------------------------------------- @@ -53,10 +59,9 @@ INFO_TABLE_RET (stg_restore_cccs, RET_SMALL, W_ cccs) ------------------------------------------------------------------------- */ /* 9 bits of return code for constructors created by the interpreter. */ -stg_interp_constr_entry +stg_interp_constr_entry (P_ ret) { - /* R1 points at the constructor */ - jump %ENTRY_CODE(Sp(0)); + return (ret); } /* Some info tables to be used when compiled code returns a value to @@ -94,76 +99,83 @@ stg_interp_constr_entry */ INFO_TABLE_RET( stg_ctoi_R1p, RET_BCO) + /* explicit stack */ { Sp_adj(-2); Sp(1) = R1; Sp(0) = stg_enter_info; - jump stg_yield_to_interpreter; + jump stg_yield_to_interpreter []; } /* * When the returned value is a pointer, but unlifted, in R1 ... */ INFO_TABLE_RET( stg_ctoi_R1unpt, RET_BCO ) + /* explicit stack */ { Sp_adj(-2); Sp(1) = R1; - Sp(0) = stg_gc_unpt_r1_info; - jump stg_yield_to_interpreter; + Sp(0) = stg_ret_p_info; + jump stg_yield_to_interpreter []; } /* * When the returned value is a non-pointer in R1 ... */ INFO_TABLE_RET( stg_ctoi_R1n, RET_BCO ) + /* explicit stack */ { Sp_adj(-2); Sp(1) = R1; - Sp(0) = stg_gc_unbx_r1_info; - jump stg_yield_to_interpreter; + Sp(0) = stg_ret_p_info; + jump stg_yield_to_interpreter []; } /* * When the returned value is in F1 */ INFO_TABLE_RET( stg_ctoi_F1, RET_BCO ) + /* explicit stack */ { Sp_adj(-2); F_[Sp + WDS(1)] = F1; - Sp(0) = stg_gc_f1_info; - jump stg_yield_to_interpreter; + Sp(0) = stg_ret_f_info; + jump stg_yield_to_interpreter []; } /* * When the returned value is in D1 */ INFO_TABLE_RET( stg_ctoi_D1, RET_BCO ) + /* explicit stack */ { Sp_adj(-1) - SIZEOF_DOUBLE; D_[Sp + WDS(1)] = D1; - Sp(0) = stg_gc_d1_info; - jump stg_yield_to_interpreter; + Sp(0) = stg_ret_d_info; + jump stg_yield_to_interpreter []; } /* * When the returned value is in L1 */ INFO_TABLE_RET( stg_ctoi_L1, RET_BCO ) + /* explicit stack */ { Sp_adj(-1) - 8; L_[Sp + WDS(1)] = L1; - Sp(0) = stg_gc_l1_info; - jump stg_yield_to_interpreter; + Sp(0) = stg_ret_l_info; + jump stg_yield_to_interpreter []; } /* * When the returned value is a void */ INFO_TABLE_RET( stg_ctoi_V, RET_BCO ) + /* explicit stack */ { Sp_adj(-1); - Sp(0) = stg_gc_void_info; - jump stg_yield_to_interpreter; + Sp(0) = stg_ret_v_info; + jump stg_yield_to_interpreter []; } /* @@ -172,9 +184,10 @@ INFO_TABLE_RET( stg_ctoi_V, RET_BCO ) * stack. */ INFO_TABLE_RET( stg_apply_interp, RET_BCO ) + /* explicit stack */ { /* Just in case we end up in here... (we shouldn't) */ - jump stg_yield_to_interpreter; + jump stg_yield_to_interpreter []; } /* ---------------------------------------------------------------------------- @@ -182,12 +195,13 @@ INFO_TABLE_RET( stg_apply_interp, RET_BCO ) ------------------------------------------------------------------------- */ INFO_TABLE_FUN( stg_BCO, 4, 0, BCO, "BCO", "BCO", ARG_BCO ) + /* explicit stack */ { /* entering a BCO means "apply it", same as a function */ Sp_adj(-2); Sp(1) = R1; Sp(0) = stg_apply_interp_info; - jump stg_yield_to_interpreter; + jump stg_yield_to_interpreter []; } /* ---------------------------------------------------------------------------- @@ -201,30 +215,48 @@ INFO_TABLE_FUN( stg_BCO, 4, 0, BCO, "BCO", "BCO", ARG_BCO ) ------------------------------------------------------------------------- */ INFO_TABLE(stg_IND,1,0,IND,"IND","IND") +#if 0 +/* + This version in high-level cmm generates slightly less good code + than the low-level version below it. (ToDo) +*/ + (P_ node) +{ + TICK_ENT_DYN_IND(); /* tick */ + node = UNTAG(StgInd_indirectee(node)); + TICK_ENT_VIA_NODE(); + jump %GET_ENTRY(node) (node); +} +#else + /* explicit stack */ { TICK_ENT_DYN_IND(); /* tick */ R1 = UNTAG(StgInd_indirectee(R1)); TICK_ENT_VIA_NODE(); - jump %GET_ENTRY(R1); + jump %GET_ENTRY(R1) [R1]; } +#endif INFO_TABLE(stg_IND_direct,1,0,IND,"IND","IND") + (P_ node) { TICK_ENT_DYN_IND(); /* tick */ - R1 = StgInd_indirectee(R1); + node = StgInd_indirectee(node); TICK_ENT_VIA_NODE(); - jump %ENTRY_CODE(Sp(0)); + jump %ENTRY_CODE(Sp(0)) (node); } INFO_TABLE(stg_IND_STATIC,1,0,IND_STATIC,"IND_STATIC","IND_STATIC") + /* explicit stack */ { TICK_ENT_STATIC_IND(); /* tick */ R1 = UNTAG(StgInd_indirectee(R1)); TICK_ENT_VIA_NODE(); - jump %GET_ENTRY(R1); + jump %GET_ENTRY(R1) [R1]; } INFO_TABLE(stg_IND_PERM,1,0,IND_PERM,"IND_PERM","IND_PERM") + /* explicit stack */ { /* Don't add INDs to granularity cost */ @@ -259,7 +291,7 @@ INFO_TABLE(stg_IND_PERM,1,0,IND_PERM,"IND_PERM","IND_PERM") TICK_ENT_VIA_NODE(); #endif - jump %GET_ENTRY(R1); + jump %GET_ENTRY(R1) [R1]; } /* ---------------------------------------------------------------------------- @@ -272,16 +304,17 @@ INFO_TABLE(stg_IND_PERM,1,0,IND_PERM,"IND_PERM","IND_PERM") ------------------------------------------------------------------------- */ INFO_TABLE(stg_BLACKHOLE,1,0,BLACKHOLE,"BLACKHOLE","BLACKHOLE") + (P_ node) { - W_ r, p, info, bq, msg, owner, bd; + W_ r, info, owner, bd; + P_ p, bq, msg; TICK_ENT_DYN_IND(); /* tick */ retry: - p = StgInd_indirectee(R1); + p = StgInd_indirectee(node); if (GETTAG(p) != 0) { - R1 = p; - jump %ENTRY_CODE(Sp(0)); + return (p); } info = StgHeader_info(p); @@ -296,33 +329,33 @@ retry: info == stg_BLOCKING_QUEUE_CLEAN_info || info == stg_BLOCKING_QUEUE_DIRTY_info) { - ("ptr" msg) = foreign "C" allocate(MyCapability() "ptr", - BYTES_TO_WDS(SIZEOF_MessageBlackHole)) [R1]; + ("ptr" msg) = ccall allocate(MyCapability() "ptr", + BYTES_TO_WDS(SIZEOF_MessageBlackHole)); SET_HDR(msg, stg_MSG_BLACKHOLE_info, CCS_SYSTEM); MessageBlackHole_tso(msg) = CurrentTSO; - MessageBlackHole_bh(msg) = R1; + MessageBlackHole_bh(msg) = node; - (r) = foreign "C" messageBlackHole(MyCapability() "ptr", msg "ptr") [R1]; + (r) = ccall messageBlackHole(MyCapability() "ptr", msg "ptr"); if (r == 0) { goto retry; } else { StgTSO_why_blocked(CurrentTSO) = BlockedOnBlackHole::I16; StgTSO_block_info(CurrentTSO) = msg; - jump stg_block_blackhole; + jump stg_block_blackhole(node); } } else { - R1 = p; - ENTER(); + ENTER(p); } } INFO_TABLE(__stg_EAGER_BLACKHOLE,1,0,BLACKHOLE,"BLACKHOLE","BLACKHOLE") + (P_ node) { - jump ENTRY_LBL(stg_BLACKHOLE); + jump ENTRY_LBL(stg_BLACKHOLE) (node); } // CAF_BLACKHOLE is allocated when entering a CAF. The reason it is @@ -332,8 +365,9 @@ INFO_TABLE(__stg_EAGER_BLACKHOLE,1,0,BLACKHOLE,"BLACKHOLE","BLACKHOLE") // evaluation by another thread (a BLACKHOLE). See threadPaused(). // INFO_TABLE(stg_CAF_BLACKHOLE,1,0,BLACKHOLE,"BLACKHOLE","BLACKHOLE") + (P_ node) { - jump ENTRY_LBL(stg_BLACKHOLE); + jump ENTRY_LBL(stg_BLACKHOLE) (node); } INFO_TABLE(stg_BLOCKING_QUEUE_CLEAN,4,0,BLOCKING_QUEUE,"BLOCKING_QUEUE","BLOCKING_QUEUE") @@ -349,6 +383,7 @@ INFO_TABLE(stg_BLOCKING_QUEUE_DIRTY,4,0,BLOCKING_QUEUE,"BLOCKING_QUEUE","BLOCKIN ------------------------------------------------------------------------- */ INFO_TABLE(stg_WHITEHOLE, 0,0, WHITEHOLE, "WHITEHOLE", "WHITEHOLE") + (P_ node) { #if defined(THREADED_RTS) W_ info, i; @@ -356,18 +391,18 @@ INFO_TABLE(stg_WHITEHOLE, 0,0, WHITEHOLE, "WHITEHOLE", "WHITEHOLE") i = 0; loop: // spin until the WHITEHOLE is updated - info = StgHeader_info(R1); + info = StgHeader_info(node); if (info == stg_WHITEHOLE_info) { i = i + 1; if (i == SPIN_COUNT) { i = 0; - foreign "C" yieldThread() [R1]; + ccall yieldThread(); } goto loop; } - jump %ENTRY_CODE(info); + jump %ENTRY_CODE(info) (node); #else - foreign "C" barf("WHITEHOLE object entered!") never returns; + ccall barf("WHITEHOLE object entered!") never returns; #endif } @@ -556,8 +591,9 @@ INFO_TABLE(stg_MUT_VAR_DIRTY, 1, 0, MUT_VAR_DIRTY, "MUT_VAR_DIRTY", "MUT_VAR_DIR * ------------------------------------------------------------------------- */ INFO_TABLE( stg_dummy_ret, 0, 0, CONSTR_NOCAF_STATIC, "DUMMY_RET", "DUMMY_RET") + () { - jump %ENTRY_CODE(Sp(0)); + return (); } CLOSURE(stg_dummy_ret_closure,stg_dummy_ret); diff --git a/rts/StgStartup.cmm b/rts/StgStartup.cmm index 4aace82deb..6793913464 100644 --- a/rts/StgStartup.cmm +++ b/rts/StgStartup.cmm @@ -35,11 +35,9 @@ -------------------------------------------------------------------------- */ INFO_TABLE_RET(stg_stop_thread, STOP_FRAME, -#if defined(PROFILING) - W_ unused, - W_ unused -#endif -) + W_ info_ptr, + PROF_HDR_FIELDS(W_)) +/* no return list: explicit stack layout */ { /* The final exit. @@ -75,7 +73,7 @@ INFO_TABLE_RET(stg_stop_thread, STOP_FRAME, StgRegTable_rRet(BaseReg) = ThreadFinished; R1 = BaseReg; - jump StgReturn; + jump StgReturn [R1]; } /* ----------------------------------------------------------------------------- @@ -87,46 +85,57 @@ INFO_TABLE_RET(stg_stop_thread, STOP_FRAME, the thread's state away nicely. -------------------------------------------------------------------------- */ -stg_returnToStackTop +stg_returnToStackTop /* no args: explicit stack layout */ { LOAD_THREAD_STATE(); CHECK_SENSIBLE_REGS(); - jump %ENTRY_CODE(Sp(0)); + jump %ENTRY_CODE(Sp(0)) []; } -stg_returnToSched +stg_returnToSched /* no args: explicit stack layout */ { + W_ r1; + r1 = R1; // foreign calls may clobber R1 SAVE_THREAD_STATE(); foreign "C" threadPaused(MyCapability() "ptr", CurrentTSO); - jump StgReturn; + R1 = r1; + jump StgReturn [R1]; } // A variant of stg_returnToSched that doesn't call threadPaused() on the // current thread. This is used for switching from compiled execution to the // interpreter, where calling threadPaused() on every switch would be too // expensive. -stg_returnToSchedNotPaused +stg_returnToSchedNotPaused /* no args: explicit stack layout */ { SAVE_THREAD_STATE(); - jump StgReturn; + jump StgReturn [R1]; } // A variant of stg_returnToSched, but instead of returning directly to the // scheduler, we jump to the code fragment pointed to by R2. This lets us // perform some final actions after making the thread safe, such as unlocking // the MVar on which we are about to block in SMP mode. -stg_returnToSchedButFirst +stg_returnToSchedButFirst /* no args: explicit stack layout */ { + W_ r1, r2, r3; + r1 = R1; + r2 = R2; + r3 = R3; SAVE_THREAD_STATE(); + // foreign calls may clobber R1/R2/.., so we save them above foreign "C" threadPaused(MyCapability() "ptr", CurrentTSO); - jump R2; + R1 = r1; + R2 = r2; + R3 = r3; + jump R2 [R1,R3]; } -stg_threadFinished +stg_threadFinished /* no args: explicit stack layout */ { StgRegTable_rRet(BaseReg) = ThreadFinished; R1 = BaseReg; - jump StgReturn; + jump StgReturn [R1]; } /* ----------------------------------------------------------------------------- @@ -143,31 +152,30 @@ stg_threadFinished ------------------------------------------------------------------------- */ -INFO_TABLE_RET(stg_forceIO, RET_SMALL) - +INFO_TABLE_RET(stg_forceIO, RET_SMALL, P_ info_ptr) + return (P_ ret) { - Sp_adj(1); - ENTER(); + ENTER(ret); } /* ----------------------------------------------------------------------------- Special STG entry points for module registration. -------------------------------------------------------------------------- */ -stg_init_finish +stg_init_finish /* no args: explicit stack layout */ { - jump StgReturn; + jump StgReturn []; } /* On entry to stg_init: * init_stack[0] = &stg_init_ret; * init_stack[1] = __stginit_Something; */ -stg_init +stg_init /* no args: explicit stack layout */ { W_ next; Sp = W_[BaseReg + OFFSET_StgRegTable_rSp]; next = W_[Sp]; Sp_adj(1); - jump next; + jump next []; } diff --git a/rts/StgStdThunks.cmm b/rts/StgStdThunks.cmm index 171ab52b96..0b69a9a279 100644 --- a/rts/StgStdThunks.cmm +++ b/rts/StgStdThunks.cmm @@ -11,6 +11,7 @@ * ---------------------------------------------------------------------------*/ #include "Cmm.h" +#include "Updates.h" /* ----------------------------------------------------------------------------- The code for a thunk that simply extracts a field from a @@ -26,17 +27,15 @@ matching. -------------------------------------------------------------------------- */ -#define WITHUPD_FRAME_SIZE (SIZEOF_StgUpdateFrame + SIZEOF_StgHeader) -#define NOUPD_FRAME_SIZE (SIZEOF_StgHeader) - #ifdef PROFILING -#define SAVE_CCCS(fs) StgHeader_ccs(Sp-fs) = CCCS -#define GET_SAVED_CCCS CCCS = StgHeader_ccs(Sp) -#define RET_PARAMS W_ unused1, W_ unused2 +#define RET_FIELDS(w_,info_ptr,ccs) \ + w_ info_ptr, \ + w_ ccs +#define GET_SAVED_CCCS CCCS = ccs #else -#define SAVE_CCCS(fs) /* empty */ +#define RET_FIELDS(w_,info_ptr,ccs) \ + w_ info_ptr #define GET_SAVED_CCCS /* empty */ -#define RET_PARAMS #endif /* @@ -56,42 +55,34 @@ // When profiling, we cannot shortcut by checking the tag, // because LDV profiling relies on entering closures to mark them as // "used". -#define SEL_ENTER(offset) \ - R1 = UNTAG(R1); \ - jump %GET_ENTRY(R1); +#define NEED_EVAL(__x__) 1 #else -#define SEL_ENTER(offset) \ - if (GETTAG(R1) != 0) { \ - jump RET_LBL(stg_sel_ret_##offset##_upd); \ - } \ - jump %GET_ENTRY(R1); +#define NEED_EVAL(__x__) GETTAG(__x__) == 0 #endif #define SELECTOR_CODE_UPD(offset) \ - INFO_TABLE_RET(stg_sel_ret_##offset##_upd, RET_SMALL, RET_PARAMS) \ - { \ - R1 = StgClosure_payload(UNTAG(R1),offset); \ - GET_SAVED_CCCS; \ - Sp = Sp + SIZEOF_StgHeader; \ - ENTER(); \ - } \ - \ INFO_TABLE_SELECTOR(stg_sel_##offset##_upd, offset, THUNK_SELECTOR, "stg_sel_upd", "stg_sel_upd") \ - { \ - TICK_ENT_DYN_THK(); \ - STK_CHK_NP(WITHUPD_FRAME_SIZE); \ - UPD_BH_UPDATABLE(); \ - LDV_ENTER(R1); \ - PUSH_UPD_FRAME(Sp - SIZEOF_StgUpdateFrame, R1); \ - ENTER_CCS_THUNK(R1); \ - SAVE_CCCS(WITHUPD_FRAME_SIZE); \ - W_[Sp-WITHUPD_FRAME_SIZE] = stg_sel_ret_##offset##_upd_info; \ - Sp = Sp - WITHUPD_FRAME_SIZE; \ - R1 = StgThunk_payload(R1,0); \ - SEL_ENTER(offset); \ + (P_ node) \ + { \ + P_ selectee, field; \ + TICK_ENT_DYN_THK(); \ + STK_CHK_NP(node); \ + UPD_BH_UPDATABLE(node); \ + LDV_ENTER(node); \ + ENTER_CCS_THUNK(node); \ + selectee = StgThunk_payload(node,0); \ + push (UPDATE_FRAME_FIELDS(,,stg_upd_frame_info,node)) { \ + if (NEED_EVAL(selectee)) { \ + (P_ constr) = call %GET_ENTRY(selectee) (selectee); \ + selectee = constr; \ + } \ + field = StgClosure_payload(UNTAG(selectee),offset); \ + jump stg_ap_0_fast(field); \ + } \ } - /* NOTE: no need to ENTER() here, we know the closure cannot evaluate to a function, - because we're going to do a field selection on the result. */ + /* NOTE: no need to ENTER() here, we know the closure cannot + evaluate to a function, because we're going to do a field + selection on the result. */ SELECTOR_CODE_UPD(0) SELECTOR_CODE_UPD(1) @@ -110,33 +101,27 @@ SELECTOR_CODE_UPD(13) SELECTOR_CODE_UPD(14) SELECTOR_CODE_UPD(15) -#define SELECTOR_CODE_NOUPD(offset) \ - INFO_TABLE_RET(stg_sel_ret_##offset##_noupd, RET_SMALL, RET_PARAMS) \ - { \ - R1 = StgClosure_payload(UNTAG(R1),offset); \ - GET_SAVED_CCCS; \ - Sp = Sp + SIZEOF_StgHeader; \ - ENTER(); \ - } \ - \ - INFO_TABLE_SELECTOR(stg_sel_##offset##_noupd, offset, THUNK_SELECTOR, "stg_sel_noupd", "stg_sel_noupd")\ - { \ - TICK_ENT_DYN_THK(); \ - STK_CHK_NP(NOUPD_FRAME_SIZE); \ - UPD_BH_SINGLE_ENTRY(); \ - LDV_ENTER(R1); \ - TICK_UPDF_OMITTED(); \ - ENTER_CCS_THUNK(R1); \ - SAVE_CCCS(NOUPD_FRAME_SIZE); \ - W_[Sp-NOUPD_FRAME_SIZE] = stg_sel_ret_##offset##_noupd_info; \ - Sp = Sp - NOUPD_FRAME_SIZE; \ - R1 = StgThunk_payload(R1,0); \ - if (GETTAG(R1) != 0) { \ - jump RET_LBL(stg_sel_ret_##offset##_noupd); \ + +#define SELECTOR_CODE_NOUPD(offset) \ + INFO_TABLE_SELECTOR(stg_sel_##offset##_noupd, offset, THUNK_SELECTOR, "stg_sel_noupd", "stg_sel_noupd") \ + (P_ node) \ + { \ + P_ selectee, field; \ + TICK_ENT_DYN_THK(); \ + STK_CHK_NP(node); \ + UPD_BH_UPDATABLE(node); \ + LDV_ENTER(node); \ + ENTER_CCS_THUNK(node); \ + selectee = StgThunk_payload(node,0); \ + if (NEED_EVAL(selectee)) { \ + (P_ constr) = call %GET_ENTRY(selectee) (selectee); \ + selectee = constr; \ } \ - jump %GET_ENTRY(R1); \ + field = StgClosure_payload(UNTAG(selectee),offset); \ + jump stg_ap_0_fast(field); \ } + SELECTOR_CODE_NOUPD(0) SELECTOR_CODE_NOUPD(1) SELECTOR_CODE_NOUPD(2) @@ -173,131 +158,120 @@ SELECTOR_CODE_NOUPD(15) */ INFO_TABLE(stg_ap_1_upd,1,0,THUNK_1_0,"stg_ap_1_upd_info","stg_ap_1_upd_info") + (P_ node) { - TICK_ENT_DYN_THK(); - STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(1)); - UPD_BH_UPDATABLE(); - LDV_ENTER(R1); - ENTER_CCS_THUNK(R1); - PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1); - R1 = StgThunk_payload(R1,0); - Sp = Sp - SIZEOF_StgUpdateFrame; - jump stg_ap_0_fast; + TICK_ENT_DYN_THK(); + STK_CHK_NP(node); + UPD_BH_UPDATABLE(node); + LDV_ENTER(node); + ENTER_CCS_THUNK(node); + push (UPDATE_FRAME_FIELDS(,,stg_upd_frame_info, node)) { + jump stg_ap_0_fast + (StgThunk_payload(node,0)); + } } INFO_TABLE(stg_ap_2_upd,2,0,THUNK_2_0,"stg_ap_2_upd_info","stg_ap_2_upd_info") + (P_ node) { - TICK_ENT_DYN_THK(); - STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(2)); - UPD_BH_UPDATABLE(); - LDV_ENTER(R1); - ENTER_CCS_THUNK(R1); - PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,1); - R1 = StgThunk_payload(R1,0); - Sp = Sp - SIZEOF_StgUpdateFrame - WDS(1); - Sp_adj(-1); // for stg_ap_*_ret - TICK_UNKNOWN_CALL(); - TICK_SLOW_CALL_p(); - jump RET_LBL(stg_ap_p); + TICK_ENT_DYN_THK(); + STK_CHK_NP(node); + UPD_BH_UPDATABLE(node); + LDV_ENTER(node); + ENTER_CCS_THUNK(node); + push (UPDATE_FRAME_FIELDS(,,stg_upd_frame_info, node)) { + jump stg_ap_p_fast + (StgThunk_payload(node,0), + StgThunk_payload(node,1)); + } } INFO_TABLE(stg_ap_3_upd,3,0,THUNK,"stg_ap_3_upd_info","stg_ap_3_upd_info") + (P_ node) { - TICK_ENT_DYN_THK(); - STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(3)); - UPD_BH_UPDATABLE(); - LDV_ENTER(R1); - ENTER_CCS_THUNK(R1); - PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,2); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(2)] = StgThunk_payload(R1,1); - R1 = StgThunk_payload(R1,0); - Sp = Sp - SIZEOF_StgUpdateFrame - WDS(2); - Sp_adj(-1); // for stg_ap_*_ret - TICK_UNKNOWN_CALL(); - TICK_SLOW_CALL_pp(); - jump RET_LBL(stg_ap_pp); + TICK_ENT_DYN_THK(); + STK_CHK_NP(node); + UPD_BH_UPDATABLE(node); + LDV_ENTER(node); + ENTER_CCS_THUNK(node); + push (UPDATE_FRAME_FIELDS(,,stg_upd_frame_info, node)) { + jump stg_ap_pp_fast + (StgThunk_payload(node,0), + StgThunk_payload(node,1), + StgThunk_payload(node,2)); + } } INFO_TABLE(stg_ap_4_upd,4,0,THUNK,"stg_ap_4_upd_info","stg_ap_4_upd_info") + (P_ node) { - TICK_ENT_DYN_THK(); - STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(4)); - UPD_BH_UPDATABLE(); - LDV_ENTER(R1); - ENTER_CCS_THUNK(R1); - PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,3); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(2)] = StgThunk_payload(R1,2); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(3)] = StgThunk_payload(R1,1); - R1 = StgThunk_payload(R1,0); - Sp = Sp - SIZEOF_StgUpdateFrame - WDS(3); - Sp_adj(-1); // for stg_ap_*_ret - TICK_UNKNOWN_CALL(); - TICK_SLOW_CALL_ppp(); - jump RET_LBL(stg_ap_ppp); + TICK_ENT_DYN_THK(); + STK_CHK_NP(node); + UPD_BH_UPDATABLE(node); + LDV_ENTER(node); + ENTER_CCS_THUNK(node); + push (UPDATE_FRAME_FIELDS(,,stg_upd_frame_info, node)) { + jump stg_ap_ppp_fast + (StgThunk_payload(node,0), + StgThunk_payload(node,1), + StgThunk_payload(node,2), + StgThunk_payload(node,3)); + } } INFO_TABLE(stg_ap_5_upd,5,0,THUNK,"stg_ap_5_upd_info","stg_ap_5_upd_info") + (P_ node) { - TICK_ENT_DYN_THK(); - STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(5)); - UPD_BH_UPDATABLE(); - LDV_ENTER(R1); - ENTER_CCS_THUNK(R1); - PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,4); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(2)] = StgThunk_payload(R1,3); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(3)] = StgThunk_payload(R1,2); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(4)] = StgThunk_payload(R1,1); - R1 = StgThunk_payload(R1,0); - Sp = Sp - SIZEOF_StgUpdateFrame - WDS(4); - Sp_adj(-1); // for stg_ap_*_ret - TICK_UNKNOWN_CALL(); - TICK_SLOW_CALL_pppp(); - jump RET_LBL(stg_ap_pppp); + TICK_ENT_DYN_THK(); + STK_CHK_NP(node); + UPD_BH_UPDATABLE(node); + LDV_ENTER(node); + ENTER_CCS_THUNK(node); + push (UPDATE_FRAME_FIELDS(,,stg_upd_frame_info, node)) { + jump stg_ap_pppp_fast + (StgThunk_payload(node,0), + StgThunk_payload(node,1), + StgThunk_payload(node,2), + StgThunk_payload(node,3), + StgThunk_payload(node,4)); + } } INFO_TABLE(stg_ap_6_upd,6,0,THUNK,"stg_ap_6_upd_info","stg_ap_6_upd_info") + (P_ node) { - TICK_ENT_DYN_THK(); - STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(6)); - UPD_BH_UPDATABLE(); - LDV_ENTER(R1); - ENTER_CCS_THUNK(R1); - PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,5); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(2)] = StgThunk_payload(R1,4); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(3)] = StgThunk_payload(R1,3); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(4)] = StgThunk_payload(R1,2); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(5)] = StgThunk_payload(R1,1); - R1 = StgThunk_payload(R1,0); - Sp = Sp - SIZEOF_StgUpdateFrame - WDS(5); - Sp_adj(-1); // for stg_ap_*_ret - TICK_UNKNOWN_CALL(); - TICK_SLOW_CALL_ppppp(); - jump RET_LBL(stg_ap_ppppp); + TICK_ENT_DYN_THK(); + STK_CHK_NP(node); + UPD_BH_UPDATABLE(node); + LDV_ENTER(node); + ENTER_CCS_THUNK(node); + push (UPDATE_FRAME_FIELDS(,,stg_upd_frame_info, node)) { + jump stg_ap_ppppp_fast + (StgThunk_payload(node,0), + StgThunk_payload(node,1), + StgThunk_payload(node,2), + StgThunk_payload(node,3), + StgThunk_payload(node,4), + StgThunk_payload(node,5)); + } } INFO_TABLE(stg_ap_7_upd,7,0,THUNK,"stg_ap_7_upd_info","stg_ap_7_upd_info") + (P_ node) { - TICK_ENT_DYN_THK(); - STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(7)); - UPD_BH_UPDATABLE(); - LDV_ENTER(R1); - ENTER_CCS_THUNK(R1); - PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,6); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(2)] = StgThunk_payload(R1,5); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(3)] = StgThunk_payload(R1,4); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(4)] = StgThunk_payload(R1,3); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(5)] = StgThunk_payload(R1,2); - W_[Sp-SIZEOF_StgUpdateFrame-WDS(6)] = StgThunk_payload(R1,1); - R1 = StgThunk_payload(R1,0); - Sp = Sp - SIZEOF_StgUpdateFrame - WDS(6); - Sp_adj(-1); // for stg_ap_*_ret - TICK_UNKNOWN_CALL(); - TICK_SLOW_CALL_pppppp(); - jump RET_LBL(stg_ap_pppppp); + TICK_ENT_DYN_THK(); + STK_CHK_NP(node); + UPD_BH_UPDATABLE(node); + LDV_ENTER(node); + ENTER_CCS_THUNK(node); + push (UPDATE_FRAME_FIELDS(,,stg_upd_frame_info, node)) { + jump stg_ap_pppppp_fast + (StgThunk_payload(node,0), + StgThunk_payload(node,1), + StgThunk_payload(node,2), + StgThunk_payload(node,3), + StgThunk_payload(node,4), + StgThunk_payload(node,5), + StgThunk_payload(node,6)); + } } diff --git a/rts/Updates.cmm b/rts/Updates.cmm index 44fbc0e194..2bc21ec332 100644 --- a/rts/Updates.cmm +++ b/rts/Updates.cmm @@ -16,85 +16,72 @@ #include "Updates.h" -#if defined(PROFILING) -#define UPD_FRAME_PARAMS W_ unused1, W_ unused2, P_ unused3 -#else -#define UPD_FRAME_PARAMS P_ unused1 -#endif - -/* The update fragment has been tuned so as to generate good - code with gcc, which accounts for some of the strangeness in the - way it is written. - - In particular, the JMP_(ret) bit is passed down and pinned on the - end of each branch (there end up being two major branches in the - code), since we don't mind duplicating this jump. -*/ - -/* on entry to the update code - (1) R1 points to the closure being returned - (2) Sp points to the update frame -*/ - -INFO_TABLE_RET( stg_upd_frame, UPDATE_FRAME, UPD_FRAME_PARAMS) +/* + * The update code is PERFORMANCE CRITICAL, if you make any changes + * here make sure you eyeball the assembly and check that the fast + * path (update in generation 0) is optimal. + * + * The return(ret) bit is passed down and pinned on the end of each + * branch (there end up being two major branches in the code), since + * we don't mind duplicating this jump. + */ +INFO_TABLE_RET ( stg_upd_frame, UPDATE_FRAME, + UPDATE_FRAME_FIELDS(W_,P_,info_ptr,updatee) ) + return (P_ ret) /* the closure being returned */ { - W_ updatee; - - updatee = StgUpdateFrame_updatee(Sp); - - /* remove the update frame from the stack */ - Sp = Sp + SIZEOF_StgUpdateFrame; - /* ToDo: it might be a PAP, so we should check... */ TICK_UPD_CON_IN_NEW(sizeW_fromITBL(%GET_STD_INFO(updatee))); - - updateWithIndirection(updatee, - R1, - jump %ENTRY_CODE(Sp(0)) [R1]); -} + updateWithIndirection(updatee, ret, return (ret)); +} -INFO_TABLE_RET( stg_marked_upd_frame, UPDATE_FRAME, UPD_FRAME_PARAMS) +/* + * An update frame where the updatee has been replaced by a BLACKHOLE + * closure by threadPaused. We may have threads to wake up, and we + * also have to check whether the blackhole has been updated by + * another thread in the meantime. + */ +INFO_TABLE_RET ( stg_marked_upd_frame, UPDATE_FRAME, + UPDATE_FRAME_FIELDS(W_,P_,info_ptr,updatee) ) + return (P_ ret) /* the closure being returned */ { - W_ updatee, v, i, tso, link; + W_ v, i, tso, link; // we know the closure is a BLACKHOLE - updatee = StgUpdateFrame_updatee(Sp); v = StgInd_indirectee(updatee); - // remove the update frame from the stack - Sp = Sp + SIZEOF_StgUpdateFrame; - if (GETTAG(v) != 0) { // updated by someone else: discard our value and use the // other one to increase sharing, but check the blocking // queues to see if any threads were waiting on this BLACKHOLE. - R1 = v; - foreign "C" checkBlockingQueues(MyCapability() "ptr", - CurrentTSO "ptr") [R1]; - jump %ENTRY_CODE(Sp(0)) [R1]; + ccall checkBlockingQueues(MyCapability() "ptr", CurrentTSO "ptr"); + return (v); } // common case: it is still our BLACKHOLE if (v == CurrentTSO) { - updateWithIndirection(updatee, - R1, - jump %ENTRY_CODE(Sp(0)) [R1]); + updateWithIndirection(updatee, ret, return (ret)); } // The other cases are all handled by the generic code - foreign "C" updateThunk (MyCapability() "ptr", CurrentTSO "ptr", - updatee "ptr", R1 "ptr") [R1]; + ccall updateThunk (MyCapability() "ptr", CurrentTSO "ptr", + updatee "ptr", ret "ptr"); - jump %ENTRY_CODE(Sp(0)) [R1]; + return (ret); } -// Special update frame code for CAFs and eager-blackholed thunks: it -// knows how to update blackholes, but is distinct from -// stg_marked_upd_frame so that lazy blackholing won't treat it as the -// high watermark. -INFO_TABLE_RET (stg_bh_upd_frame, UPDATE_FRAME, UPD_FRAME_PARAMS) +/* + * Special update frame code for CAFs and eager-blackholed thunks: it + * knows how to update blackholes, but is distinct from + * stg_marked_upd_frame so that lazy blackholing won't treat it as the + * high watermark. + */ +INFO_TABLE_RET ( stg_bh_upd_frame, UPDATE_FRAME, + UPDATE_FRAME_FIELDS(W_,P_,info_ptr,updatee) ) + return (P_ ret) /* the closure being returned */ { - jump RET_LBL(stg_marked_upd_frame) [R1]; + // This all compiles away to a single jump instruction (sigh) + jump RET_LBL(stg_marked_upd_frame) + ( UPDATE_FRAME_FIELDS(,,info_ptr,updatee) ) + (ret); } - diff --git a/rts/Updates.h b/rts/Updates.h index 954f02afe1..0205e6e763 100644 --- a/rts/Updates.h +++ b/rts/Updates.h @@ -24,29 +24,34 @@ * field. So, we call LDV_RECORD_CREATE(). */ -/* We have two versions of this macro (sadly), one for use in C-- code, +/* + * We have two versions of this macro (sadly), one for use in C-- code, * and the other for C. * * The and_then argument is a performance hack so that we can paste in * the continuation code directly. It helps shave a couple of * instructions off the common case in the update code, which is * worthwhile (the update code is often part of the inner loop). - * (except that gcc now appears to common up this code again and - * invert the optimisation. Grrrr --SDM). */ #ifdef CMINUSMINUS -#define updateWithIndirection(p1, p2, and_then) \ +#define UPDATE_FRAME_FIELDS(w_,p_,info_ptr,updatee) \ + w_ info_ptr, \ + PROF_HDR_FIELDS(w_) \ + p_ updatee + + +#define updateWithIndirection(p1, p2, and_then) \ W_ bd; \ \ OVERWRITING_CLOSURE(p1); \ StgInd_indirectee(p1) = p2; \ - prim %write_barrier() []; \ + prim %write_barrier(); \ SET_INFO(p1, stg_BLACKHOLE_info); \ LDV_RECORD_CREATE(p1); \ bd = Bdescr(p1); \ if (bdescr_gen_no(bd) != 0 :: bits16) { \ - recordMutableCap(p1, TO_W_(bdescr_gen_no(bd)), R1); \ + recordMutableCap(p1, TO_W_(bdescr_gen_no(bd))); \ TICK_UPD_OLD_IND(); \ and_then; \ } else { \ diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c index c97e168433..34111f9206 100644 --- a/rts/sm/Compact.c +++ b/rts/sm/Compact.c @@ -301,37 +301,7 @@ thread_stack(StgPtr p, StgPtr stack_end) switch (info->i.type) { - // Dynamic bitmap: the mask is stored on the stack - case RET_DYN: - { - StgWord dyn; - dyn = ((StgRetDyn *)p)->liveness; - - // traverse the bitmap first - bitmap = RET_DYN_LIVENESS(dyn); - p = (P_)&((StgRetDyn *)p)->payload[0]; - size = RET_DYN_BITMAP_SIZE; - while (size > 0) { - if ((bitmap & 1) == 0) { - thread((StgClosure **)p); - } - p++; - bitmap = bitmap >> 1; - size--; - } - - // skip over the non-ptr words - p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE; - - // follow the ptr words - for (size = RET_DYN_PTRS(dyn); size > 0; size--) { - thread((StgClosure **)p); - p++; - } - continue; - } - - // small bitmap (<= 32 entries, or 64 on a 64-bit machine) + // small bitmap (<= 32 entries, or 64 on a 64-bit machine) case CATCH_RETRY_FRAME: case CATCH_STM_FRAME: case ATOMICALLY_FRAME: diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index 8be393b4bc..0ac9e2623a 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -670,7 +670,6 @@ loop: case RET_BCO: case RET_SMALL: case RET_BIG: - case RET_DYN: case UPDATE_FRAME: case UNDERFLOW_FRAME: case STOP_FRAME: diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c index 5c7fb8aa76..6237662720 100644 --- a/rts/sm/Sanity.c +++ b/rts/sm/Sanity.c @@ -105,32 +105,6 @@ checkStackFrame( StgPtr c ) /* All activation records have 'bitmap' style layout info. */ switch (info->i.type) { - case RET_DYN: /* Dynamic bitmap: the mask is stored on the stack */ - { - StgWord dyn; - StgPtr p; - StgRetDyn* r; - - r = (StgRetDyn *)c; - dyn = r->liveness; - - p = (P_)(r->payload); - checkSmallBitmap(p,RET_DYN_LIVENESS(r->liveness),RET_DYN_BITMAP_SIZE); - p += RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE; - - // skip over the non-pointers - p += RET_DYN_NONPTRS(dyn); - - // follow the ptr words - for (size = RET_DYN_PTRS(dyn); size > 0; size--) { - checkClosureShallow((StgClosure *)*p); - p++; - } - - return sizeofW(StgRetDyn) + RET_DYN_BITMAP_SIZE + - RET_DYN_NONPTR_REGS_SIZE + - RET_DYN_NONPTRS(dyn) + RET_DYN_PTRS(dyn); - } case UPDATE_FRAME: ASSERT(LOOKS_LIKE_CLOSURE_PTR(((StgUpdateFrame*)c)->updatee)); @@ -381,7 +355,6 @@ checkClosure( StgClosure* p ) case RET_BCO: case RET_SMALL: case RET_BIG: - case RET_DYN: case UPDATE_FRAME: case UNDERFLOW_FRAME: case STOP_FRAME: diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c index cbdf01b720..668b95da6b 100644 --- a/rts/sm/Scav.c +++ b/rts/sm/Scav.c @@ -1685,32 +1685,6 @@ scavenge_stack(StgPtr p, StgPtr stack_end) goto follow_srt; } - // Dynamic bitmap: the mask is stored on the stack, and - // there are a number of non-pointers followed by a number - // of pointers above the bitmapped area. (see StgMacros.h, - // HEAP_CHK_GEN). - case RET_DYN: - { - StgWord dyn; - dyn = ((StgRetDyn *)p)->liveness; - - // traverse the bitmap first - bitmap = RET_DYN_LIVENESS(dyn); - p = (P_)&((StgRetDyn *)p)->payload[0]; - size = RET_DYN_BITMAP_SIZE; - p = scavenge_small_bitmap(p, size, bitmap); - - // skip over the non-ptr words - p += RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE; - - // follow the ptr words - for (size = RET_DYN_PTRS(dyn); size > 0; size--) { - evacuate((StgClosure **)p); - p++; - } - continue; - } - case RET_FUN: { StgRetFun *ret_fun = (StgRetFun *)p; diff --git a/utils/genapply/GenApply.hs b/utils/genapply/GenApply.hs index 18a2f85a12..e859184c59 100644 --- a/utils/genapply/GenApply.hs +++ b/utils/genapply/GenApply.hs @@ -232,7 +232,7 @@ genMkPAP regstatus macro jump ticker disamb if is_fun_case then mb_tag_node arity else empty, if overflow_regs then text "jump_SAVE_CCCS" <> parens (text jump) <> semi - else text "jump " <> text jump <> semi + else text "jump " <> text jump <+> text "[*]" <> semi ]) $$ text "}" @@ -334,7 +334,7 @@ genMkPAP regstatus macro jump ticker disamb then text "R2 = " <> fun_info_label <> semi else empty, if is_fun_case then mb_tag_node n_args else empty, - text "jump " <> text jump <> semi + text "jump " <> text jump <+> text "[*]" <> semi ]) -- The LARGER ARITY cases: @@ -416,7 +416,7 @@ enterFastPathHelper tag regstatus no_load_regs args_in_regs args = reg_doc, text " Sp_adj(" <> int sp' <> text ");", -- enter, but adjust offset with tag - text " jump " <> text "%GET_ENTRY(R1-" <> int tag <> text ");", + text " jump " <> text "%GET_ENTRY(R1-" <> int tag <> text ") [*];", text "}" ] -- I don't totally understand this code, I copied it from @@ -478,7 +478,7 @@ genApply regstatus args = in vcat [ text "INFO_TABLE_RET(" <> mkApplyName args <> text ", " <> - text "RET_SMALL, " <> (cat $ zipWith formalParam args [1..]) <> + text "RET_SMALL, W_ info_ptr, " <> (cat $ zipWith formalParam args [1..]) <> text ")\n{", nest 4 (vcat [ text "W_ info;", @@ -701,7 +701,7 @@ genApplyFast regstatus args = nest 4 (vcat [ text "Sp_adj" <> parens (int (-sp_offset)) <> semi, saveRegOffs reg_locs, - text "jump" <+> fun_ret_label <> semi + text "jump" <+> fun_ret_label <+> text "[*]" <> semi ]), char '}' ]), @@ -739,7 +739,7 @@ genStackApply regstatus args = (assign_regs, sp') = loadRegArgs regstatus 0 args body = vcat [assign_regs, text "Sp_adj" <> parens (int sp') <> semi, - text "jump %GET_ENTRY(UNTAG(R1));" + text "jump %GET_ENTRY(UNTAG(R1)) [*];" ] -- ----------------------------------------------------------------------------- @@ -766,7 +766,7 @@ genStackSave regstatus args = text "Sp(2) = R1;", text "Sp(1) =" <+> int stk_args <> semi, text "Sp(0) = stg_gc_fun_info;", - text "jump stg_gc_noregs;" + text "jump stg_gc_noregs [];" ] std_frame_size = 3 -- the std bits of the frame. See StgRetFun in Closures.h, |