author     Simon Marlow <marlowsd@gmail.com>   2012-07-31 11:19:03 +0100
committer  Simon Marlow <marlowsd@gmail.com>   2012-08-02 11:56:02 +0100
commit     6ede0067a91a0da7f682c3ea1d6df938616e983a (patch)
tree       967a755612581b3d578f9879b56353a830cfa4bb
parent     d2361423f23c3381b5a7f57c3f9e6c2448cdac80 (diff)
download   haskell-6ede0067a91a0da7f682c3ea1d6df938616e983a.tar.gz
Explicitly share some return continuations
Instead of relying on common-block elimination to share return continuations in the common case (case-alternative heap checks), we do it explicitly. This isn't hard to do, is more robust, and saves some compilation time. Full commentary in Note [sharing continuations].
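For illustration (a Cmm sketch; the labels are made up), where we
previously emitted a fresh return continuation on the heap-check
failure path and left it to common-block elimination to merge it
with the call's return point:

      Sp[young(L6)] = L6
      call stg_gc_unpt_r1 returns to L6
    L6:
      z = R1
      goto L1

we now jump straight back to the existing return point L1, without
storing a return address (it is already on the stack):

      call stg_gc_unpt_r1 returns to L1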
-rw-r--r--   compiler/cmm/CmmPipeline.hs         8
-rw-r--r--   compiler/cmm/MkGraph.hs            27
-rw-r--r--   compiler/codeGen/StgCmm.hs          5
-rw-r--r--   compiler/codeGen/StgCmmBind.hs      6
-rw-r--r--   compiler/codeGen/StgCmmExpr.hs    177
-rw-r--r--   compiler/codeGen/StgCmmForeign.hs   3
-rw-r--r--   compiler/codeGen/StgCmmHeap.hs     99
-rw-r--r--   compiler/codeGen/StgCmmLayout.hs   31
-rw-r--r--   compiler/codeGen/StgCmmMonad.hs    90
-rw-r--r--   compiler/codeGen/StgCmmPrim.hs      2
-rw-r--r--   compiler/codeGen/StgCmmUtils.hs     2
-rw-r--r--   compiler/main/DynFlags.hs           3
12 files changed, 283 insertions, 170 deletions
diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs
index 6042a08d25..f96e77bda7 100644
--- a/compiler/cmm/CmmPipeline.hs
+++ b/compiler/cmm/CmmPipeline.hs
@@ -60,8 +60,12 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}})
dump Opt_D_dump_cmmz_cfg "Post control-flow optimisations" g
----------- Eliminate common blocks -------------------
- g <- {-# SCC "elimCommonBlocks" #-} return $ elimCommonBlocks g
- dump Opt_D_dump_cmmz_cbe "Post common block elimination" g
+ g <- if dopt Opt_CmmElimCommonBlocks dflags
+ then do g <- {-# SCC "elimCommonBlocks" #-} return $ elimCommonBlocks g
+ dump Opt_D_dump_cmmz_cbe "Post common block elimination" g
+ return g
+ else return g
+
-- Any work storing block Labels must be performed _after_
-- elimCommonBlocks
diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs
index 443fa3a441..4703b47f42 100644
--- a/compiler/cmm/MkGraph.hs
+++ b/compiler/cmm/MkGraph.hs
@@ -8,6 +8,7 @@ module MkGraph
, stackStubExpr
, mkNop, mkAssign, mkStore, mkUnsafeCall, mkFinalCall, mkCallReturnsTo
+ , mkJumpReturnsTo
, mkJump, mkDirectJump, mkForeignJump, mkForeignJumpExtra, mkJumpGC
, mkCbranch, mkSwitch
, mkReturn, mkReturnSimple, mkComment, mkCallEntry, mkBranch
@@ -234,6 +235,17 @@ mkCallReturnsTo f callConv actuals ret_lbl ret_off updfr_off extra_stack = do
updfr_off extra_stack $
toCall f (Just ret_lbl) updfr_off ret_off
+-- Like mkCallReturnsTo, but does not push the return address (it is assumed to be
+-- already on the stack).
+mkJumpReturnsTo :: CmmExpr -> Convention -> [CmmActual]
+ -> BlockId
+ -> ByteOff
+ -> UpdFrameOffset
+ -> CmmAGraph
+mkJumpReturnsTo f callConv actuals ret_lbl ret_off updfr_off = do
+ lastWithArgs JumpRet (Young ret_lbl) callConv actuals updfr_off $
+ toCall f (Just ret_lbl) updfr_off ret_off
+
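+-- A typical use (cf. cannedGCReturnsTo in StgCmmHeap.hs below): jump
+-- to a canned GC entry point, returning to a continuation lret whose
+-- return address is already on the stack:
+--
+--   mkJumpReturnsTo gc_entry GC reg_exprs lret off updfr_sz
+--
+-- (gc_entry, reg_exprs and updfr_sz are illustrative names.)
+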
mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph
mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as
@@ -289,7 +301,7 @@ oneCopyOflowI area (reg, off) (n, ms) =
-- Factoring out the common parts of the copyout functions yielded something
-- more complicated:
-data Transfer = Call | Jump | Ret deriving Eq
+data Transfer = Call | JumpRet | Jump | Ret deriving Eq
copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual]
-> UpdFrameOffset
@@ -321,10 +333,15 @@ copyOutOflow conv transfer area actuals updfr_off
case area of
Young id -> id `seq` -- Generate a store instruction for
-- the return address if making a call
- if transfer == Call then
- ([(CmmLit (CmmBlock id), StackParam init_offset)],
- widthInBytes wordWidth)
- else ([], 0)
+ case transfer of
+ Call ->
+ ([(CmmLit (CmmBlock id), StackParam init_offset)],
+ widthInBytes wordWidth)
+ JumpRet ->
+ ([],
+ widthInBytes wordWidth)
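+ -- JumpRet: reserve the return-address slot so the callee's
+ -- frame layout matches a Call, but emit no store: the return
+ -- address is assumed to be on the stack already.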
+ _other ->
+ ([], 0)
Old -> ([], updfr_off)
arg_offset = init_offset + extra_stack_off
diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs
index 70892eeb5e..d82b4bc3b1 100644
--- a/compiler/codeGen/StgCmm.hs
+++ b/compiler/codeGen/StgCmm.hs
@@ -244,8 +244,9 @@ cgDataCon data_con
do { _ <- ticky_code
; ldvEnter (CmmReg nodeReg)
; tickyReturnOldCon (length arg_things)
- ; emitReturn [cmmOffsetB (CmmReg nodeReg)
- (tagForCon data_con)] }
+ ; _ <- emitReturn [cmmOffsetB (CmmReg nodeReg)
+ (tagForCon data_con)]
+ ; return () }
-- The case continuation code expects a tagged pointer
arg_reps :: [(PrimRep, UnaryType)]
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
index e40c660fdb..a0fcc1ac5d 100644
--- a/compiler/codeGen/StgCmmBind.hs
+++ b/compiler/codeGen/StgCmmBind.hs
@@ -435,7 +435,8 @@ closureCodeBody top_lvl bndr cl_info _cc args arity body fv_details
-- heap check, to reduce live vars over check
; if node_points then load_fvs node lf_info fv_bindings
else return ()
- ; cgExpr body }}
+ ; _ <- cgExpr body
+ ; return () }}
}
-- A function closure pointer may be tagged, so we
@@ -501,7 +502,8 @@ thunkCode cl_info fv_details _cc node arity body
; let lf_info = closureLFInfo cl_info
; fv_bindings <- mapM bind_fv fv_details
; load_fvs node lf_info fv_bindings
- ; cgExpr body }}}
+ ; _ <- cgExpr body
+ ; return () }}}
------------------------------------------------------------------------
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
index 65e2416d2f..95c61082c0 100644
--- a/compiler/codeGen/StgCmmExpr.hs
+++ b/compiler/codeGen/StgCmmExpr.hs
@@ -56,7 +56,7 @@ import UniqSupply
-- cgExpr: the main function
------------------------------------------------------------------------
-cgExpr :: StgExpr -> FCode ()
+cgExpr :: StgExpr -> FCode ReturnKind
cgExpr (StgApp fun args) = cgIdApp fun args
@@ -76,8 +76,9 @@ cgExpr (StgLetNoEscape _ _ binds expr) =
do { us <- newUniqSupply
; let join_id = mkBlockId (uniqFromSupply us)
; cgLneBinds join_id binds
- ; cgExpr expr
- ; emitLabel join_id}
+ ; r <- cgExpr expr
+ ; emitLabel join_id
+ ; return r }
cgExpr (StgCase expr _live_vars _save_vars bndr _srt alt_type alts) =
cgCase expr bndr alt_type alts
@@ -161,7 +162,7 @@ cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
= do { arg_regs <- forkProc $ do
{ restoreCurrentCostCentre cc_slot
; arg_regs <- bindArgsToRegs args
- ; altHeapCheck arg_regs (cgExpr body)
+ ; _ <- altHeapCheck arg_regs (cgExpr body)
-- Using altHeapCheck just reduces
-- instructions to save on stack
; return arg_regs }
@@ -283,7 +284,7 @@ data GcPlan
-- of the case alternative(s) into the upstream check
-------------------------------------
-cgCase :: StgExpr -> Id -> AltType -> [StgAlt] -> FCode ()
+cgCase :: StgExpr -> Id -> AltType -> [StgAlt] -> FCode ReturnKind
cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
| isEnumerationTyCon tycon -- Note [case on bool]
@@ -296,9 +297,10 @@ cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
; emitAssign (CmmLocal tmp_reg)
(tagToClosure tycon tag_expr) }
- ; (mb_deflt, branches) <- cgAlgAltRhss NoGcInAlts Nothing
+ ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
(NonVoid bndr) alts
; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
+ ; return AssignedDirectly
}
where
do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
@@ -369,21 +371,21 @@ cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
; v_info <- getCgIdInfo v
; emitAssign (CmmLocal (idToReg (NonVoid bndr))) (idInfoToAmode v_info)
; _ <- bindArgsToRegs [NonVoid bndr]
- ; cgAlts NoGcInAlts (NonVoid bndr) alt_type alts }
+ ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
where
reps_compatible = idPrimRep v == idPrimRep bndr
cgCase scrut@(StgApp v []) _ (PrimAlt _) _
= -- fail at run-time, not compile-time
do { mb_cc <- maybeSaveCostCentre True
- ; withSequel (AssignTo [idToReg (NonVoid v)] False) (cgExpr scrut)
+ ; _ <- withSequel (AssignTo [idToReg (NonVoid v)] False) (cgExpr scrut)
; restoreCurrentCostCentre mb_cc
; emitComment $ mkFastString "should be unreachable code"
; l <- newLabelC
; emitLabel l
; emit (mkBranch l)
+ ; return AssignedDirectly
}
-
{-
case seq# a s of v
(# s', a' #) -> e
@@ -396,6 +398,7 @@ case a of v
(taking advantage of the fact that the return convention for (# State#, a #)
is the same as the return convention for just 'a')
-}
+
cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
= -- handle seq#, same return convention as vanilla 'a'.
cgCase (StgApp a []) bndr alt_type alts
@@ -406,19 +409,25 @@ cgCase scrut bndr alt_type alts
; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
alt_regs = map idToReg ret_bndrs
simple_scrut = isSimpleScrut scrut alt_type
- gcInAlts | not simple_scrut = True
- | isSingleton alts = False
- | up_hp_usg > 0 = False
- | otherwise = True
- gc_plan = if gcInAlts then GcInAlts alt_regs else NoGcInAlts
+ do_gc | not simple_scrut = True
+ | isSingleton alts = False
+ | up_hp_usg > 0 = False
+ | otherwise = True
+ gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts
; mb_cc <- maybeSaveCostCentre simple_scrut
- ; withSequel (AssignTo alt_regs gcInAlts) (cgExpr scrut)
- ; restoreCurrentCostCentre mb_cc
- -- JD: We need Note: [Better Alt Heap Checks]
+ -- if do_gc then our sequel will be ReturnTo
+ -- - generate code for the sequel now
+ -- - pass info about the sequel to cgAlts for use in the heap check
+ -- else sequel will be AssignTo
+
+ ; ret_kind <- withSequel (AssignTo alt_regs False) (cgExpr scrut)
+ ; restoreCurrentCostCentre mb_cc
; _ <- bindArgsToRegs ret_bndrs
- ; cgAlts gc_plan (NonVoid bndr) alt_type alts }
+ ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
+ }
+
-----------------
maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
@@ -465,17 +474,18 @@ chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
-- UbxTupALt has only one alternative
-------------------------------------
-cgAlts :: GcPlan -> NonVoid Id -> AltType -> [StgAlt] -> FCode ()
+cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [StgAlt]
+ -> FCode ReturnKind
-- At this point the results of the case are in the binders
cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)]
- = maybeAltHeapCheck gc_plan Nothing (cgExpr rhs)
-
+ = maybeAltHeapCheck gc_plan (cgExpr rhs)
+
cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)]
- = maybeAltHeapCheck gc_plan Nothing (cgExpr rhs)
+ = maybeAltHeapCheck gc_plan (cgExpr rhs)
-- Here bndrs are *already* in scope, so don't rebind them
cgAlts gc_plan bndr (PrimAlt _) alts
- = do { tagged_cmms <- cgAltRhss gc_plan Nothing bndr alts
+ = do { tagged_cmms <- cgAltRhss gc_plan bndr alts
; let bndr_reg = CmmLocal (idToReg bndr)
(DEFAULT,deflt) = head tagged_cmms
@@ -484,25 +494,23 @@ cgAlts gc_plan bndr (PrimAlt _) alts
tagged_cmms' = [(lit,code)
| (LitAlt lit, code) <- tagged_cmms]
- ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt }
+ ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
+ ; return AssignedDirectly }
cgAlts gc_plan bndr (AlgAlt tycon) alts
- = do { retry_lbl <- newLabelC
- ; emitLabel retry_lbl -- Note [alg-alt heap checks]
-
- ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan (Just retry_lbl)
- bndr alts
+ = do { (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts
; let fam_sz = tyConFamilySize tycon
bndr_reg = CmmLocal (idToReg bndr)
-- Is the constructor tag in the node reg?
; if isSmallFamily fam_sz
- then let -- Yes, bndr_reg has constr. tag in ls bits
+ then do
+ let -- Yes, bndr_reg has constr. tag in ls bits
tag_expr = cmmConstrTag1 (CmmReg bndr_reg)
branches' = [(tag+1,branch) | (tag,branch) <- branches]
- in
- emitSwitch tag_expr branches' mb_deflt 1 fam_sz
+ emitSwitch tag_expr branches' mb_deflt 1 fam_sz
+ return AssignedDirectly
else -- No, get tag from info table
do dflags <- getDynFlags
@@ -510,7 +518,8 @@ cgAlts gc_plan bndr (AlgAlt tycon) alts
-- when the family size is big enough
untagged_ptr = cmmRegOffB bndr_reg (-1)
tag_expr = getConstrTag dflags (untagged_ptr)
- emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) }
+ emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
+ return AssignedDirectly }
cgAlts _ _ _ _ = panic "cgAlts"
-- UbxTupAlt and PolyAlt have only one alternative
@@ -537,11 +546,11 @@ cgAlts _ _ _ _ = panic "cgAlts"
-- goto L1
-------------------
-cgAlgAltRhss :: GcPlan -> Maybe BlockId -> NonVoid Id -> [StgAlt]
+cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
-> FCode ( Maybe CmmAGraph
, [(ConTagZ, CmmAGraph)] )
-cgAlgAltRhss gc_plan retry_lbl bndr alts
- = do { tagged_cmms <- cgAltRhss gc_plan retry_lbl bndr alts
+cgAlgAltRhss gc_plan bndr alts
+ = do { tagged_cmms <- cgAltRhss gc_plan bndr alts
; let { mb_deflt = case tagged_cmms of
((DEFAULT,rhs) : _) -> Just rhs
@@ -557,32 +566,32 @@ cgAlgAltRhss gc_plan retry_lbl bndr alts
-------------------
-cgAltRhss :: GcPlan -> Maybe BlockId -> NonVoid Id -> [StgAlt]
+cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
-> FCode [(AltCon, CmmAGraph)]
-cgAltRhss gc_plan retry_lbl bndr alts
+cgAltRhss gc_plan bndr alts
= forkAlts (map cg_alt alts)
where
base_reg = idToReg bndr
cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph)
cg_alt (con, bndrs, _uses, rhs)
= getCodeR $
- maybeAltHeapCheck gc_plan retry_lbl $
+ maybeAltHeapCheck gc_plan $
do { _ <- bindConArgs con base_reg bndrs
- ; cgExpr rhs
- ; return con }
+ ; _ <- cgExpr rhs
+ ; return con }
-maybeAltHeapCheck :: GcPlan -> Maybe BlockId -> FCode a -> FCode a
-maybeAltHeapCheck NoGcInAlts _ code = code
-maybeAltHeapCheck (GcInAlts regs) mlbl code =
- case mlbl of
- Nothing -> altHeapCheck regs code
- Just retry_lbl -> altHeapCheckReturnsTo regs retry_lbl code
+maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
+maybeAltHeapCheck (NoGcInAlts,_) code = code
+maybeAltHeapCheck (GcInAlts regs, AssignedDirectly) code =
+ altHeapCheck regs code
+maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code =
+ altHeapCheckReturnsTo regs lret off code
-----------------------------------------------------------------------------
-- Tail calls
-----------------------------------------------------------------------------
-cgConApp :: DataCon -> [StgArg] -> FCode ()
+cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
cgConApp con stg_args
| isUnboxedTupleCon con -- Unboxed tuple: assign and return
= do { arg_exprs <- getNonVoidArgAmodes stg_args
@@ -599,7 +608,7 @@ cgConApp con stg_args
; emitReturn [idInfoToAmode idinfo] }
-cgIdApp :: Id -> [StgArg] -> FCode ()
+cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
cgIdApp fun_id [] | isVoidId fun_id = emitReturn []
cgIdApp fun_id args
= do { fun_info <- getCgIdInfo fun_id
@@ -607,14 +616,15 @@ cgIdApp fun_id args
Just (blk_id, lne_regs) -> cgLneJump blk_id lne_regs args
Nothing -> cgTailCall fun_id fun_info args }
-cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode ()
+cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode ReturnKind
cgLneJump blk_id lne_regs args -- Join point; discard sequel
= do { adjustHpBackwards -- always do this before a tail-call
; cmm_args <- getNonVoidArgAmodes args
; emitMultiAssign lne_regs cmm_args
- ; emit (mkBranch blk_id) }
+ ; emit (mkBranch blk_id)
+ ; return AssignedDirectly }
-cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode ()
+cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode ReturnKind
cgTailCall fun_id fun_info args = do
dflags <- getDynFlags
case (getCallMethod dflags fun_name (idCafInfo fun_id) lf_info (length args)) of
@@ -647,7 +657,7 @@ cgTailCall fun_id fun_info args = do
node_points dflags = nodeMustPointToIt dflags lf_info
-emitEnter :: CmmExpr -> FCode ()
+emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
{ adjustHpBackwards
; sequel <- getSequel
@@ -665,6 +675,7 @@ emitEnter fun = do
{ let entry = entryCode $ closureInfoPtr $ CmmReg nodeReg
; emit $ mkForeignJump NativeNodeCall entry
[cmmUntag fun] updfr_off
+ ; return AssignedDirectly
}
-- The result will be scrutinised in the sequel. This is where
@@ -687,12 +698,18 @@ emitEnter fun = do
-- ensure that we generate only one proc-point for this
-- sequence.
--
+ -- Furthermore, we tell the caller that we generated a native
+ -- return continuation by returning (ReturnedTo Lret off), so
+ -- that the continuation can be reused by the heap-check failure
+ -- code in the enclosing case expression.
+ --
AssignTo res_regs _ -> do
{ lret <- newLabelC
+ ; let (off, copyin) = copyInOflow NativeReturn (Young lret) res_regs
; lcall <- newLabelC
+ ; updfr_off <- getUpdFrameOff
; let area = Young lret
- ; let (off, copyin) = copyInOflow NativeReturn area res_regs
- (outArgs, regs, copyout) = copyOutOflow NativeNodeCall Call area
+ ; let (outArgs, regs, copyout) = copyOutOflow NativeNodeCall Call area
[fun] updfr_off (0,[])
-- refer to fun via nodeReg after the copyout, to avoid having
-- both live simultaneously; this sometimes enables fun to be
@@ -705,54 +722,6 @@ emitEnter fun = do
outOfLine lcall the_call <*>
mkLabel lret <*>
copyin
+ ; return (ReturnedTo lret off)
}
}
-
-
-{- Note [Better Alt Heap Checks]
-If two function calls can share a return point, then they will also
-get the same info table. Therefore, it's worth our effort to make
-those opportunities appear as frequently as possible.
-
-Here are a few examples of how it should work:
-
- STG:
- case f x of
- True -> <True code -- including allocation>
- False -> <False code>
- Cmm:
- r = call f(x) returns to L;
- L:
- if r & 7 >= 2 goto L1 else goto L2;
- L1:
- if Hp > HpLim then
- r = gc(r);
- goto L;
- <True code -- including allocation>
- L2:
- <False code>
-Note that the code following both the call to f(x) and the code to gc(r)
-should be the same, which will allow the common blockifier to discover
-that they are the same. Therefore, both function calls will return to the same
-block, and they will use the same info table.
-
-Here's an example of the Cmm code we want from a primOp.
-The primOp doesn't produce an info table for us to reuse, but that's okay:
-we should still generate the same code:
- STG:
- case f x of
- 0 -> <0-case code -- including allocation>
- _ -> <default-case code>
- Cmm:
- r = a +# b;
- L:
- if r == 0 then goto L1 else goto L2;
- L1:
- if Hp > HpLim then
- r = gc(r);
- goto L;
- <0-case code -- including allocation>
- L2:
- <default-case code>
--}
-
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index 8c061cf00c..a6274662ad 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -51,7 +51,7 @@ import Control.Monad
cgForeignCall :: ForeignCall -- the op
-> [StgArg] -- x,y arguments
-> Type -- result type
- -> FCode ()
+ -> FCode ReturnKind
cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
= do { cmm_args <- getFCallArgs stg_args
@@ -90,6 +90,7 @@ cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
AssignTo assign_to_these _ ->
do { emitForeignCall safety assign_to_these call_target
call_args CmmMayReturn
+ ; return AssignedDirectly
}
_something_else ->
diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
index be4497aa5c..ddb6dd01e4 100644
--- a/compiler/codeGen/StgCmmHeap.hs
+++ b/compiler/codeGen/StgCmmHeap.hs
@@ -427,42 +427,79 @@ entryHeapCheck cl_info offset nodeSet arity args code
-- ------------------------------------------------------------
-- A heap/stack check in a case alternative
+
+-- If there are multiple alts and we need to GC, but don't have a
+-- continuation already (the scrut was simple), then we should
+-- pre-generate the continuation. (if there are multiple alts it is
+-- always a canned GC point).
+
+-- altHeapCheck:
+-- If we have a return continuation,
+-- then if it is a canned GC pattern,
+-- then we do mkJumpReturnsTo
+-- else we do a normal call to stg_gc_noregs
+-- else if it is a canned GC pattern,
+-- then generate the continuation and do mkCallReturnsTo
+-- else we do a normal call to stg_gc_noregs
+
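+-- For a canned GC point with no pre-existing continuation
+-- (altHeapCheck below), the generated code is roughly (a sketch;
+-- block names follow the variables in the code, and the continuation
+-- lret is placed out of line):
+--
+--    lcont:
+--      if (Hp > HpLim) then
+--        Sp[young(lret)] = lret
+--        call <canned gc entry>(regs) returns to lret
+--      <alt code>
+--    lret:
+--      <copyin regs>
+--      goto lcont
+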
altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck regs code
- = do loop_id <- newLabelC
- emitLabel loop_id
- altHeapCheckReturnsTo regs loop_id code
-
-altHeapCheckReturnsTo :: [LocalReg] -> Label -> FCode a -> FCode a
-altHeapCheckReturnsTo regs retry_lbl code
+ = case cannedGCEntryPoint regs of
+ Nothing -> genericGC code
+ Just gc -> do
+ lret <- newLabelC
+ let (off, copyin) = copyInOflow NativeReturn (Young lret) regs
+ lcont <- newLabelC
+ emitOutOfLine lret (copyin <*> mkBranch lcont)
+ emitLabel lcont
+ cannedGCReturnsTo False gc regs lret off code
+
+altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
+altHeapCheckReturnsTo regs lret off code
+ = case cannedGCEntryPoint regs of
+ Nothing -> genericGC code
+ Just gc -> cannedGCReturnsTo True gc regs lret off code
+
+cannedGCReturnsTo :: Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
+ -> FCode a
+ -> FCode a
+cannedGCReturnsTo cont_on_stack gc regs lret off code
= do updfr_sz <- getUpdFrameOff
- gc_call_code <- gc_call updfr_sz
- heapCheck False (gc_call_code <*> mkBranch retry_lbl) code
-
+ heapCheck False (gc_call gc updfr_sz) code
where
reg_exprs = map (CmmReg . CmmLocal) regs
-- Note [stg_gc arguments]
- gc_call sp =
- case rts_label regs of
- Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp (0,[])
- Nothing -> mkCall generic_gc (GC, GC) [] [] sp (0,[])
-
- rts_label [reg]
- | isGcPtrType ty = Just (mkGcLabel "stg_gc_unpt_r1")
- | isFloatType ty = case width of
- W32 -> Just (mkGcLabel "stg_gc_f1")
- W64 -> Just (mkGcLabel "stg_gc_d1")
- _ -> Nothing
+ gc_call label sp
+ | cont_on_stack = mkJumpReturnsTo label GC reg_exprs lret off sp
+ | otherwise = mkCallReturnsTo label GC reg_exprs lret off sp (0,[])
- | width == wordWidth = Just (mkGcLabel "stg_gc_unbx_r1")
- | width == W64 = Just (mkGcLabel "stg_gc_l1")
- | otherwise = Nothing
- where
- ty = localRegType reg
- width = typeWidth ty
-
- rts_label _ = Nothing
+genericGC :: FCode a -> FCode a
+genericGC code
+ = do updfr_sz <- getUpdFrameOff
+ lretry <- newLabelC
+ emitLabel lretry
+ call <- mkCall generic_gc (GC, GC) [] [] updfr_sz (0,[])
+ heapCheck False (call <*> mkBranch lretry) code
+
+cannedGCEntryPoint :: [LocalReg] -> Maybe CmmExpr
+cannedGCEntryPoint regs
+ = case regs of
+ [] -> Just (mkGcLabel "stg_gc_noregs")
+ [reg]
+ | isGcPtrType ty -> Just (mkGcLabel "stg_gc_unpt_r1")
+ | isFloatType ty -> case width of
+ W32 -> Just (mkGcLabel "stg_gc_f1")
+ W64 -> Just (mkGcLabel "stg_gc_d1")
+ _ -> Nothing
+
+ | width == wordWidth -> Just (mkGcLabel "stg_gc_unbx_r1")
+ | width == W64 -> Just (mkGcLabel "stg_gc_l1")
+ | otherwise -> Nothing
+ where
+ ty = localRegType reg
+ width = typeWidth ty
+ _otherwise -> Nothing
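+
+-- For example (illustrative): one live word-sized unboxed register
+-- selects stg_gc_unbx_r1, one Double# register stg_gc_d1, and no
+-- live registers stg_gc_noregs; with two or more live registers
+-- there is no canned entry point and we fall back to genericGC.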
-- Note [stg_gc arguments]
-- It might seem that we could avoid passing the arguments to the
@@ -484,11 +521,11 @@ altHeapCheckReturnsTo regs retry_lbl code
-- | The generic GC procedure; no params, no results
generic_gc :: CmmExpr
-generic_gc = CmmLit $ mkGcLabel "stg_gc_noregs"
+generic_gc = mkGcLabel "stg_gc_noregs"
-- | Create a CLabel for calling a garbage collector entry point
-mkGcLabel :: String -> CmmLit
-mkGcLabel = (CmmLabel . (mkCmmCodeLabel rtsPackageId) . fsLit)
+mkGcLabel :: String -> CmmExpr
+mkGcLabel s = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit s)))
-------------------------------
heapCheck :: Bool -> CmmAGraph -> FCode a -> FCode a
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 5bcb67f82b..8a20411064 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -77,11 +77,10 @@ import FastString
--
-- > p=x; q=y;
--
-emitReturn :: [CmmExpr] -> FCode ()
+emitReturn :: [CmmExpr] -> FCode ReturnKind
emitReturn results
= do { sequel <- getSequel;
; updfr_off <- getUpdFrameOff
- ; emitComment $ mkFastString ("emitReturn: " ++ show sequel)
; case sequel of
Return _ ->
do { adjustHpBackwards
@@ -89,6 +88,7 @@ emitReturn results
AssignTo regs adjust ->
do { if adjust then adjustHpBackwards else return ()
; emitMultiAssign regs results }
+ ; return AssignedDirectly
}
@@ -96,7 +96,7 @@ emitReturn results
-- using the call/return convention @conv@, passing @args@, and
-- returning the results to the current sequel.
--
-emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ()
+emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ReturnKind
emitCall convs fun args
= emitCallWithExtraStack convs fun args noExtraStack
@@ -108,17 +108,23 @@ emitCall convs fun args
--
emitCallWithExtraStack
:: (Convention, Convention) -> CmmExpr -> [CmmExpr]
- -> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode ()
-emitCallWithExtraStack convs@(callConv, _) fun args extra_stack
+ -> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode ReturnKind
+emitCallWithExtraStack (callConv, retConv) fun args extra_stack
= do { adjustHpBackwards
; sequel <- getSequel
; updfr_off <- getUpdFrameOff
- ; emitComment $ mkFastString ("emitCallWithExtraStack: " ++ show sequel)
; case sequel of
- Return _ ->
+ Return _ -> do
emit $ mkForeignJumpExtra callConv fun args updfr_off extra_stack
+ return AssignedDirectly
AssignTo res_regs _ -> do
- emit =<< mkCall fun convs res_regs args updfr_off extra_stack
+ k <- newLabelC
+ let area = Young k
+ (off, copyin) = copyInOflow retConv area res_regs
+ copyout = mkCallReturnsTo fun callConv args k off updfr_off
+ extra_stack
+ emit (copyout <*> mkLabel k <*> copyin)
+ return (ReturnedTo k off)
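+ -- i.e. we emit:
+ -- <copyout args>
+ -- call fun(...) returns to k
+ -- k: <copyin res_regs>
+ -- and report (ReturnedTo k off) so that an enclosing case
+ -- can reuse k from its heap-check failure path.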
}
@@ -166,7 +172,7 @@ adjustHpBackwards
-- call f() return to Nothing updfr_off: 32
-directCall :: Convention -> CLabel -> RepArity -> [StgArg] -> FCode ()
+directCall :: Convention -> CLabel -> RepArity -> [StgArg] -> FCode ReturnKind
-- (directCall f n args)
-- calls f(arg1, ..., argn), and applies the result to the remaining args
-- The function f has arity n, and there are guaranteed at least n args
@@ -176,17 +182,18 @@ directCall conv lbl arity stg_args
; direct_call "directCall" conv lbl arity argreps }
-slowCall :: CmmExpr -> [StgArg] -> FCode ()
+slowCall :: CmmExpr -> [StgArg] -> FCode ReturnKind
-- (slowCall fun args) applies fun to args, returning the results to Sequel
slowCall fun stg_args
= do { dflags <- getDynFlags
; argsreps <- getArgRepsAmodes stg_args
; let (rts_fun, arity) = slowCallPattern (map fst argsreps)
- ; direct_call "slow_call" NativeNodeCall
+ ; r <- direct_call "slow_call" NativeNodeCall
(mkRtsApFastLabel rts_fun) arity ((P,Just fun):argsreps)
; emitComment $ mkFastString ("slow_call for " ++
showSDoc dflags (ppr fun) ++
" with pat " ++ unpackFS rts_fun)
+ ; return r
}
@@ -194,7 +201,7 @@ slowCall fun stg_args
direct_call :: String
-> Convention -- e.g. NativeNodeCall or NativeDirectCall
-> CLabel -> RepArity
- -> [(ArgRep,Maybe CmmExpr)] -> FCode ()
+ -> [(ArgRep,Maybe CmmExpr)] -> FCode ReturnKind
direct_call caller call_conv lbl arity args
| debugIsOn && real_arity > length args -- Too few args
= do -- Caller should ensure that there are enough args!
diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs
index d1732ed2b7..287302fb0a 100644
--- a/compiler/codeGen/StgCmmMonad.hs
+++ b/compiler/codeGen/StgCmmMonad.hs
@@ -36,7 +36,7 @@ module StgCmmMonad (
ConTagZ,
- Sequel(..),
+ Sequel(..), ReturnKind(..),
withSequel, getSequel,
setSRTLabel, getSRTLabel,
@@ -222,13 +222,85 @@ data Sequel
| AssignTo
[LocalReg] -- Put result(s) in these regs and fall through
-- NB: no void arguments here
- Bool -- Should we adjust the heap pointer back to recover
- -- space that's unused on this path?
- -- We need to do this only if the expression may
- -- allocate (e.g. it's a foreign call or allocating primOp)
-instance Show Sequel where
- show (Return _) = "Sequel: Return"
- show (AssignTo _ _) = "Sequel: Assign"
+ --
+ Bool -- Should we adjust the heap pointer back to
+ -- recover space that's unused on this path?
+ -- We need to do this only if the expression
+ -- may allocate (e.g. it's a foreign call or
+ -- allocating primOp)
+
+-- See Note [sharing continuations] below
+data ReturnKind
+ = AssignedDirectly
+ | ReturnedTo BlockId ByteOff
+
+-- Note [sharing continuations]
+--
+-- ReturnKind says how the expression being compiled returned its
+-- results: either by assigning directly to the registers specified
+-- by the Sequel, or by returning to a continuation that does the
+-- assignments. The point of this is that we might be able to re-use the
+-- continuation in a subsequent heap-check. Consider:
+--
+-- case f x of z
+-- True -> <True code>
+-- False -> <False code>
+--
+-- Naively we would generate
+--
+-- R2 = x -- argument to f
+-- Sp[young(L1)] = L1
+-- call f returns to L1
+-- L1:
+-- z = R1
+-- if (z & 1) then Ltrue else Lfalse
+-- Ltrue:
+-- Hp = Hp + 24
+-- if (Hp > HpLim) then L4 else L7
+-- L4:
+-- HpAlloc = 24
+-- goto L5
+-- L5:
+-- R1 = z
+-- Sp[young(L6)] = L6
+-- call stg_gc_unpt_r1 returns to L6
+-- L6:
+-- z = R1
+-- goto L1
+-- L7:
+-- <True code>
+-- Lfalse:
+-- <False code>
+--
+-- We want the gc call in L5 to return to L1, and discard L6. Note
+-- that not only can we share L1 and L6, but the assignment of the
+-- return address in L5 is unnecessary because the return address for
+-- L1 is already on the stack. We used to catch the sharing of L1 and
+-- L6 in the common-block-eliminator, but not the unnecessary return
+-- address assignment.
+--
+-- Since this case is so common I decided to make it more explicit and
+-- robust by programming the sharing directly, rather than relying on
+-- the common-block eliminator to catch it. This makes
+-- common-block elimination an optional optimisation, and furthermore
+-- generates less code in the first place that we have to subsequently
+-- clean up.
+--
+-- There are some rarer cases of common blocks that we don't catch
+-- this way, but that's ok. Common-block elimination is still available
+-- to catch them when optimisation is enabled. Some examples are:
+--
+-- - when both the True and False branches do a heap check, we
+-- can share the heap-check failure code L4a and maybe L4
+--
+-- - in a case-of-case, there might be multiple continuations that
+-- we can common up.
+--
+-- It is always safe to use AssignedDirectly. Expressions that jump
+-- to the continuation from multiple places (e.g. case expressions)
+-- fall back to AssignedDirectly.
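+--
+-- With the sharing described here we expect instead something like
+-- (a sketch, reusing the labels of the example above):
+--
+--        R2 = x
+--        Sp[young(L1)] = L1
+--        call f returns to L1
+--      L1:
+--        z = R1
+--        if (z & 1) then Ltrue else Lfalse
+--      Ltrue:
+--        Hp = Hp + 24
+--        if (Hp > HpLim) then L4 else L7
+--      L4:
+--        HpAlloc = 24
+--        R1 = z
+--        call stg_gc_unpt_r1 returns to L1
+--        -- no store of L1: its return address is already on the stack
+--      L7:
+--        <True code>
+--      Lfalse:
+--        <False code>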
+--
+
initCgInfoDown :: DynFlags -> Module -> CgInfoDownwards
initCgInfoDown dflags mod
@@ -410,7 +482,7 @@ getModuleName = do { info <- getInfoDown; return (cgd_mod info) }
-- ----------------------------------------------------------------------------
-- Get/set the end-of-block info
-withSequel :: Sequel -> FCode () -> FCode ()
+withSequel :: Sequel -> FCode a -> FCode a
withSequel sequel code
= do { info <- getInfoDown
; withInfoDown code (info {cgd_sequel = sequel }) }
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index e015ac7935..d9585c6d61 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -68,7 +68,7 @@ might be a Haskell closure pointer, we don't want to evaluate it. -}
cgOpApp :: StgOp -- The op
-> [StgArg] -- Arguments
-> Type -- Result type (always an unboxed tuple)
- -> FCode ()
+ -> FCode ReturnKind
-- Foreign calls
cgOpApp (StgFCallOp fcall _) stg_args res_ty
diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs
index ab44888597..4798c65a4a 100644
--- a/compiler/codeGen/StgCmmUtils.hs
+++ b/compiler/codeGen/StgCmmUtils.hs
@@ -464,7 +464,7 @@ newUnboxedTupleRegs res_ty
, let rep = typePrimRep ty
, not (isVoidRep rep) ]
choose_regs (AssignTo regs _) = return regs
- choose_regs _other = mapM (newTemp . primRepCmmType) reps
+ choose_regs _other = mapM (newTemp . primRepCmmType) reps
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index a351746948..415fef213c 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -280,6 +280,7 @@ data DynFlag
| Opt_RegLiveness -- Use the STG Reg liveness information (hidden flag)
| Opt_IrrefutableTuples
| Opt_CmmSink
+ | Opt_CmmElimCommonBlocks
-- Interface files
| Opt_IgnoreInterfacePragmas
@@ -2041,6 +2042,7 @@ fFlags = [
( "regs-liveness", Opt_RegLiveness, nop), -- hidden flag
( "irrefutable-tuples", Opt_IrrefutableTuples, nop ),
( "cmm-sink", Opt_CmmSink, nop ),
+ ( "cmm-elim-common-blocks", Opt_CmmElimCommonBlocks, nop ),
( "gen-manifest", Opt_GenManifest, nop ),
( "embed-manifest", Opt_EmbedManifest, nop ),
( "ext-core", Opt_EmitExternalCore, nop ),
@@ -2314,6 +2316,7 @@ optLevelFlags
, ([0,1,2], Opt_LlvmTBAA)
, ([0,1,2], Opt_RegLiveness)
, ([1,2], Opt_CmmSink)
+ , ([1,2], Opt_CmmElimCommonBlocks)
-- , ([2], Opt_StaticArgumentTransformation)
-- Max writes: I think it's probably best not to enable SAT with -O2 for the