| field | value | timestamp |
|---|---|---|
| author | Simon Marlow <marlowsd@gmail.com> | 2012-07-31 11:19:03 +0100 |
| committer | Simon Marlow <marlowsd@gmail.com> | 2012-08-02 11:56:02 +0100 |
| commit | 6ede0067a91a0da7f682c3ea1d6df938616e983a (patch) | |
| tree | 967a755612581b3d578f9879b56353a830cfa4bb | |
| parent | d2361423f23c3381b5a7f57c3f9e6c2448cdac80 (diff) | |
| download | haskell-6ede0067a91a0da7f682c3ea1d6df938616e983a.tar.gz | |
Explicitly share some return continuations
Instead of relying on common-block elimination to share return
continuations in the common case (case-alternative heap checks), we do
it explicitly. This isn't hard to do, it is more robust, and it saves
some compilation time. Full commentary is in Note [sharing continuations].
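
Before the full patch, a distilled sketch may help orient the reader. The `ReturnKind` type and the `maybeAltHeapCheck` dispatch below are lifted from the diff itself (see the StgCmmMonad.hs and StgCmmExpr.hs hunks); only the comments are editorial:

```haskell
-- How the expression just compiled delivered its results to the Sequel:
data ReturnKind
  = AssignedDirectly             -- assigned straight into the Sequel's registers
  | ReturnedTo BlockId ByteOff   -- returned through a real continuation block

-- A case alternative's heap check can now jump back to the scrutinee's
-- existing return continuation, instead of emitting a duplicate block and
-- relying on common-block elimination to merge the two afterwards:
maybeAltHeapCheck :: (GcPlan, ReturnKind) -> FCode a -> FCode a
maybeAltHeapCheck (NoGcInAlts, _)                      code = code
maybeAltHeapCheck (GcInAlts regs, AssignedDirectly)    code = altHeapCheck regs code
maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code =
  altHeapCheckReturnsTo regs lret off code
```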
| mode | file | changed |
|---|---|---|
| -rw-r--r-- | compiler/cmm/CmmPipeline.hs | 8 |
| -rw-r--r-- | compiler/cmm/MkGraph.hs | 27 |
| -rw-r--r-- | compiler/codeGen/StgCmm.hs | 5 |
| -rw-r--r-- | compiler/codeGen/StgCmmBind.hs | 6 |
| -rw-r--r-- | compiler/codeGen/StgCmmExpr.hs | 177 |
| -rw-r--r-- | compiler/codeGen/StgCmmForeign.hs | 3 |
| -rw-r--r-- | compiler/codeGen/StgCmmHeap.hs | 99 |
| -rw-r--r-- | compiler/codeGen/StgCmmLayout.hs | 31 |
| -rw-r--r-- | compiler/codeGen/StgCmmMonad.hs | 90 |
| -rw-r--r-- | compiler/codeGen/StgCmmPrim.hs | 2 |
| -rw-r--r-- | compiler/codeGen/StgCmmUtils.hs | 2 |
| -rw-r--r-- | compiler/main/DynFlags.hs | 3 |
12 files changed, 283 insertions, 170 deletions
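
A side effect worth noting before reading the diff: since the sharing is now done explicitly, common-block elimination is no longer load-bearing, and the patch turns it into an optional pass gated by the new `Opt_CmmElimCommonBlocks` flag (registered as `cmm-elim-common-blocks` in `fFlags` and enabled at -O1 and -O2; assuming the usual behaviour of GHC's `fFlags` table, this gives `-fcmm-elim-common-blocks` and `-fno-cmm-elim-common-blocks` on the command line). The gate, condensed from the first hunk below:

```haskell
-- Condensed from the CmmPipeline.hs hunk: run elimCommonBlocks only when
-- the flag is set; otherwise pass the Cmm graph through unchanged.
g <- if dopt Opt_CmmElimCommonBlocks dflags
        then do g <- {-# SCC "elimCommonBlocks" #-} return $ elimCommonBlocks g
                dump Opt_D_dump_cmmz_cbe "Post common block elimination" g
                return g
        else return g
```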
diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs
index 6042a08d25..f96e77bda7 100644
--- a/compiler/cmm/CmmPipeline.hs
+++ b/compiler/cmm/CmmPipeline.hs
@@ -60,8 +60,12 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}})
        dump Opt_D_dump_cmmz_cfg "Post control-flow optimisations" g

        ----------- Eliminate common blocks -------------------
-       g <- {-# SCC "elimCommonBlocks" #-} return $ elimCommonBlocks g
-       dump Opt_D_dump_cmmz_cbe "Post common block elimination" g
+       g <- if dopt Opt_CmmElimCommonBlocks dflags
+               then do g <- {-# SCC "elimCommonBlocks" #-} return $ elimCommonBlocks g
+                       dump Opt_D_dump_cmmz_cbe "Post common block elimination" g
+                       return g
+               else return g
+
        -- Any work storing block Labels must be performed _after_
        -- elimCommonBlocks

diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs
index 443fa3a441..4703b47f42 100644
--- a/compiler/cmm/MkGraph.hs
+++ b/compiler/cmm/MkGraph.hs
@@ -8,6 +8,7 @@ module MkGraph
   , stackStubExpr
   , mkNop, mkAssign, mkStore, mkUnsafeCall, mkFinalCall, mkCallReturnsTo
+  , mkJumpReturnsTo
   , mkJump, mkDirectJump, mkForeignJump, mkForeignJumpExtra, mkJumpGC
   , mkCbranch, mkSwitch
   , mkReturn, mkReturnSimple, mkComment, mkCallEntry, mkBranch
@@ -234,6 +235,17 @@ mkCallReturnsTo f callConv actuals ret_lbl ret_off updfr_off extra_stack = do
     updfr_off extra_stack $
     toCall f (Just ret_lbl) updfr_off ret_off

+-- Like mkCallReturnsTo, but does not push the return address (it is
+-- assumed to be already on the stack).
+mkJumpReturnsTo :: CmmExpr -> Convention -> [CmmActual]
+                -> BlockId
+                -> ByteOff
+                -> UpdFrameOffset
+                -> CmmAGraph
+mkJumpReturnsTo f callConv actuals ret_lbl ret_off updfr_off = do
+  lastWithArgs JumpRet (Young ret_lbl) callConv actuals updfr_off $
+    toCall f (Just ret_lbl) updfr_off ret_off
+
 mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph
 mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as

@@ -289,7 +301,7 @@ oneCopyOflowI area (reg, off) (n, ms) =
 -- Factoring out the common parts of the copyout functions yielded something
 -- more complicated:

-data Transfer = Call | Jump | Ret deriving Eq
+data Transfer = Call | JumpRet | Jump | Ret deriving Eq

 copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual]
              -> UpdFrameOffset
@@ -321,10 +333,15 @@ copyOutOflow conv transfer area actuals updfr_off
           case area of
             Young id -> id `seq` -- Generate a store instruction for
                                  -- the return address if making a call
-                  if transfer == Call then
-                    ([(CmmLit (CmmBlock id), StackParam init_offset)],
-                     widthInBytes wordWidth)
-                  else ([], 0)
+                  case transfer of
+                     Call ->
+                       ([(CmmLit (CmmBlock id), StackParam init_offset)],
+                        widthInBytes wordWidth)
+                     JumpRet ->
+                       ([],
+                        widthInBytes wordWidth)
+                     _other ->
+                       ([], 0)
             Old -> ([], updfr_off)

       arg_offset = init_offset + extra_stack_off

diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs
index 70892eeb5e..d82b4bc3b1 100644
--- a/compiler/codeGen/StgCmm.hs
+++ b/compiler/codeGen/StgCmm.hs
@@ -244,8 +244,9 @@ cgDataCon data_con
                 do { _ <- ticky_code
                    ; ldvEnter (CmmReg nodeReg)
                    ; tickyReturnOldCon (length arg_things)
-                   ; emitReturn [cmmOffsetB (CmmReg nodeReg)
-                                            (tagForCon data_con)] }
+                   ; _ <- emitReturn [cmmOffsetB (CmmReg nodeReg)
+                                                 (tagForCon data_con)]
+                   ; return () }
                         -- The case continuation code expects a tagged pointer

         arg_reps :: [(PrimRep, UnaryType)]

diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
index e40c660fdb..a0fcc1ac5d 100644
--- a/compiler/codeGen/StgCmmBind.hs
+++ b/compiler/codeGen/StgCmmBind.hs
@@ -435,7 +435,8 @@ closureCodeBody top_lvl bndr cl_info _cc args arity body fv_details
                 -- heap check, to reduce live vars over check
                 ; if node_points then load_fvs node lf_info fv_bindings
                                  else return ()
-                ; cgExpr body }}
+                ; _ <- cgExpr body
+                ; return () }}
   }

 -- A function closure pointer may be tagged, so we
@@ -501,7 +502,8 @@ thunkCode cl_info fv_details _cc node arity body
         ; let lf_info = closureLFInfo cl_info
         ; fv_bindings <- mapM bind_fv fv_details
         ; load_fvs node lf_info fv_bindings
-        ; cgExpr body }}}
+        ; _ <- cgExpr body
+        ; return () }}}


------------------------------------------------------------------------

diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
index 65e2416d2f..95c61082c0 100644
--- a/compiler/codeGen/StgCmmExpr.hs
+++ b/compiler/codeGen/StgCmmExpr.hs
@@ -56,7 +56,7 @@ import UniqSupply
 -- cgExpr: the main function
 ------------------------------------------------------------------------

-cgExpr :: StgExpr -> FCode ()
+cgExpr :: StgExpr -> FCode ReturnKind

 cgExpr (StgApp fun args) = cgIdApp fun args

@@ -76,8 +76,9 @@ cgExpr (StgLetNoEscape _ _ binds expr) =
   do { us <- newUniqSupply
      ; let join_id = mkBlockId (uniqFromSupply us)
      ; cgLneBinds join_id binds
-     ; cgExpr expr
-     ; emitLabel join_id}
+     ; r <- cgExpr expr
+     ; emitLabel join_id
+     ; return r }

 cgExpr (StgCase expr _live_vars _save_vars bndr _srt alt_type alts) =
   cgCase expr bndr alt_type alts
@@ -161,7 +162,7 @@ cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
   = do  { arg_regs <- forkProc $ do
             { restoreCurrentCostCentre cc_slot
             ; arg_regs <- bindArgsToRegs args
-            ; altHeapCheck arg_regs (cgExpr body)
+            ; _ <- altHeapCheck arg_regs (cgExpr body)
                         -- Using altHeapCheck just reduces
                         -- instructions to save on stack
             ; return arg_regs }
@@ -283,7 +284,7 @@ data GcPlan
                                 -- of the case alternative(s) into the upstream check

 -------------------------------------
-cgCase :: StgExpr -> Id -> AltType -> [StgAlt] -> FCode ()
+cgCase :: StgExpr -> Id -> AltType -> [StgAlt] -> FCode ReturnKind

 cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
   | isEnumerationTyCon tycon -- Note [case on bool]
@@ -296,9 +297,10 @@ cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
                  ; emitAssign (CmmLocal tmp_reg)
                               (tagToClosure tycon tag_expr) }

-       ; (mb_deflt, branches) <- cgAlgAltRhss NoGcInAlts Nothing
+       ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
                                               (NonVoid bndr) alts
        ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
+       ; return AssignedDirectly
        }
  where
    do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
@@ -369,21 +371,21 @@ cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
        ; v_info <- getCgIdInfo v
        ; emitAssign (CmmLocal (idToReg (NonVoid bndr))) (idInfoToAmode v_info)
        ; _ <- bindArgsToRegs [NonVoid bndr]
-       ; cgAlts NoGcInAlts (NonVoid bndr) alt_type alts }
+       ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
  where
    reps_compatible = idPrimRep v == idPrimRep bndr

 cgCase scrut@(StgApp v []) _ (PrimAlt _) _
   = -- fail at run-time, not compile-time
     do { mb_cc <- maybeSaveCostCentre True
-       ; withSequel (AssignTo [idToReg (NonVoid v)] False) (cgExpr scrut)
+       ; _ <- withSequel (AssignTo [idToReg (NonVoid v)] False) (cgExpr scrut)
        ; restoreCurrentCostCentre mb_cc
        ; emitComment $ mkFastString "should be unreachable code"
        ; l <- newLabelC
        ; emitLabel l
        ; emit (mkBranch l)
+       ; return AssignedDirectly
        }
-
 {- case seq# a s of v
      (# s', a' #) -> e

@@ -396,6 +398,7 @@ case a of v
   (# s', a' #) -> e

 (taking advantage of the fact that the return convention for (# State#, a #)
 is the same as the return convention for just 'a')
 -}
+
 cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
   = -- handle seq#, same return convention as vanilla 'a'.
     cgCase (StgApp a []) bndr alt_type alts
@@ -406,19 +409,25 @@ cgCase scrut bndr alt_type alts
        ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
              alt_regs  = map idToReg ret_bndrs
              simple_scrut = isSimpleScrut scrut alt_type
-             gcInAlts | not simple_scrut = True
-                      | isSingleton alts = False
-                      | up_hp_usg > 0    = False
-                      | otherwise        = True
-             gc_plan = if gcInAlts then GcInAlts alt_regs else NoGcInAlts
+             do_gc  | not simple_scrut = True
+                    | isSingleton alts = False
+                    | up_hp_usg > 0    = False
+                    | otherwise        = True
+             gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts

        ; mb_cc <- maybeSaveCostCentre simple_scrut
-       ; withSequel (AssignTo alt_regs gcInAlts) (cgExpr scrut)
-       ; restoreCurrentCostCentre mb_cc
-       -- JD: We need Note: [Better Alt Heap Checks]
+
+           -- if do_gc then our sequel will be ReturnTo
+           --   - generate code for the sequel now
+           --   - pass info about the sequel to cgAlts for use in the heap check
+           -- else sequel will be AssignTo
+
+       ; ret_kind <- withSequel (AssignTo alt_regs False) (cgExpr scrut)
+       ; restoreCurrentCostCentre mb_cc
        ; _ <- bindArgsToRegs ret_bndrs
-       ; cgAlts gc_plan (NonVoid bndr) alt_type alts }
+       ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
+       }
+

 -----------------
 maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
@@ -465,17 +474,18 @@ chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
         -- UbxTupAlt has only one alternative

 -------------------------------------
-cgAlts :: GcPlan -> NonVoid Id -> AltType -> [StgAlt] -> FCode ()
+cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [StgAlt]
+       -> FCode ReturnKind
 -- At this point the results of the case are in the binders
 cgAlts gc_plan _bndr PolyAlt [(_, _, _, rhs)]
-  = maybeAltHeapCheck gc_plan Nothing (cgExpr rhs)
-
+  = maybeAltHeapCheck gc_plan (cgExpr rhs)
+
 cgAlts gc_plan _bndr (UbxTupAlt _) [(_, _, _, rhs)]
-  = maybeAltHeapCheck gc_plan Nothing (cgExpr rhs)
+  = maybeAltHeapCheck gc_plan (cgExpr rhs)
         -- Here bndrs are *already* in scope, so don't rebind them

 cgAlts gc_plan bndr (PrimAlt _) alts
-  = do  { tagged_cmms <- cgAltRhss gc_plan Nothing bndr alts
+  = do  { tagged_cmms <- cgAltRhss gc_plan bndr alts

         ; let bndr_reg = CmmLocal (idToReg bndr)
               (DEFAULT,deflt) = head tagged_cmms
@@ -484,25 +494,23 @@ cgAlts gc_plan bndr (PrimAlt _) alts
               tagged_cmms' = [(lit,code)
                              | (LitAlt lit, code) <- tagged_cmms]

-        ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt }
+        ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
+        ; return AssignedDirectly }

 cgAlts gc_plan bndr (AlgAlt tycon) alts
-  = do  { retry_lbl <- newLabelC
-        ; emitLabel retry_lbl -- Note [alg-alt heap checks]
-
-        ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan (Just retry_lbl)
-                                               bndr alts
+  = do  { (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts

         ; let fam_sz   = tyConFamilySize tycon
               bndr_reg = CmmLocal (idToReg bndr)

                     -- Is the constructor tag in the node reg?
         ; if isSmallFamily fam_sz
-          then let      -- Yes, bndr_reg has constr. tag in ls bits
+          then do
+                let     -- Yes, bndr_reg has constr. tag in ls bits
                    tag_expr = cmmConstrTag1 (CmmReg bndr_reg)
                    branches' = [(tag+1,branch) | (tag,branch) <- branches]
-               in
-               emitSwitch tag_expr branches' mb_deflt 1 fam_sz
+                emitSwitch tag_expr branches' mb_deflt 1 fam_sz
+                return AssignedDirectly

           else -- No, get tag from info table
                do dflags <- getDynFlags
@@ -510,7 +518,8 @@ cgAlts gc_plan bndr (AlgAlt tycon) alts
                              -- when the family size is big enough
                       untagged_ptr = cmmRegOffB bndr_reg (-1)
                       tag_expr = getConstrTag dflags (untagged_ptr)
-                  emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) }
+                  emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
+                  return AssignedDirectly }

 cgAlts _ _ _ _ = panic "cgAlts"
         -- UbxTupAlt and PolyAlt have only one alternative
@@ -537,11 +546,11 @@ cgAlts _ _ _ _ = panic "cgAlts"
 --      goto L1

 -------------------
-cgAlgAltRhss :: GcPlan -> Maybe BlockId -> NonVoid Id -> [StgAlt]
+cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
              -> FCode ( Maybe CmmAGraph
                       , [(ConTagZ, CmmAGraph)] )
-cgAlgAltRhss gc_plan retry_lbl bndr alts
-  = do { tagged_cmms <- cgAltRhss gc_plan retry_lbl bndr alts
+cgAlgAltRhss gc_plan bndr alts
+  = do { tagged_cmms <- cgAltRhss gc_plan bndr alts

        ; let { mb_deflt = case tagged_cmms of
                            ((DEFAULT,rhs) : _) -> Just rhs
@@ -557,32 +566,32 @@ cgAlgAltRhss gc_plan bndr alts

 -------------------
-cgAltRhss :: GcPlan -> Maybe BlockId -> NonVoid Id -> [StgAlt]
+cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [StgAlt]
           -> FCode [(AltCon, CmmAGraph)]
-cgAltRhss gc_plan retry_lbl bndr alts
+cgAltRhss gc_plan bndr alts
   = forkAlts (map cg_alt alts)
   where
     base_reg = idToReg bndr
     cg_alt :: StgAlt -> FCode (AltCon, CmmAGraph)
     cg_alt (con, bndrs, _uses, rhs)
       = getCodeR                  $
-        maybeAltHeapCheck gc_plan retry_lbl $
+        maybeAltHeapCheck gc_plan $
        do { _ <- bindConArgs con base_reg bndrs
-          ; cgExpr rhs
-          ; return con }
+          ; _ <- cgExpr rhs
+          ; return con }

-maybeAltHeapCheck :: GcPlan -> Maybe BlockId -> FCode a -> FCode a
-maybeAltHeapCheck NoGcInAlts _ code = code
-maybeAltHeapCheck (GcInAlts regs) mlbl code =
-  case mlbl of
-     Nothing -> altHeapCheck regs code
-     Just retry_lbl -> altHeapCheckReturnsTo regs retry_lbl code
+maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
+maybeAltHeapCheck (NoGcInAlts,_) code = code
+maybeAltHeapCheck (GcInAlts regs, AssignedDirectly) code =
+  altHeapCheck regs code
+maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code =
+  altHeapCheckReturnsTo regs lret off code

 -----------------------------------------------------------------------------
 --      Tail calls
 -----------------------------------------------------------------------------

-cgConApp :: DataCon -> [StgArg] -> FCode ()
+cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
 cgConApp con stg_args
   | isUnboxedTupleCon con       -- Unboxed tuple: assign and return
   = do { arg_exprs <- getNonVoidArgAmodes stg_args
@@ -599,7 +608,7 @@ cgConApp con stg_args
        ; emitReturn [idInfoToAmode idinfo] }


-cgIdApp :: Id -> [StgArg] -> FCode ()
+cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
 cgIdApp fun_id [] | isVoidId fun_id = emitReturn []
 cgIdApp fun_id args
   = do  { fun_info <- getCgIdInfo fun_id
@@ -607,14 +616,15 @@ cgIdApp fun_id args
           Just (blk_id, lne_regs) -> cgLneJump blk_id lne_regs args
           Nothing                 -> cgTailCall fun_id fun_info args }

-cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode ()
+cgLneJump :: BlockId -> [LocalReg] -> [StgArg] -> FCode ReturnKind
 cgLneJump blk_id lne_regs args  -- Join point; discard sequel
   = do  { adjustHpBackwards -- always do this before a tail-call
         ; cmm_args <- getNonVoidArgAmodes args
         ; emitMultiAssign lne_regs cmm_args
-        ; emit (mkBranch blk_id) }
+        ; emit (mkBranch blk_id)
+        ; return AssignedDirectly }

-cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode ()
+cgTailCall :: Id -> CgIdInfo -> [StgArg] -> FCode ReturnKind
 cgTailCall fun_id fun_info args = do
     dflags <- getDynFlags
     case (getCallMethod dflags fun_name (idCafInfo fun_id) lf_info (length args)) of
@@ -647,7 +657,7 @@ cgTailCall fun_id fun_info args = do
     node_points dflags = nodeMustPointToIt dflags lf_info


-emitEnter :: CmmExpr -> FCode ()
+emitEnter :: CmmExpr -> FCode ReturnKind
 emitEnter fun = do
   { adjustHpBackwards
   ; sequel <- getSequel
@@ -665,6 +675,7 @@ emitEnter fun = do
       { let entry = entryCode $ closureInfoPtr $ CmmReg nodeReg
       ; emit $ mkForeignJump NativeNodeCall entry
                   [cmmUntag fun] updfr_off
+      ; return AssignedDirectly
       }

   -- The result will be scrutinised in the sequel.  This is where
@@ -687,12 +698,18 @@ emitEnter fun = do
   -- ensure that we generate only one proc-point for this
   -- sequence.
   --
+  -- Furthermore, we tell the caller that we generated a native
+  -- return continuation by returning (ReturnedTo Lret off), so
+  -- that the continuation can be reused by the heap-check failure
+  -- code in the enclosing case expression.
+  --
   AssignTo res_regs _ -> do
        { lret <- newLabelC
+       ; let (off, copyin) = copyInOflow NativeReturn (Young lret) res_regs
        ; lcall <- newLabelC
+       ; updfr_off <- getUpdFrameOff
        ; let area = Young lret
-       ; let (off, copyin) = copyInOflow NativeReturn area res_regs
-             (outArgs, regs, copyout) = copyOutOflow NativeNodeCall Call area
+       ; let (outArgs, regs, copyout) = copyOutOflow NativeNodeCall Call area
                                           [fun] updfr_off (0,[])
        -- refer to fun via nodeReg after the copyout, to avoid having
        -- both live simultaneously; this sometimes enables fun to be
@@ -705,54 +722,6 @@ emitEnter fun = do
            outOfLine lcall the_call <*>
            mkLabel lret <*>
            copyin
+       ; return (ReturnedTo lret off)
        }
   }
-
-
-{- Note [Better Alt Heap Checks]
-If two function calls can share a return point, then they will also
-get the same info table. Therefore, it's worth our effort to make
-those opportunities appear as frequently as possible.
-
-Here are a few examples of how it should work:
-
-  STG:
-    case f x of
-      True  -> <True code -- including allocation>
-      False -> <False code>
-  Cmm:
-      r = call f(x) returns to L;
-   L:
-      if r & 7 >= 2 goto L1 else goto L2;
-   L1:
-      if Hp > HpLim then
-        r = gc(r);
-        goto L;
-      <True code -- including allocation>
-   L2:
-      <False code>
-Note that the code following both the call to f(x) and the code to gc(r)
-should be the same, which will allow the common blockifier to discover
-that they are the same. Therefore, both function calls will return to the same
-block, and they will use the same info table.
-
-Here's an example of the Cmm code we want from a primOp.
-The primOp doesn't produce an info table for us to reuse, but that's okay:
-we should still generate the same code:
-  STG:
-    case f x of
-      0 -> <0-case code -- including allocation>
-      _ -> <default-case code>
-  Cmm:
-      r = a +# b;
-   L:
-      if r == 0 then goto L1 else goto L2;
-   L1:
-      if Hp > HpLim then
-        r = gc(r);
-        goto L;
-      <0-case code -- including allocation>
-   L2:
-      <default-case code>
--}
-

diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index 8c061cf00c..a6274662ad 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -51,7 +51,7 @@ import Control.Monad
 cgForeignCall :: ForeignCall            -- the op
               -> [StgArg]               -- x,y arguments
               -> Type                   -- result type
-              -> FCode ()
+              -> FCode ReturnKind

 cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
   = do  { cmm_args <- getFCallArgs stg_args
@@ -90,6 +90,7 @@ cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
             AssignTo assign_to_these _ ->
                 do { emitForeignCall safety assign_to_these call_target
                                      call_args CmmMayReturn
+                   ; return AssignedDirectly
                    }

             _something_else ->

diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
index be4497aa5c..ddb6dd01e4 100644
--- a/compiler/codeGen/StgCmmHeap.hs
+++ b/compiler/codeGen/StgCmmHeap.hs
@@ -427,42 +427,79 @@ entryHeapCheck cl_info offset nodeSet arity args code
 -- ------------------------------------------------------------
 -- A heap/stack check in a case alternative
+
+-- If there are multiple alts and we need to GC, but don't have a
+-- continuation already (the scrut was simple), then we should
+-- pre-generate the continuation.  (if there are multiple alts it is
+-- always a canned GC point).
+
+-- altHeapCheck:
+-- If we have a return continuation,
+--   then if it is a canned GC pattern,
+--          then we do mkJumpReturnsTo
+--          else we do a normal call to stg_gc_noregs
+--   else if it is a canned GC pattern,
+--          then generate the continuation and do mkCallReturnsTo
+--          else we do a normal call to stg_gc_noregs

 altHeapCheck :: [LocalReg] -> FCode a -> FCode a
 altHeapCheck regs code
-  = do loop_id <- newLabelC
-       emitLabel loop_id
-       altHeapCheckReturnsTo regs loop_id code
-
-altHeapCheckReturnsTo :: [LocalReg] -> Label -> FCode a -> FCode a
-altHeapCheckReturnsTo regs retry_lbl code
+  = case cannedGCEntryPoint regs of
+      Nothing -> genericGC code
+      Just gc -> do
+        lret <- newLabelC
+        let (off, copyin) = copyInOflow NativeReturn (Young lret) regs
+        lcont <- newLabelC
+        emitOutOfLine lret (copyin <*> mkBranch lcont)
+        emitLabel lcont
+        cannedGCReturnsTo False gc regs lret off code
+
+altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
+altHeapCheckReturnsTo regs lret off code
+  = case cannedGCEntryPoint regs of
+      Nothing -> genericGC code
+      Just gc -> cannedGCReturnsTo True gc regs lret off code
+
+cannedGCReturnsTo :: Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
+                  -> FCode a
+                  -> FCode a
+cannedGCReturnsTo cont_on_stack gc regs lret off code
   = do updfr_sz <- getUpdFrameOff
-       gc_call_code <- gc_call updfr_sz
-       heapCheck False (gc_call_code <*> mkBranch retry_lbl) code
-
+       heapCheck False (gc_call gc updfr_sz) code
   where
     reg_exprs = map (CmmReg . CmmLocal) regs
       -- Note [stg_gc arguments]

-    gc_call sp =
-      case rts_label regs of
-           Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp (0,[])
-           Nothing -> mkCall generic_gc (GC, GC) [] [] sp (0,[])
-
-    rts_label [reg]
-        | isGcPtrType ty = Just (mkGcLabel "stg_gc_unpt_r1")
-        | isFloatType ty = case width of
-                              W32 -> Just (mkGcLabel "stg_gc_f1")
-                              W64 -> Just (mkGcLabel "stg_gc_d1")
-                              _   -> Nothing
+    gc_call label sp
+      | cont_on_stack = mkJumpReturnsTo label GC reg_exprs lret off sp
+      | otherwise     = mkCallReturnsTo label GC reg_exprs lret off sp (0,[])

-        | width == wordWidth = Just (mkGcLabel "stg_gc_unbx_r1")
-        | width == W64       = Just (mkGcLabel "stg_gc_l1")
-        | otherwise          = Nothing
-        where
-            ty = localRegType reg
-            width = typeWidth ty
-
-    rts_label _ = Nothing
+genericGC :: FCode a -> FCode a
+genericGC code
+  = do updfr_sz <- getUpdFrameOff
+       lretry <- newLabelC
+       emitLabel lretry
+       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz (0,[])
+       heapCheck False (call <*> mkBranch lretry) code
+
+cannedGCEntryPoint :: [LocalReg] -> Maybe CmmExpr
+cannedGCEntryPoint regs
+  = case regs of
+      []  -> Just (mkGcLabel "stg_gc_noregs")
+      [reg]
+          | isGcPtrType ty -> Just (mkGcLabel "stg_gc_unpt_r1")
+          | isFloatType ty -> case width of
+                                W32 -> Just (mkGcLabel "stg_gc_f1")
+                                W64 -> Just (mkGcLabel "stg_gc_d1")
+                                _   -> Nothing
+
+          | width == wordWidth -> Just (mkGcLabel "stg_gc_unbx_r1")
+          | width == W64       -> Just (mkGcLabel "stg_gc_l1")
+          | otherwise          -> Nothing
+          where
+              ty = localRegType reg
+              width = typeWidth ty
+      _otherwise -> Nothing

 -- Note [stg_gc arguments]
 -- It might seem that we could avoid passing the arguments to the
@@ -484,11 +521,11 @@ altHeapCheckReturnsTo regs retry_lbl code

 -- | The generic GC procedure; no params, no results
 generic_gc :: CmmExpr
-generic_gc = CmmLit $ mkGcLabel "stg_gc_noregs"
+generic_gc = mkGcLabel "stg_gc_noregs"

 -- | Create a CLabel for calling a garbage collector entry point
-mkGcLabel :: String -> CmmLit
-mkGcLabel = (CmmLabel . (mkCmmCodeLabel rtsPackageId) . fsLit)
+mkGcLabel :: String -> CmmExpr
+mkGcLabel s = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit s)))

 -------------------------------
 heapCheck :: Bool -> CmmAGraph -> FCode a -> FCode a

diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 5bcb67f82b..8a20411064 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -77,11 +77,10 @@ import FastString
 --
 -- >    p=x; q=y;
 --
-emitReturn :: [CmmExpr] -> FCode ()
+emitReturn :: [CmmExpr] -> FCode ReturnKind
 emitReturn results
   = do { sequel    <- getSequel;
        ; updfr_off <- getUpdFrameOff
-       ; emitComment $ mkFastString ("emitReturn: " ++ show sequel)
        ; case sequel of
           Return _ ->
            do { adjustHpBackwards
@@ -89,6 +88,7 @@ emitReturn results
           AssignTo regs adjust ->
            do { if adjust then adjustHpBackwards else return ()
               ; emitMultiAssign regs results }
+       ; return AssignedDirectly
        }


@@ -96,7 +96,7 @@ emitReturn results
 -- using the call/return convention @conv@, passing @args@, and
 -- returning the results to the current sequel.
 --
-emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ()
+emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ReturnKind
 emitCall convs fun args
   = emitCallWithExtraStack convs fun args noExtraStack

@@ -108,17 +108,23 @@ emitCall convs fun args
 --
 emitCallWithExtraStack
    :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
-   -> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode ()
-emitCallWithExtraStack convs@(callConv, _) fun args extra_stack
+   -> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode ReturnKind
+emitCallWithExtraStack (callConv, retConv) fun args extra_stack
   = do  { adjustHpBackwards
        ; sequel <- getSequel
        ; updfr_off <- getUpdFrameOff
-       ; emitComment $ mkFastString ("emitCallWithExtraStack: " ++ show sequel)
        ; case sequel of
-           Return _ ->
+           Return _ -> do
              emit $ mkForeignJumpExtra callConv fun args updfr_off extra_stack
+             return AssignedDirectly
           AssignTo res_regs _ -> do
-             emit =<< mkCall fun convs res_regs args updfr_off extra_stack
+             k <- newLabelC
+             let area = Young k
+                 (off, copyin) = copyInOflow retConv area res_regs
+                 copyout = mkCallReturnsTo fun callConv args k off updfr_off
+                              extra_stack
+             emit (copyout <*> mkLabel k <*> copyin)
+             return (ReturnedTo k off)
        }


@@ -166,7 +172,7 @@ adjustHpBackwards
 --      call f() return to Nothing updfr_off: 32


-directCall :: Convention -> CLabel -> RepArity -> [StgArg] -> FCode ()
+directCall :: Convention -> CLabel -> RepArity -> [StgArg] -> FCode ReturnKind
 -- (directCall f n args)
 -- calls f(arg1, ..., argn), and applies the result to the remaining args
 -- The function f has arity n, and there are guaranteed at least n args
@@ -176,17 +182,18 @@ directCall conv lbl arity stg_args
        ; direct_call "directCall" conv lbl arity argreps }


-slowCall :: CmmExpr -> [StgArg] -> FCode ()
+slowCall :: CmmExpr -> [StgArg] -> FCode ReturnKind
 -- (slowCall fun args) applies fun to args, returning the results to Sequel
 slowCall fun stg_args
   = do  { dflags <- getDynFlags
        ; argsreps <- getArgRepsAmodes stg_args
        ; let (rts_fun, arity) = slowCallPattern (map fst argsreps)
-       ; direct_call "slow_call" NativeNodeCall
+       ; r <- direct_call "slow_call" NativeNodeCall
                (mkRtsApFastLabel rts_fun) arity ((P,Just fun):argsreps)
        ; emitComment $ mkFastString ("slow_call for " ++
                                      showSDoc dflags (ppr fun) ++
                                      " with pat " ++ unpackFS rts_fun)
+       ; return r
        }


@@ -194,7 +201,7 @@ slowCall fun stg_args
 direct_call :: String
             -> Convention     -- e.g. NativeNodeCall or NativeDirectCall
             -> CLabel -> RepArity
-            -> [(ArgRep,Maybe CmmExpr)] -> FCode ()
+            -> [(ArgRep,Maybe CmmExpr)] -> FCode ReturnKind
 direct_call caller call_conv lbl arity args
   | debugIsOn && real_arity > length args  -- Too few args
   = do -- Caller should ensure that there are enough args!

diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs
index d1732ed2b7..287302fb0a 100644
--- a/compiler/codeGen/StgCmmMonad.hs
+++ b/compiler/codeGen/StgCmmMonad.hs
@@ -36,7 +36,7 @@ module StgCmmMonad (

        ConTagZ,

-       Sequel(..),
+       Sequel(..), ReturnKind(..),
        withSequel, getSequel,

        setSRTLabel, getSRTLabel,
@@ -222,13 +222,85 @@ data Sequel
  | AssignTo
        [LocalReg]      -- Put result(s) in these regs and fall through
                        -- NB: no void arguments here
-       Bool            -- Should we adjust the heap pointer back to recover
-                       -- space that's unused on this path?
-                       -- We need to do this only if the expression may
-                       -- allocate (e.g. it's a foreign call or allocating primOp)
-instance Show Sequel where
-  show (Return _) = "Sequel: Return"
-  show (AssignTo _ _) = "Sequel: Assign"
+                       --
+       Bool            -- Should we adjust the heap pointer back to
+                       -- recover space that's unused on this path?
+                       -- We need to do this only if the expression
+                       -- may allocate (e.g. it's a foreign call or
+                       -- allocating primOp)
+
+-- See Note [sharing continuations] below
+data ReturnKind
+  = AssignedDirectly
+  | ReturnedTo BlockId ByteOff
+
+-- Note [sharing continuations]
+--
+-- ReturnKind says how the expression being compiled returned its
+-- results: either by assigning directly to the registers specified
+-- by the Sequel, or by returning to a continuation that does the
+-- assignments.  The point of this is we might be able to re-use the
+-- continuation in a subsequent heap-check.  Consider:
+--
+--    case f x of z
+--      True  -> <True code>
+--      False -> <False code>
+--
+-- Naively we would generate
+--
+--    R2 = x   -- argument to f
+--    Sp[young(L1)] = L1
+--    call f returns to L1
+--  L1:
+--    z = R1
+--    if (z & 1) then Ltrue else Lfalse
+--  Ltrue:
+--    Hp = Hp + 24
+--    if (Hp > HpLim) then L4 else L7
+--  L4:
+--    HpAlloc = 24
+--    goto L5
+--  L5:
+--    R1 = z
+--    Sp[young(L6)] = L6
+--    call stg_gc_unpt_r1 returns to L6
+--  L6:
+--    z = R1
+--    goto L1
+--  L7:
+--    <True code>
+--  Lfalse:
+--    <False code>
+--
+-- We want the gc call in L4 to return to L1, and discard L6.  Note
+-- that not only can we share L1 and L6, but the assignment of the
+-- return address in L4 is unnecessary because the return address for
+-- L1 is already on the stack.  We used to catch the sharing of L1 and
+-- L6 in the common-block-eliminator, but not the unnecessary return
+-- address assignment.
+--
+-- Since this case is so common I decided to make it more explicit and
+-- robust by programming the sharing directly, rather than relying on
+-- the common-block eliminator to catch it.  This makes
+-- common-block elimination an optional optimisation, and furthermore
+-- generates less code in the first place that we have to subsequently
+-- clean up.
+--
+-- There are some rarer cases of common blocks that we don't catch
+-- this way, but that's ok.  Common-block elimination is still available
+-- to catch them when optimisation is enabled.  Some examples are:
+--
+--    - when both the True and False branches do a heap check, we
+--      can share the heap-check failure code L4a and maybe L4
+--
+--    - in a case-of-case, there might be multiple continuations that
+--      we can common up.
+--
+-- It is always safe to use AssignedDirectly.  Expressions that jump
+-- to the continuation from multiple places (e.g. case expressions)
+-- fall back to AssignedDirectly.
+--

 initCgInfoDown :: DynFlags -> Module -> CgInfoDownwards
 initCgInfoDown dflags mod
@@ -410,7 +482,7 @@ getModuleName = do { info <- getInfoDown; return (cgd_mod info) }
 -- ----------------------------------------------------------------------------
 -- Get/set the end-of-block info

-withSequel :: Sequel -> FCode () -> FCode ()
+withSequel :: Sequel -> FCode a -> FCode a
 withSequel sequel code
   = do  { info  <- getInfoDown
        ; withInfoDown code (info {cgd_sequel = sequel }) }

diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index e015ac7935..d9585c6d61 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -68,7 +68,7 @@ might be a Haskell closure pointer, we don't want to evaluate it. -}
 cgOpApp :: StgOp            -- The op
        -> [StgArg]          -- Arguments
        -> Type              -- Result type (always an unboxed tuple)
-       -> FCode ()
+       -> FCode ReturnKind

 -- Foreign calls
 cgOpApp (StgFCallOp fcall _) stg_args res_ty

diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs
index ab44888597..4798c65a4a 100644
--- a/compiler/codeGen/StgCmmUtils.hs
+++ b/compiler/codeGen/StgCmmUtils.hs
@@ -464,7 +464,7 @@ newUnboxedTupleRegs res_ty
                     , let rep = typePrimRep ty
                     , not (isVoidRep rep) ]
    choose_regs (AssignTo regs _) = return regs
-    choose_regs _other            = mapM (newTemp . primRepCmmType) reps
+    choose_regs _other            = mapM (newTemp . primRepCmmType) reps

diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index a351746948..415fef213c 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -280,6 +280,7 @@ data DynFlag
   | Opt_RegLiveness                    -- Use the STG Reg liveness information (hidden flag)
   | Opt_IrrefutableTuples
   | Opt_CmmSink
+   | Opt_CmmElimCommonBlocks

   -- Interface files
   | Opt_IgnoreInterfacePragmas
@@ -2041,6 +2042,7 @@ fFlags = [
  ( "regs-liveness",                    Opt_RegLiveness, nop), -- hidden flag
  ( "irrefutable-tuples",               Opt_IrrefutableTuples, nop ),
  ( "cmm-sink",                         Opt_CmmSink, nop ),
+  ( "cmm-elim-common-blocks",           Opt_CmmElimCommonBlocks, nop ),
  ( "gen-manifest",                     Opt_GenManifest, nop ),
  ( "embed-manifest",                   Opt_EmbedManifest, nop ),
  ( "ext-core",                         Opt_EmitExternalCore, nop ),
@@ -2314,6 +2316,7 @@ optLevelFlags
    , ([0,1,2], Opt_LlvmTBAA)
    , ([0,1,2], Opt_RegLiveness)
    , ([1,2],   Opt_CmmSink)
+    , ([1,2],   Opt_CmmElimCommonBlocks)

--     , ([2],     Opt_StaticArgumentTransformation)
-- Max writes: I think it's probably best not to enable SAT with -O2 for the