-rw-r--r--  compiler/codeGen/StgCmmBind.hs | 14
-rw-r--r--  compiler/codeGen/StgCmmExpr.hs | 16
-rw-r--r--  compiler/codeGen/StgCmmHeap.hs | 41
3 files changed, 53 insertions, 18 deletions
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
index 23367926c7..344e80a497 100644
--- a/compiler/codeGen/StgCmmBind.hs
+++ b/compiler/codeGen/StgCmmBind.hs
@@ -472,25 +472,21 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details
             \(_offset, node, arg_regs) -> do
                 -- Emit slow-entry code (for entering a closure through a PAP)
                 { mkSlowEntryCode bndr cl_info arg_regs
-
                 ; dflags <- getDynFlags
                 ; let node_points = nodeMustPointToIt dflags lf_info
                       node' = if node_points then Just node else Nothing
-                -- Emit new label that might potentially be a header
-                -- of a self-recursive tail call. See Note
-                -- [Self-recursive tail calls] in StgCmmExpr
                 ; loop_header_id <- newLabelC
-                ; emitLabel loop_header_id
-                ; when node_points (ldvEnterClosure cl_info (CmmLocal node))
                 -- Extend reader monad with information that
                 -- self-recursive tail calls can be optimized into local
-                -- jumps
+                -- jumps. See Note [Self-recursive tail calls] in StgCmmExpr.
                 ; withSelfLoop (bndr, loop_header_id, arg_regs) $ do
                 {
                 -- Main payload
                 ; entryHeapCheck cl_info node' arity arg_regs $ do
-                { -- ticky after heap check to avoid double counting
-                  tickyEnterFun cl_info
+                { -- emit LDV code when profiling
+                  when node_points (ldvEnterClosure cl_info (CmmLocal node))
+                -- ticky after heap check to avoid double counting
+                ; tickyEnterFun cl_info
                 ; enterCostCentreFun cc
                     (CmmMachOp (mo_wordSub dflags)
                          [ CmmReg (CmmLocal node) -- See [NodeReg clobbered with loopification]
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
index cc32a1445b..d94eca493e 100644
--- a/compiler/codeGen/StgCmmExpr.hs
+++ b/compiler/codeGen/StgCmmExpr.hs
@@ -737,10 +737,16 @@ cgIdApp fun_id args = do
 --
 -- * Whenever we are compiling a function, we set that information to reflect
 --   the fact that function currently being compiled can be jumped to, instead
---   of called. We also have to emit a label to which we will be jumping. Both
---   things are done in closureCodyBody in StgCmmBind.
+--   of called. This is done in closureCodyBody in StgCmmBind.
 --
--- * When we began compilation of another closure we remove the additional
+-- * We also have to emit a label to which we will be jumping. We make sure
+--   that the label is placed after a stack check but before the heap
+--   check. The reason is that making a recursive tail-call does not increase
+--   the stack so we only need to check once. But it may grow the heap, so we
+--   have to repeat the heap check in every self-call. This is done in
+--   do_checks in StgCmmHeap.
+--
+-- * When we begin compilation of another closure we remove the additional
 --   information from the environment. This is done by forkClosureBody
 --   in StgCmmMonad. Other functions that duplicate the environment -
 --   forkLneBody, forkAlts, codeOnly - duplicate that information. In other
@@ -755,8 +761,8 @@ cgIdApp fun_id args = do
 --   arity. (d) loopification is turned on via -floopification command-line
 --   option.
 --
---  * Command line option to control turn loopification on and off is
---    implemented in DynFlags
+-- * Command line option to turn loopification on and off is implemented in
+--   DynFlags.
 --
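The rewritten Note [Self-recursive tail calls] above is the core of the change: the loop-header label is now placed after the stack check but before the heap check, so a self-recursive tail call repeats only the heap check. As a rough illustration (my own example, not part of the patch), the definition below is the kind of function loopification targets when compiled with -floopification: the saturated self tail call in go can be lowered to assignments of the new argument values followed by a jump to the loop-header label.

module Main where

-- Hypothetical example, not taken from the patch: 'go' calls itself in
-- tail position with exactly as many arguments as its arity, so the
-- self-call can become a local jump to the loop header instead of a
-- full call. Only the heap check at the header is repeated on each
-- iteration; the stack check at the function entry runs once.
go :: Int -> Int -> Int
go acc 0 = acc
go acc n = go (acc + n) (n - 1)

main :: IO ()
main = print (go 0 1000000)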
diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
index 55ddfd4f96..077b7809b5 100644
--- a/compiler/codeGen/StgCmmHeap.hs
+++ b/compiler/codeGen/StgCmmHeap.hs
@@ -531,7 +531,7 @@ heapStackCheckGen stk_hwm mb_bytes
        lretry <- newLabelC
        emitLabel lretry
        call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
-       do_checks stk_hwm False mb_bytes (call <*> mkBranch lretry)
+       do_checks stk_hwm False mb_bytes (call <*> mkBranch lretry)
 
 -- Note [Single stack check]
 -- ~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -569,11 +569,11 @@ heapStackCheckGen stk_hwm mb_bytes
 -- number of bytes of stack that the function will use, so we use a
 -- special late-bound CmmLit, namely
 --       CmmHighStackMark
--- to stand for the number of bytes needed. When the stack is made
+-- to stand for the number of bytes needed. When the stack is made
 -- manifest, the number of bytes needed is calculated, and used to
 -- replace occurrences of CmmHighStackMark
 --
--- The (Maybe CmmExpr) passed to do_checks is usually
+-- The (Maybe CmmExpr) passed to do_checks is usually
 --     Just (CmmLit CmmHighStackMark)
 -- but can also (in certain hand-written RTS functions)
 --     Just (CmmLit 8)  or some other fixed valuet
@@ -615,13 +615,22 @@ do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
     Nothing -> return ()
     Just stk_hwm -> tickyStackCheck >> (emit =<< mkCmmIfGoto (sp_oflo stk_hwm) gc_id)
 
+  -- Emit new label that might potentially be a header
+  -- of a self-recursive tail call.
+  -- See Note [Self-recursive loop header].
+  self_loop_info <- getSelfLoop
+  case self_loop_info of
+    Just (_, loop_header_id, _)
+        | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id
+    _otherwise -> return ()
+
   if (isJust mb_alloc_lit)
     then do
      tickyHeapCheck
      emitAssign hpReg bump_hp
      emit =<< mkCmmIfThen hp_oflo (alloc_n <*> mkBranch gc_id)
     else do
-      when (not (gopt Opt_OmitYields dflags) && checkYield) $ do
+      when (checkYield && not (gopt Opt_OmitYields dflags)) $ do
         -- Yielding if HpLim == 0
         let yielding = CmmMachOp (mo_wordEq dflags)
                        [CmmReg (CmmGlobal HpLim),
@@ -637,3 +646,27 @@ do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
             -- stack check succeeds. Otherwise we might end up
             -- with slop at the end of the current block, which can
             -- confuse the LDV profiler.
+
+-- Note [Self-recursive loop header]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Self-recursive loop header is required by loopification optimization (See
+-- Note [Self-recursive tail calls] in StgCmmExpr). We emit it if:
+--
+-- 1. There is information about self-loop in the FCode environment. We don't
+--    check the binder (first component of the self_loop_info) because we are
+--    certain that if the self-loop info is present then we are compiling the
+--    binder body. Reason: the only possible way to get here with the
+--    self_loop_info present is from closureCodeBody.
+--
+-- 2. checkYield && isJust mb_stk_hwm. checkYield tells us that it is possible
+--    to preempt the heap check (see #367 for motivation behind this check). It
+--    is True for heap checks placed at the entry to a function and
+--    let-no-escape heap checks but false for other heap checks (eg. in case
+--    alternatives or created from hand-written high-level Cmm). The second
+--    check (isJust mb_stk_hwm) is true for heap checks at the entry to a
+--    function and some heap checks created in hand-written Cmm. Otherwise it
+--    is Nothing. In other words the only situation when both conditions are
+--    true is when compiling stack and heap checks at the entry to a
+--    function. This is the only situation when we want to emit a self-loop
+--    label.
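Note [Self-recursive loop header] reduces to a two-part test: self-loop information must be present in the FCode environment, and both checkYield and the stack high-water mark must be set, which happens only for the combined stack-and-heap check at a function entry. The standalone sketch below mirrors that guard from do_checks; the String label, the Int high-water mark and the list of pseudo-instructions are simplifications standing in for the real FCode and Cmm types.

module Main where

import Data.Maybe (isJust)

-- Simplified stand-ins for GHC's types (assumptions, not real GHC code).
type LoopHeaderId = String
type SelfLoopInfo = (String, LoopHeaderId, [String])  -- (binder, loop header, arg regs)

-- Emit the self-loop label only when self-loop info is available and we
-- are generating the entry-point check, i.e. checkYield is True and a
-- stack high-water mark is supplied, mirroring the guard in do_checks.
emitSelfLoopLabel :: Maybe SelfLoopInfo   -- result of getSelfLoop
                  -> Bool                 -- checkYield
                  -> Maybe Int            -- mb_stk_hwm
                  -> [String]             -- pseudo-code emitted
emitSelfLoopLabel self_loop_info checkYield mb_stk_hwm =
  case self_loop_info of
    Just (_, loop_header_id, _)
      | checkYield && isJust mb_stk_hwm -> [loop_header_id ++ ":"]
    _otherwise -> []

main :: IO ()
main = do
  -- stack-and-heap check at a function entry: label is emitted
  print (emitSelfLoopLabel (Just ("go", "c2fL", ["R2", "R3"])) True (Just 8))
  -- heap check in a case alternative: no label
  print (emitSelfLoopLabel (Just ("go", "c2fL", ["R2", "R3"])) False Nothing)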