summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/cmm/CmmContFlowOpt.hs435
1 files changed, 271 insertions, 164 deletions
diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs
index f39ab3c62b..7a008e2c0b 100644
--- a/compiler/cmm/CmmContFlowOpt.hs
+++ b/compiler/cmm/CmmContFlowOpt.hs
@@ -14,15 +14,127 @@ import BlockId
import Cmm
import CmmUtils
import Maybes
+import Panic
import Control.Monad
import Prelude hiding (succ, unzip, zip)
------------------------------------------------------------------------------
+
+-- Note [What is shortcutting]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Consider this Cmm code:
+--
+-- L1: ...
+-- goto L2;
+-- L2: goto L3;
+-- L3: ...
--
--- Control-flow optimisations
+-- Here L2 is an empty block and contains only an unconditional branch
+-- to L3. In this situation any block that jumps to L2 can jump
+-- directly to L3:
--
------------------------------------------------------------------------------
+-- L1: ...
+-- goto L3;
+-- L2: goto L3;
+-- L3: ...
+--
+-- In this situation we say that we shortcut L2 to L3. One of
+-- consequences of shortcutting is that some blocks of code may become
+-- unreachable (in the example above this is true for L2).
+
+
+-- Note [Control-flow optimisations]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- This optimisation does four things:
+--
+-- - If a block finishes in an unconditonal branch to another block
+-- and that is the only jump to that block we concatenate the
+-- destination block at the end of the current one.
+--
+-- - If a block finishes in an unconditional branch, we may be able
+-- to shortcut the destination block.
+--
+-- - If a block finishes in a call whose continuation block is a
+-- goto, then we can shortcut the destination, making the
+-- continuation block the destination of the goto - but see Note
+-- [Shortcut call returns].
+--
+-- - For block finishing in conditional branch we try to invert the
+-- condition and shortcut destination of alternatives.
+--
+-- - For any block that is not a call we try to shortcut the
+-- destination(s).
+--
+-- Blocks are processed using postorder DFS traversal. A side effect
+-- of determining traversal order with a graph search is elimination
+-- of any blocks that are unreachable.
+--
+-- Transformations are improved by working from the end of the graph
+-- towards the beginning, because we may be able to perform many
+-- shortcuts in one go.
+
+
+-- Note [Shortcut call returns]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- We are going to maintain the "current" graph (BlockEnv CmmBlock) as
+-- we go, and also a mapping from BlockId to BlockId, representing
+-- continuation labels that we have renamed. This latter mapping is
+-- important because we might shortcut a CmmCall continuation. For
+-- example:
+--
+-- Sp[0] = L
+-- call g returns to L
+-- L: goto M
+-- M: ...
+--
+-- So when we shortcut the L block, we need to replace not only
+-- the continuation of the call, but also references to L in the
+-- code (e.g. the assignment Sp[0] = L):
+--
+-- Sp[0] = M
+-- call g returns to M
+-- M: ...
+--
+-- So we keep track of which labels we have renamed and apply the mapping
+-- at the end with replaceLabels.
+
+
+-- Note [Shortcut call returns and proc-points]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Consider this code that you might get from a recursive
+-- let-no-escape:
+--
+-- goto L1
+-- L1:
+-- if (Hp > HpLim) then L2 else L3
+-- L2:
+-- call stg_gc_noregs returns to L4
+-- L4:
+-- goto L1
+-- L3:
+-- ...
+-- goto L1
+--
+-- Then the control-flow optimiser shortcuts L4. But that turns L1
+-- into the call-return proc point, and every iteration of the loop
+-- has to shuffle variables to and from the stack. So we must *not*
+-- shortcut L4.
+--
+-- Moreover not shortcutting call returns is probably fine. If L4 can
+-- concat with its branch target then it will still do so. And we
+-- save some compile time because we don't have to traverse all the
+-- code in replaceLabels.
+--
+-- However, we probably do want to do this if we are splitting proc
+-- points, because L1 will be a proc-point anyway, so merging it with
+-- L4 reduces the number of proc points. Unfortunately recursive
+-- let-no-escapes won't generate very good code with proc-point
+-- splitting on - we should probably compile them to explicitly use
+-- the native calling convention instead.
cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
cmmCfgOpts split g = fst (blockConcat split g)
@@ -43,65 +155,15 @@ cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
else info{ cit_lbl = infoTblLbl k' })
| otherwise
= (k,info)
-
cmmCfgOptsProc _ top = top
------------------------------------------------------------------------------
---
--- Block concatenation
---
------------------------------------------------------------------------------
-
--- This optimisation does three things:
---
--- - If a block finishes with an unconditional branch, then we may
--- be able to duplicate the block it points to and remove the
--- branch. We do this if either
--- a) the destination block is small (e.g. just another branch), or
--- b) this is the only jump to this particular destination block.
---
--- - If a block finishes in a call whose continuation block is a
--- goto, then we can shortcut the destination, making the
--- destination of the goto into the continuation. E.g.
--- call g returns to L ==> call g returns to M
--- L: goto M M: ...blah...
--- M: ...blah...
--- (but see Note [shortcut call returns])
---
--- - Remove any unreachable blocks from the graph. This is a side
--- effect of starting with a postorder DFS traversal of the graph
-
--- Both transformations are improved by working from the end of the
--- graph towards the beginning, because we may be able to perform many
--- shortcuts in one go.
-
--- We need to walk over the blocks from the end back to the
--- beginning. We are going to maintain the "current" graph
--- (BlockEnv CmmBlock) as we go, and also a mapping from BlockId
--- to BlockId, representing continuation labels that we have
--- renamed. This latter mapping is important because we might
--- shortcut a CmmCall continuation. For example:
---
--- Sp[0] = L
--- call g returns to L
---
--- L: goto M
---
--- M: ...
---
--- So when we shortcut the L block, we need to replace not only
--- the continuation of the call, but also references to L in the
--- code (e.g. the assignment Sp[0] = L). So we keep track of
--- which labels we have renamed and apply the mapping at the end
--- with replaceLabels.
-
blockConcat :: Bool -> CmmGraph -> (CmmGraph, BlockEnv BlockId)
blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
= (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
where
- -- we might be able to shortcut the entry BlockId itself.
- -- remember to update the shortcut_map', since we also have to
+ -- We might be able to shortcut the entry BlockId itself.
+ -- Remember to update the shortcut_map, since we also have to
-- update the info_tbls mapping now.
(new_entry, shortcut_map')
| Just entry_blk <- mapLookup entry_id new_blocks
@@ -110,146 +172,196 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
| otherwise
= (entry_id, shortcut_map)
+ -- blocks is a list of blocks in DFS postorder, while blockmap is
+ -- a map of blocks. We process each element from blocks and update
+ -- blockmap accordingly
blocks = postorderDfs g
blockmap = foldr addBlock emptyBody blocks
- -- the initial blockmap is constructed from the postorderDfs result,
- -- so that we automatically throw away unreachable blocks.
- (new_blocks, shortcut_map) =
- foldr maybe_concat (blockmap, mapEmpty) blocks
+ -- Accumulator contains three components:
+ -- * map of blocks in a graph
+ -- * map of shortcut labels. See Note [Shortcut call returns]
+ -- * map containing number of predecessors for each block. We discard
+ -- it after we process all blocks.
+ (new_blocks, shortcut_map, _) =
+ foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks
+
+ -- Map of predecessors for initial graph. We increase number of
+ -- predecessors for entry block by one to denote that it is
+ -- target of a jump, even if no block in the current graph jumps
+ -- to it.
+ initialBackEdges = incPreds entry_id (predMap blocks)
maybe_concat :: CmmBlock
- -> (BlockEnv CmmBlock, BlockEnv BlockId)
- -> (BlockEnv CmmBlock, BlockEnv BlockId)
- maybe_concat block (blocks, shortcut_map)
+ -> (BlockEnv CmmBlock, BlockEnv BlockId, BlockEnv Int)
+ -> (BlockEnv CmmBlock, BlockEnv BlockId, BlockEnv Int)
+ maybe_concat block (blocks, shortcut_map, backEdges)
+ -- If:
+ -- (1) current block ends with unconditional branch to b' and
+ -- (2) it has exactly one predecessor (namely, current block) and
+ -- (3) we have not mapped any other label to b'
+ -- (see Note [Shortcut call returns]).
+ -- Then:
+ -- (1) append b' block at the end of current block
+ -- (2) remove b' from the map of blocks
+ -- (3) remove information about b' from predecessors map
+ --
+ -- This guard must be first so that we always eliminate blocks that have
+ -- only one predecessor. If we had a target block that is both
+ -- shorcutable and has only one predecessor and attempted to shortcut it
+ -- first we would make that block unreachable but would not remove it
+ -- from the graph.
| CmmBranch b' <- last
, Just blk' <- mapLookup b' blocks
- , shouldConcatWith b' blk'
- = (mapInsert bid (splice head blk') blocks, shortcut_map)
-
- -- calls: if we can shortcut the continuation label, then
- -- we must *also* remember to substitute for the label in the
- -- code, because we will push it somewhere.
- | splitting_procs -- Note [shortcut call returns]
+ , hasOnePredecessor b'
+ , hasNotBeenMappedTo b' shortcut_map
+ = let bid' = entryLabel blk'
+ in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
+ , shortcut_map
+ , mapDelete b' backEdges )
+
+ -- If:
+ -- (1) current block ends with unconditional branch to b' and
+ -- (2) we can shortcut block b'
+ -- Then:
+ -- (1) concatenate b' at the end of current block, effectively
+ -- changing target of uncondtional jump from b' to dest
+ -- (2) increase number of predecessors of dest by 1
+ -- (3) decrease number of predecessors of b' by 1
+ | CmmBranch b' <- last
+ , Just blk' <- mapLookup b' blocks
+ , Just dest <- canShortcut blk'
+ = ( mapInsert bid (splice head blk') blocks, shortcut_map,
+ decPreds b' $ incPreds dest backEdges )
+
+ -- If:
+ -- (1) we are splitting proc points (see Note
+ -- [Shortcut call returns and proc-points]) and
+ -- (2) current block is a CmmCall or CmmForeignCall with
+ -- continuation b' and
+ -- (3) we can shortcut that continuation to dest
+ -- Then:
+ -- (1) we change continuation to point to b'
+ -- (2) create mapping from b' to dest
+ -- (3) increase number of predecessors of dest by 1
+ -- (4) decrease number of predecessors of b' by 1
+ --
+ -- Later we will use replaceLabels to substitute all occurences of b'
+ -- with dest.
+ | splitting_procs
, Just b' <- callContinuation_maybe last
, Just blk' <- mapLookup b' blocks
, Just dest <- canShortcut blk'
- = (blocks, mapInsert b' dest shortcut_map)
- -- replaceLabels will substitute dest for b' everywhere, later
-
- -- non-calls: see if we can shortcut any of the successors,
- -- and check whether we should invert the conditional
+ = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
+ , mapInsert b' dest shortcut_map
+ , decPreds b' $ incPreds dest backEdges )
+
+ -- If:
+ -- (1) a block does not end with a call
+ -- Then:
+ -- (1) if it ends with a conditional attempt to invert the
+ -- conditional
+ -- (2) attempt to shortcut all destination blocks
+ -- (3) if new successors of a block are different from the old ones
+ -- we update the of predecessors accordingly
| Nothing <- callContinuation_maybe last
- = ( mapInsert bid (blockJoinTail head swapcond_last) blocks
- , shortcut_map )
-
+ = let oldSuccs = successors last
+ newSuccs = successors swapcond_last
+ in ( mapInsert bid (blockJoinTail head swapcond_last) blocks
+ , shortcut_map
+ , if oldSuccs == newSuccs
+ then backEdges
+ else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )
+
+ -- Otherwise don't do anything
| otherwise
- = (blocks, shortcut_map)
+ = ( blocks, shortcut_map, backEdges )
where
(head, last) = blockSplitTail block
bid = entryLabel block
+ -- Changes continuation of a call to a specified label
+ update_cont dest =
+ case last of
+ CmmCall{} -> last { cml_cont = Just dest }
+ CmmForeignCall{} -> last { succ = dest }
+ _ -> panic "Can't shortcut continuation."
+
+ -- Attempts to shortcut successors of last node
shortcut_last = mapSuccessors shortcut last
where
shortcut l =
case mapLookup l blocks of
- Just b | Just dest <- canShortcut b -> dest
+ Just b | Just dest <- canShortcut b -> dest
_otherwise -> l
- -- for a conditional, we invert the conditional if that
- -- would make it more likely that the branch-not-taken case
- -- becomes a fallthrough. This helps the native codegen a
- -- little bit, and probably has no effect on LLVM. It's
- -- convenient to do it here, where we have the information
- -- about predecessors.
- --
+ -- For a conditional, we invert the conditional if that would make it
+ -- more likely that the branch-not-taken case becomes a fallthrough.
+ -- This helps the native codegen a little bit, and probably has no
+ -- effect on LLVM. It's convenient to do it here, where we have the
+ -- information about predecessors.
swapcond_last
| CmmCondBranch cond t f <- shortcut_last
, numPreds f > 1
- , numPreds t == 1
+ , hasOnePredecessor t
, Just cond' <- maybeInvertCmmExpr cond
= CmmCondBranch cond' f t
| otherwise
= shortcut_last
-
- shouldConcatWith b block
- | okToDuplicate block = True -- short enough to duplicate
- | numPreds b == 1 = True -- only one predecessor: go for it
- | otherwise = False
-
- numPreds bid = mapLookup bid backEdges `orElse` 0
-
- canShortcut :: CmmBlock -> Maybe BlockId
- canShortcut block
- | (_, middle, CmmBranch dest) <- blockSplit block
- , isEmptyBlock middle
- = Just dest
- | otherwise
- = Nothing
-
- backEdges :: BlockEnv Int -- number of predecessors for each block
- backEdges = mapInsertWith (+) entry_id 1 $ -- add 1 for the entry id
- predMap blocks
-
- splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
- splice head rest = head `blockAppend` snd (blockSplitHead rest)
-
-
+ -- Number of predecessors for a block
+ numPreds bid = mapLookup bid backEdges `orElse` 0
+
+ hasOnePredecessor b = numPreds b == 1
+
+ hasNotBeenMappedTo :: BlockId -> BlockEnv BlockId -> Bool
+ hasNotBeenMappedTo b successor_map = mapMember b successor_map
+
+-- Functions for incrementing and decrementing number of predecessors. If
+-- decrementing would set the predecessor count to 0, we remove entry from the
+-- map.
+-- Invariant: if a block has no predecessors it should be dropped from the
+-- graph because it is unreachable. maybe_concat is constructed to maintain
+-- that invariant, but calling replaceLabels may introduce unreachable blocks.
+-- We rely on subsequent passes in the Cmm pipeline to remove unreachable
+-- blocks.
+incPreds, decPreds :: BlockId -> BlockEnv Int -> BlockEnv Int
+incPreds bid edges = mapInsertWith (+) bid 1 edges
+decPreds bid edges = case mapLookup bid edges of
+ Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
+ Just _ -> mapDelete bid edges
+ _ -> edges
+
+
+-- Checks if a block consists only of "goto dest". If it does than we return
+-- "Just dest" label. See Note [What is shortcutting]
+canShortcut :: CmmBlock -> Maybe BlockId
+canShortcut block
+ | (_, middle, CmmBranch dest) <- blockSplit block
+ , isEmptyBlock middle
+ = Just dest
+ | otherwise
+ = Nothing
+
+
+-- Concatenates two blocks. First one is assumed to be open on exit, the second
+-- is assumed to be closed on entry (i.e. it has a label attached to it, which
+-- the splice function removes by calling snd on result of blockSplitHead).
+splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
+splice head rest = head `blockAppend` snd (blockSplitHead rest)
+
+
+-- If node is a call with continuation call return Just label of that
+-- continuation. Otherwise return Nothing.
callContinuation_maybe :: CmmNode O C -> Maybe BlockId
callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
callContinuation_maybe (CmmForeignCall { succ = b }) = Just b
callContinuation_maybe _ = Nothing
-okToDuplicate :: CmmBlock -> Bool
-okToDuplicate block
- = case blockSplit block of
- (_, m, CmmBranch _) -> isEmptyBlock m
- -- cheap and cheerful; we might expand this in the future to
- -- e.g. spot blocks that represent a single instruction or two.
- -- Be careful: a CmmCall can be more than one instruction, it
- -- has a CmmExpr inside it.
- _otherwise -> False
-
-
-{- Note [shortcut call returns]
-
-Consider this code that you might get from a recursive let-no-escape:
-
- goto L1
- L1:
- if (Hp > HpLim) then L2 else L3
- L2:
- call stg_gc_noregs returns to L4
- L4:
- goto L1
- L3:
- ...
- goto L1
-
-Then the control-flow optimiser shortcuts L4. But that turns L1
-into the call-return proc point, and every iteration of the loop
-has to shuffle variables to and from the stack. So we must *not*
-shortcut L4.
-
-Moreover not shortcutting call returns is probably fine. If L4 can
-concat with its branch target then it will still do so. And we
-save some compile time because we don't have to traverse all the
-code in replaceLabels.
-
-However, we probably do want to do this if we are splitting proc
-points, because L1 will be a proc-point anyway, so merging it with L4
-reduces the number of proc points. Unfortunately recursive
-let-no-escapes won't generate very good code with proc-point splitting
-on - we should probably compile them to explicitly use the native
-calling convention instead.
--}
-
-------------------------------------------------------------------------
+
-- Map over the CmmGraph, replacing each label with its mapping in the
-- supplied BlockEnv.
-
replaceLabels :: BlockEnv BlockId -> CmmGraph -> CmmGraph
replaceLabels env g
| mapNull env = g
@@ -275,19 +387,14 @@ replaceLabels env g
mkCmmCondBranch :: CmmExpr -> Label -> Label -> CmmNode O C
mkCmmCondBranch p t f = if t == f then CmmBranch t else CmmCondBranch p t f
-----------------------------------------------------------------
--- Build a map from a block to its set of predecessors. Very useful.
-
+-- Build a map from a block to its set of predecessors.
predMap :: [CmmBlock] -> BlockEnv Int
-predMap blocks = foldr add_preds mapEmpty blocks -- find the back edges
+predMap blocks = foldr add_preds mapEmpty blocks
where
add_preds block env = foldr add env (successors block)
where add lbl env = mapInsertWith (+) lbl 1 env
------------------------------------------------------------------------------
---
-- Removing unreachable blocks
-
removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
removeUnreachableBlocksProc (CmmProc info lbl live g)
= CmmProc info lbl live (removeUnreachableBlocks g)