diff options
-rw-r--r-- | compiler/cmm/CmmContFlowOpt.hs | 435 |
1 files changed, 271 insertions, 164 deletions
diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs index f39ab3c62b..7a008e2c0b 100644 --- a/compiler/cmm/CmmContFlowOpt.hs +++ b/compiler/cmm/CmmContFlowOpt.hs @@ -14,15 +14,127 @@ import BlockId import Cmm import CmmUtils import Maybes +import Panic import Control.Monad import Prelude hiding (succ, unzip, zip) ------------------------------------------------------------------------------ + +-- Note [What is shortcutting] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- Consider this Cmm code: +-- +-- L1: ... +-- goto L2; +-- L2: goto L3; +-- L3: ... -- --- Control-flow optimisations +-- Here L2 is an empty block and contains only an unconditional branch +-- to L3. In this situation any block that jumps to L2 can jump +-- directly to L3: -- ------------------------------------------------------------------------------ +-- L1: ... +-- goto L3; +-- L2: goto L3; +-- L3: ... +-- +-- In this situation we say that we shortcut L2 to L3. One of +-- consequences of shortcutting is that some blocks of code may become +-- unreachable (in the example above this is true for L2). + + +-- Note [Control-flow optimisations] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- This optimisation does four things: +-- +-- - If a block finishes in an unconditonal branch to another block +-- and that is the only jump to that block we concatenate the +-- destination block at the end of the current one. +-- +-- - If a block finishes in an unconditional branch, we may be able +-- to shortcut the destination block. +-- +-- - If a block finishes in a call whose continuation block is a +-- goto, then we can shortcut the destination, making the +-- continuation block the destination of the goto - but see Note +-- [Shortcut call returns]. +-- +-- - For block finishing in conditional branch we try to invert the +-- condition and shortcut destination of alternatives. +-- +-- - For any block that is not a call we try to shortcut the +-- destination(s). +-- +-- Blocks are processed using postorder DFS traversal. A side effect +-- of determining traversal order with a graph search is elimination +-- of any blocks that are unreachable. +-- +-- Transformations are improved by working from the end of the graph +-- towards the beginning, because we may be able to perform many +-- shortcuts in one go. + + +-- Note [Shortcut call returns] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- We are going to maintain the "current" graph (BlockEnv CmmBlock) as +-- we go, and also a mapping from BlockId to BlockId, representing +-- continuation labels that we have renamed. This latter mapping is +-- important because we might shortcut a CmmCall continuation. For +-- example: +-- +-- Sp[0] = L +-- call g returns to L +-- L: goto M +-- M: ... +-- +-- So when we shortcut the L block, we need to replace not only +-- the continuation of the call, but also references to L in the +-- code (e.g. the assignment Sp[0] = L): +-- +-- Sp[0] = M +-- call g returns to M +-- M: ... +-- +-- So we keep track of which labels we have renamed and apply the mapping +-- at the end with replaceLabels. + + +-- Note [Shortcut call returns and proc-points] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- Consider this code that you might get from a recursive +-- let-no-escape: +-- +-- goto L1 +-- L1: +-- if (Hp > HpLim) then L2 else L3 +-- L2: +-- call stg_gc_noregs returns to L4 +-- L4: +-- goto L1 +-- L3: +-- ... +-- goto L1 +-- +-- Then the control-flow optimiser shortcuts L4. But that turns L1 +-- into the call-return proc point, and every iteration of the loop +-- has to shuffle variables to and from the stack. So we must *not* +-- shortcut L4. +-- +-- Moreover not shortcutting call returns is probably fine. If L4 can +-- concat with its branch target then it will still do so. And we +-- save some compile time because we don't have to traverse all the +-- code in replaceLabels. +-- +-- However, we probably do want to do this if we are splitting proc +-- points, because L1 will be a proc-point anyway, so merging it with +-- L4 reduces the number of proc points. Unfortunately recursive +-- let-no-escapes won't generate very good code with proc-point +-- splitting on - we should probably compile them to explicitly use +-- the native calling convention instead. cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph cmmCfgOpts split g = fst (blockConcat split g) @@ -43,65 +155,15 @@ cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g' else info{ cit_lbl = infoTblLbl k' }) | otherwise = (k,info) - cmmCfgOptsProc _ top = top ------------------------------------------------------------------------------ --- --- Block concatenation --- ------------------------------------------------------------------------------ - --- This optimisation does three things: --- --- - If a block finishes with an unconditional branch, then we may --- be able to duplicate the block it points to and remove the --- branch. We do this if either --- a) the destination block is small (e.g. just another branch), or --- b) this is the only jump to this particular destination block. --- --- - If a block finishes in a call whose continuation block is a --- goto, then we can shortcut the destination, making the --- destination of the goto into the continuation. E.g. --- call g returns to L ==> call g returns to M --- L: goto M M: ...blah... --- M: ...blah... --- (but see Note [shortcut call returns]) --- --- - Remove any unreachable blocks from the graph. This is a side --- effect of starting with a postorder DFS traversal of the graph - --- Both transformations are improved by working from the end of the --- graph towards the beginning, because we may be able to perform many --- shortcuts in one go. - --- We need to walk over the blocks from the end back to the --- beginning. We are going to maintain the "current" graph --- (BlockEnv CmmBlock) as we go, and also a mapping from BlockId --- to BlockId, representing continuation labels that we have --- renamed. This latter mapping is important because we might --- shortcut a CmmCall continuation. For example: --- --- Sp[0] = L --- call g returns to L --- --- L: goto M --- --- M: ... --- --- So when we shortcut the L block, we need to replace not only --- the continuation of the call, but also references to L in the --- code (e.g. the assignment Sp[0] = L). So we keep track of --- which labels we have renamed and apply the mapping at the end --- with replaceLabels. - blockConcat :: Bool -> CmmGraph -> (CmmGraph, BlockEnv BlockId) blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map') where - -- we might be able to shortcut the entry BlockId itself. - -- remember to update the shortcut_map', since we also have to + -- We might be able to shortcut the entry BlockId itself. + -- Remember to update the shortcut_map, since we also have to -- update the info_tbls mapping now. (new_entry, shortcut_map') | Just entry_blk <- mapLookup entry_id new_blocks @@ -110,146 +172,196 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } | otherwise = (entry_id, shortcut_map) + -- blocks is a list of blocks in DFS postorder, while blockmap is + -- a map of blocks. We process each element from blocks and update + -- blockmap accordingly blocks = postorderDfs g blockmap = foldr addBlock emptyBody blocks - -- the initial blockmap is constructed from the postorderDfs result, - -- so that we automatically throw away unreachable blocks. - (new_blocks, shortcut_map) = - foldr maybe_concat (blockmap, mapEmpty) blocks + -- Accumulator contains three components: + -- * map of blocks in a graph + -- * map of shortcut labels. See Note [Shortcut call returns] + -- * map containing number of predecessors for each block. We discard + -- it after we process all blocks. + (new_blocks, shortcut_map, _) = + foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks + + -- Map of predecessors for initial graph. We increase number of + -- predecessors for entry block by one to denote that it is + -- target of a jump, even if no block in the current graph jumps + -- to it. + initialBackEdges = incPreds entry_id (predMap blocks) maybe_concat :: CmmBlock - -> (BlockEnv CmmBlock, BlockEnv BlockId) - -> (BlockEnv CmmBlock, BlockEnv BlockId) - maybe_concat block (blocks, shortcut_map) + -> (BlockEnv CmmBlock, BlockEnv BlockId, BlockEnv Int) + -> (BlockEnv CmmBlock, BlockEnv BlockId, BlockEnv Int) + maybe_concat block (blocks, shortcut_map, backEdges) + -- If: + -- (1) current block ends with unconditional branch to b' and + -- (2) it has exactly one predecessor (namely, current block) and + -- (3) we have not mapped any other label to b' + -- (see Note [Shortcut call returns]). + -- Then: + -- (1) append b' block at the end of current block + -- (2) remove b' from the map of blocks + -- (3) remove information about b' from predecessors map + -- + -- This guard must be first so that we always eliminate blocks that have + -- only one predecessor. If we had a target block that is both + -- shorcutable and has only one predecessor and attempted to shortcut it + -- first we would make that block unreachable but would not remove it + -- from the graph. | CmmBranch b' <- last , Just blk' <- mapLookup b' blocks - , shouldConcatWith b' blk' - = (mapInsert bid (splice head blk') blocks, shortcut_map) - - -- calls: if we can shortcut the continuation label, then - -- we must *also* remember to substitute for the label in the - -- code, because we will push it somewhere. - | splitting_procs -- Note [shortcut call returns] + , hasOnePredecessor b' + , hasNotBeenMappedTo b' shortcut_map + = let bid' = entryLabel blk' + in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks + , shortcut_map + , mapDelete b' backEdges ) + + -- If: + -- (1) current block ends with unconditional branch to b' and + -- (2) we can shortcut block b' + -- Then: + -- (1) concatenate b' at the end of current block, effectively + -- changing target of uncondtional jump from b' to dest + -- (2) increase number of predecessors of dest by 1 + -- (3) decrease number of predecessors of b' by 1 + | CmmBranch b' <- last + , Just blk' <- mapLookup b' blocks + , Just dest <- canShortcut blk' + = ( mapInsert bid (splice head blk') blocks, shortcut_map, + decPreds b' $ incPreds dest backEdges ) + + -- If: + -- (1) we are splitting proc points (see Note + -- [Shortcut call returns and proc-points]) and + -- (2) current block is a CmmCall or CmmForeignCall with + -- continuation b' and + -- (3) we can shortcut that continuation to dest + -- Then: + -- (1) we change continuation to point to b' + -- (2) create mapping from b' to dest + -- (3) increase number of predecessors of dest by 1 + -- (4) decrease number of predecessors of b' by 1 + -- + -- Later we will use replaceLabels to substitute all occurences of b' + -- with dest. + | splitting_procs , Just b' <- callContinuation_maybe last , Just blk' <- mapLookup b' blocks , Just dest <- canShortcut blk' - = (blocks, mapInsert b' dest shortcut_map) - -- replaceLabels will substitute dest for b' everywhere, later - - -- non-calls: see if we can shortcut any of the successors, - -- and check whether we should invert the conditional + = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks + , mapInsert b' dest shortcut_map + , decPreds b' $ incPreds dest backEdges ) + + -- If: + -- (1) a block does not end with a call + -- Then: + -- (1) if it ends with a conditional attempt to invert the + -- conditional + -- (2) attempt to shortcut all destination blocks + -- (3) if new successors of a block are different from the old ones + -- we update the of predecessors accordingly | Nothing <- callContinuation_maybe last - = ( mapInsert bid (blockJoinTail head swapcond_last) blocks - , shortcut_map ) - + = let oldSuccs = successors last + newSuccs = successors swapcond_last + in ( mapInsert bid (blockJoinTail head swapcond_last) blocks + , shortcut_map + , if oldSuccs == newSuccs + then backEdges + else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs ) + + -- Otherwise don't do anything | otherwise - = (blocks, shortcut_map) + = ( blocks, shortcut_map, backEdges ) where (head, last) = blockSplitTail block bid = entryLabel block + -- Changes continuation of a call to a specified label + update_cont dest = + case last of + CmmCall{} -> last { cml_cont = Just dest } + CmmForeignCall{} -> last { succ = dest } + _ -> panic "Can't shortcut continuation." + + -- Attempts to shortcut successors of last node shortcut_last = mapSuccessors shortcut last where shortcut l = case mapLookup l blocks of - Just b | Just dest <- canShortcut b -> dest + Just b | Just dest <- canShortcut b -> dest _otherwise -> l - -- for a conditional, we invert the conditional if that - -- would make it more likely that the branch-not-taken case - -- becomes a fallthrough. This helps the native codegen a - -- little bit, and probably has no effect on LLVM. It's - -- convenient to do it here, where we have the information - -- about predecessors. - -- + -- For a conditional, we invert the conditional if that would make it + -- more likely that the branch-not-taken case becomes a fallthrough. + -- This helps the native codegen a little bit, and probably has no + -- effect on LLVM. It's convenient to do it here, where we have the + -- information about predecessors. swapcond_last | CmmCondBranch cond t f <- shortcut_last , numPreds f > 1 - , numPreds t == 1 + , hasOnePredecessor t , Just cond' <- maybeInvertCmmExpr cond = CmmCondBranch cond' f t | otherwise = shortcut_last - - shouldConcatWith b block - | okToDuplicate block = True -- short enough to duplicate - | numPreds b == 1 = True -- only one predecessor: go for it - | otherwise = False - - numPreds bid = mapLookup bid backEdges `orElse` 0 - - canShortcut :: CmmBlock -> Maybe BlockId - canShortcut block - | (_, middle, CmmBranch dest) <- blockSplit block - , isEmptyBlock middle - = Just dest - | otherwise - = Nothing - - backEdges :: BlockEnv Int -- number of predecessors for each block - backEdges = mapInsertWith (+) entry_id 1 $ -- add 1 for the entry id - predMap blocks - - splice :: Block CmmNode C O -> CmmBlock -> CmmBlock - splice head rest = head `blockAppend` snd (blockSplitHead rest) - - + -- Number of predecessors for a block + numPreds bid = mapLookup bid backEdges `orElse` 0 + + hasOnePredecessor b = numPreds b == 1 + + hasNotBeenMappedTo :: BlockId -> BlockEnv BlockId -> Bool + hasNotBeenMappedTo b successor_map = mapMember b successor_map + +-- Functions for incrementing and decrementing number of predecessors. If +-- decrementing would set the predecessor count to 0, we remove entry from the +-- map. +-- Invariant: if a block has no predecessors it should be dropped from the +-- graph because it is unreachable. maybe_concat is constructed to maintain +-- that invariant, but calling replaceLabels may introduce unreachable blocks. +-- We rely on subsequent passes in the Cmm pipeline to remove unreachable +-- blocks. +incPreds, decPreds :: BlockId -> BlockEnv Int -> BlockEnv Int +incPreds bid edges = mapInsertWith (+) bid 1 edges +decPreds bid edges = case mapLookup bid edges of + Just preds | preds > 1 -> mapInsert bid (preds - 1) edges + Just _ -> mapDelete bid edges + _ -> edges + + +-- Checks if a block consists only of "goto dest". If it does than we return +-- "Just dest" label. See Note [What is shortcutting] +canShortcut :: CmmBlock -> Maybe BlockId +canShortcut block + | (_, middle, CmmBranch dest) <- blockSplit block + , isEmptyBlock middle + = Just dest + | otherwise + = Nothing + + +-- Concatenates two blocks. First one is assumed to be open on exit, the second +-- is assumed to be closed on entry (i.e. it has a label attached to it, which +-- the splice function removes by calling snd on result of blockSplitHead). +splice :: Block CmmNode C O -> CmmBlock -> CmmBlock +splice head rest = head `blockAppend` snd (blockSplitHead rest) + + +-- If node is a call with continuation call return Just label of that +-- continuation. Otherwise return Nothing. callContinuation_maybe :: CmmNode O C -> Maybe BlockId callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b callContinuation_maybe (CmmForeignCall { succ = b }) = Just b callContinuation_maybe _ = Nothing -okToDuplicate :: CmmBlock -> Bool -okToDuplicate block - = case blockSplit block of - (_, m, CmmBranch _) -> isEmptyBlock m - -- cheap and cheerful; we might expand this in the future to - -- e.g. spot blocks that represent a single instruction or two. - -- Be careful: a CmmCall can be more than one instruction, it - -- has a CmmExpr inside it. - _otherwise -> False - - -{- Note [shortcut call returns] - -Consider this code that you might get from a recursive let-no-escape: - - goto L1 - L1: - if (Hp > HpLim) then L2 else L3 - L2: - call stg_gc_noregs returns to L4 - L4: - goto L1 - L3: - ... - goto L1 - -Then the control-flow optimiser shortcuts L4. But that turns L1 -into the call-return proc point, and every iteration of the loop -has to shuffle variables to and from the stack. So we must *not* -shortcut L4. - -Moreover not shortcutting call returns is probably fine. If L4 can -concat with its branch target then it will still do so. And we -save some compile time because we don't have to traverse all the -code in replaceLabels. - -However, we probably do want to do this if we are splitting proc -points, because L1 will be a proc-point anyway, so merging it with L4 -reduces the number of proc points. Unfortunately recursive -let-no-escapes won't generate very good code with proc-point splitting -on - we should probably compile them to explicitly use the native -calling convention instead. --} - ------------------------------------------------------------------------- + -- Map over the CmmGraph, replacing each label with its mapping in the -- supplied BlockEnv. - replaceLabels :: BlockEnv BlockId -> CmmGraph -> CmmGraph replaceLabels env g | mapNull env = g @@ -275,19 +387,14 @@ replaceLabels env g mkCmmCondBranch :: CmmExpr -> Label -> Label -> CmmNode O C mkCmmCondBranch p t f = if t == f then CmmBranch t else CmmCondBranch p t f ----------------------------------------------------------------- --- Build a map from a block to its set of predecessors. Very useful. - +-- Build a map from a block to its set of predecessors. predMap :: [CmmBlock] -> BlockEnv Int -predMap blocks = foldr add_preds mapEmpty blocks -- find the back edges +predMap blocks = foldr add_preds mapEmpty blocks where add_preds block env = foldr add env (successors block) where add lbl env = mapInsertWith (+) lbl 1 env ------------------------------------------------------------------------------ --- -- Removing unreachable blocks - removeUnreachableBlocksProc :: CmmDecl -> CmmDecl removeUnreachableBlocksProc (CmmProc info lbl live g) = CmmProc info lbl live (removeUnreachableBlocks g) |