summaryrefslogtreecommitdiff
path: root/compiler/GHC/Cmm/ContFlowOpt.hs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/GHC/Cmm/ContFlowOpt.hs')
-rw-r--r--compiler/GHC/Cmm/ContFlowOpt.hs451
1 files changed, 451 insertions, 0 deletions
diff --git a/compiler/GHC/Cmm/ContFlowOpt.hs b/compiler/GHC/Cmm/ContFlowOpt.hs
new file mode 100644
index 0000000000..7765972d02
--- /dev/null
+++ b/compiler/GHC/Cmm/ContFlowOpt.hs
@@ -0,0 +1,451 @@
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE BangPatterns #-}
+{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
+module GHC.Cmm.ContFlowOpt
+ ( cmmCfgOpts
+ , cmmCfgOptsProc
+ , removeUnreachableBlocksProc
+ , replaceLabels
+ )
+where
+
+import GhcPrelude hiding (succ, unzip, zip)
+
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm.BlockId
+import GHC.Cmm
+import GHC.Cmm.Utils
+import GHC.Cmm.Switch (mapSwitchTargets, switchTargetsToList)
+import Maybes
+import Panic
+import Util
+
+import Control.Monad
+
+
+-- Note [What is shortcutting]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Consider this Cmm code:
+--
+-- L1: ...
+-- goto L2;
+-- L2: goto L3;
+-- L3: ...
+--
+-- Here L2 is an empty block and contains only an unconditional branch
+-- to L3. In this situation any block that jumps to L2 can jump
+-- directly to L3:
+--
+-- L1: ...
+-- goto L3;
+-- L2: goto L3;
+-- L3: ...
+--
-- In this situation we say that we shortcut L2 to L3. One of the
-- consequences of shortcutting is that some blocks of code may become
-- unreachable (in the example above this is true for L2).
+
+
+-- Note [Control-flow optimisations]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- This optimisation does three things:
+--
+-- - If a block finishes in an unconditional branch to another block
+-- and that is the only jump to that block we concatenate the
+-- destination block at the end of the current one.
+--
+-- - If a block finishes in a call whose continuation block is a
+-- goto, then we can shortcut the destination, making the
+-- continuation block the destination of the goto - but see Note
+-- [Shortcut call returns].
+--
+-- - For any block that is not a call we try to shortcut the
+-- destination(s). Additionally, if a block ends with a
+-- conditional branch we try to invert the condition.
+--
+-- Blocks are processed using postorder DFS traversal. A side effect
+-- of determining traversal order with a graph search is elimination
+-- of any blocks that are unreachable.
+--
+-- Transformations are improved by working from the end of the graph
+-- towards the beginning, because we may be able to perform many
+-- shortcuts in one go.
+
+
+-- Note [Shortcut call returns]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- We are going to maintain the "current" graph (LabelMap CmmBlock) as
+-- we go, and also a mapping from BlockId to BlockId, representing
+-- continuation labels that we have renamed. This latter mapping is
+-- important because we might shortcut a CmmCall continuation. For
+-- example:
+--
+-- Sp[0] = L
+-- call g returns to L
+-- L: goto M
+-- M: ...
+--
+-- So when we shortcut the L block, we need to replace not only
+-- the continuation of the call, but also references to L in the
+-- code (e.g. the assignment Sp[0] = L):
+--
+-- Sp[0] = M
+-- call g returns to M
+-- M: ...
+--
+-- So we keep track of which labels we have renamed and apply the mapping
+-- at the end with replaceLabels.
+
+
+-- Note [Shortcut call returns and proc-points]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Consider this code that you might get from a recursive
+-- let-no-escape:
+--
+-- goto L1
+-- L1:
+-- if (Hp > HpLim) then L2 else L3
+-- L2:
+-- call stg_gc_noregs returns to L4
+-- L4:
+-- goto L1
+-- L3:
+-- ...
+-- goto L1
+--
+-- Then the control-flow optimiser shortcuts L4. But that turns L1
+-- into the call-return proc point, and every iteration of the loop
+-- has to shuffle variables to and from the stack. So we must *not*
+-- shortcut L4.
+--
+-- Moreover not shortcutting call returns is probably fine. If L4 can
+-- concat with its branch target then it will still do so. And we
+-- save some compile time because we don't have to traverse all the
+-- code in replaceLabels.
+--
+-- However, we probably do want to do this if we are splitting proc
+-- points, because L1 will be a proc-point anyway, so merging it with
+-- L4 reduces the number of proc points. Unfortunately recursive
+-- let-no-escapes won't generate very good code with proc-point
+-- splitting on - we should probably compile them to explicitly use
+-- the native calling convention instead.
+
-- | Run the control-flow optimiser over a graph, discarding the
-- label-renaming environment that 'blockConcat' also produces.
-- The 'Bool' says whether we are splitting proc points; see
-- Note [Shortcut call returns and proc-points].
cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
cmmCfgOpts split graph = optimised_graph
  where
    (optimised_graph, _shortcut_env) = blockConcat split graph
+
-- | Run the control-flow optimiser over a whole declaration.  For a
-- 'CmmProc' this optimises the graph and then updates the proc's info
-- tables to follow any labels that were renamed by shortcutting.
-- Any other declaration is returned unchanged.
cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
  where
    -- env maps each shortcutted label to its replacement
    (g', env) = blockConcat split g
    info' = info{ info_tbls = new_info_tbls }
    new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))

    -- If we changed any labels, then we have to update the info tables
    -- too, except for the top-level info table because that might be
    -- referred to by other procs.
    upd_info (k,info)
      | Just k' <- mapLookup k env
      = (k', if k' == g_entry g'
                then info
                else info{ cit_lbl = infoTblLbl k' })
      | otherwise
      = (k,info)
cmmCfgOptsProc _ top = top
+
+
-- | The main work of the control-flow optimiser: block concatenation
-- and shortcutting.  See Note [What is shortcutting] and
-- Note [Control-flow optimisations].  Returns the optimised graph
-- together with a map from renamed continuation labels to their
-- replacements, which the caller needs in order to fix up info tables
-- (see Note [Shortcut call returns]).
blockConcat :: Bool -> CmmGraph -> (CmmGraph, LabelMap BlockId)
blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
  = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
  where
    -- We might be able to shortcut the entry BlockId itself.
    -- Remember to update the shortcut_map, since we also have to
    -- update the info_tbls mapping now.
    (new_entry, shortcut_map')
      | Just entry_blk <- mapLookup entry_id new_blocks
      , Just dest <- canShortcut entry_blk
      = (dest, mapInsert entry_id dest shortcut_map)
      | otherwise
      = (entry_id, shortcut_map)

    -- blocks are sorted in reverse postorder, but we want to go from the exit
    -- towards beginning, so we use foldr below.
    blocks = revPostorder g
    blockmap = foldl' (flip addBlock) emptyBody blocks

    -- Accumulator contains three components:
    --  * map of blocks in a graph
    --  * map of shortcut labels. See Note [Shortcut call returns]
    --  * map containing number of predecessors for each block. We discard
    --    it after we process all blocks.
    (new_blocks, shortcut_map, _) =
        foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks

    -- Map of predecessors for initial graph. We increase number of
    -- predecessors for entry block by one to denote that it is
    -- target of a jump, even if no block in the current graph jumps
    -- to it.
    initialBackEdges = incPreds entry_id (predMap blocks)

    maybe_concat :: CmmBlock
                 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
                 -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
    maybe_concat block (!blocks, !shortcut_map, !backEdges)
      -- If:
      --   (1) current block ends with unconditional branch to b' and
      --   (2) it has exactly one predecessor (namely, current block)
      --
      -- Then:
      --   (1) append b' block at the end of current block
      --   (2) remove b' from the map of blocks
      --   (3) remove information about b' from predecessors map
      --
      -- Since we know that the block has only one predecessor we call
      -- mapDelete directly instead of calling decPreds.
      --
      -- Note that we always maintain an up-to-date list of predecessors, so
      -- we can ignore the contents of shortcut_map
      | CmmBranch b' <- last
      , hasOnePredecessor b'
      , Just blk' <- mapLookup b' blocks
      = let bid' = entryLabel blk'
        in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
           , shortcut_map
           , mapDelete b' backEdges )

      -- If:
      --   (1) we are splitting proc points (see Note
      --       [Shortcut call returns and proc-points]) and
      --   (2) current block is a CmmCall or CmmForeignCall with
      --       continuation b' and
      --   (3) we can shortcut that continuation to dest
      -- Then:
      --   (1) we change the call's continuation to point to dest
      --   (2) create mapping from b' to dest
      --   (3) increase number of predecessors of dest by 1
      --   (4) decrease number of predecessors of b' by 1
      --
      -- Later we will use replaceLabels to substitute all occurrences of b'
      -- with dest.
      | splitting_procs
      , Just b' <- callContinuation_maybe last
      , Just blk' <- mapLookup b' blocks
      , Just dest <- canShortcut blk'
      = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
        , mapInsert b' dest shortcut_map
        , decPreds b' $ incPreds dest backEdges )

      -- If:
      --   (1) a block does not end with a call
      -- Then:
      --   (1) if it ends with a conditional attempt to invert the
      --       conditional
      --   (2) attempt to shortcut all destination blocks
      --   (3) if new successors of a block are different from the old ones
      --       update the map of predecessors accordingly
      --
      -- A special case of this is a situation when a block ends with an
      -- unconditional jump to a block that can be shortcut.
      | Nothing <- callContinuation_maybe last
      = let oldSuccs = successors last
            newSuccs = successors rewrite_last
        in ( mapInsert bid (blockJoinTail head rewrite_last) blocks
           , shortcut_map
           , if oldSuccs == newSuccs
                then backEdges
                else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )

      -- Otherwise don't do anything
      | otherwise
      = ( blocks, shortcut_map, backEdges )
      where
        (head, last) = blockSplitTail block
        bid = entryLabel block

        -- Changes continuation of a call to a specified label
        update_cont dest =
            case last of
              CmmCall{}        -> last { cml_cont = Just dest }
              CmmForeignCall{} -> last { succ = dest }
              _                -> panic "Can't shortcut continuation."

        -- Attempts to shortcut successors of last node
        shortcut_last = mapSuccessors shortcut last
          where
            shortcut l =
              case mapLookup l blocks of
                Just b | Just dest <- canShortcut b -> dest
                _otherwise -> l

        rewrite_last
          -- Sometimes we can get rid of the conditional completely.
          | CmmCondBranch _cond t f _l <- shortcut_last
          , t == f
          = CmmBranch t

          -- See Note [Invert Cmm conditionals]
          | CmmCondBranch cond t f l <- shortcut_last
          , hasOnePredecessor t -- inverting will make t a fallthrough
          , likelyTrue l || (numPreds f > 1)
          , Just cond' <- maybeInvertCmmExpr cond
          = CmmCondBranch cond' f t (invertLikeliness l)

          -- If all jump destinations of a switch go to the
          -- same target eliminate the switch.
          | CmmSwitch _expr targets <- shortcut_last
          , (t:ts) <- switchTargetsToList targets
          , all (== t) ts
          = CmmBranch t

          | otherwise
          = shortcut_last

        likelyTrue (Just True) = True
        likelyTrue _           = False

        invertLikeliness :: Maybe Bool -> Maybe Bool
        invertLikeliness = fmap not

        -- Number of predecessors for a block
        numPreds bid = mapLookup bid backEdges `orElse` 0

        hasOnePredecessor b = numPreds b == 1
+
+{-
+ Note [Invert Cmm conditionals]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The native code generator always produces jumps to the true branch.
+ Falling through to the false branch is however faster. So we try to
+ arrange for that to happen.
+ This means we invert the condition if:
+ * The likely path will become a fallthrough.
  * We can't guarantee a fallthrough for the false branch, but we can
    for the true branch.
+
+ In some cases it's faster to avoid inverting when the false branch is likely.
+ However determining when that is the case is neither easy nor cheap so for
+ now we always invert as this produces smaller binaries and code that is
+ equally fast on average. (On an i7-6700K)
+
+ TODO:
+ There is also the edge case when both branches have multiple predecessors.
+ In this case we could assume that we will end up with a jump for BOTH
+ branches. In this case it might be best to put the likely path in the true
+ branch especially if there are large numbers of predecessors as this saves
 us the jump that's not taken. However I haven't tested this and as of early
+ 2018 we almost never generate cmm where this would apply.
+-}
+
-- Bump or drop the predecessor count of a block.  If decrementing would
-- take the count to 0 the entry is removed from the map entirely.
--
-- Invariant: if a block has no predecessors it should be dropped from the
-- graph because it is unreachable. maybe_concat is constructed to maintain
-- that invariant, but calling replaceLabels may introduce unreachable blocks.
-- We rely on subsequent passes in the Cmm pipeline to remove unreachable
-- blocks.
incPreds, decPreds :: BlockId -> LabelMap Int -> LabelMap Int
incPreds bid edges = mapInsertWith (+) bid 1 edges
decPreds bid edges =
  case mapLookup bid edges of
    Just n
      | n > 1     -> mapInsert bid (n - 1) edges
      | otherwise -> mapDelete bid edges
    Nothing       -> edges
+
+
-- Checks if a block consists only of "goto dest" (ignoring any
-- CmmComment and CmmTick nodes in the middle).  If it does then we
-- return "Just dest" label. See Note [What is shortcutting]
canShortcut :: CmmBlock -> Maybe BlockId
canShortcut block =
  case blockSplit block of
    (_, middle, CmmBranch dest)
      | all is_negligible (blockToList middle) -> Just dest
    _ -> Nothing
  where
    is_negligible CmmComment{} = True
    is_negligible CmmTick{}    = True
    is_negligible _other       = False
+
-- Concatenates two blocks. The first is assumed to be open on exit, the
-- second closed on entry (i.e. it carries a label, which blockSplitHead
-- peels off here). The tick scopes of the two blocks are merged into
-- the resulting entry node.
splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
splice head rest =
    let (CmmEntry lbl scope0, body0) = blockSplitHead head
        (CmmEntry _ scope1, body1)   = blockSplitHead rest
        new_entry = CmmEntry lbl (combineTickScopes scope0 scope1)
    in new_entry `blockJoinHead` body0 `blockAppend` body1
+
-- If the node is a call carrying a continuation, return Just the label
-- of that continuation. Otherwise return Nothing.
callContinuation_maybe :: CmmNode O C -> Maybe BlockId
callContinuation_maybe node = case node of
  CmmCall { cml_cont = cont }  -> cont
  CmmForeignCall { succ = k }  -> Just k
  _                            -> Nothing
+
+
-- | Map over the CmmGraph, replacing each label with its mapping in the
-- supplied LabelMap.  Labels not present in the map are left unchanged.
-- Rewrites control-flow targets, label-valued expressions (block
-- literals and young stack slots), and the graph's entry label.
replaceLabels :: LabelMap BlockId -> CmmGraph -> CmmGraph
replaceLabels env g
  | mapNull env = g              -- nothing to rename: skip the traversal
  | otherwise   = replace_eid $ mapGraphNodes1 txnode g
  where
    -- the entry label itself may have been renamed
    replace_eid g = g {g_entry = lookup (g_entry g)}

    -- Rename a label, defaulting to itself when it has no mapping.
    -- (This local 'lookup' shadows Prelude.lookup.)
    lookup id = mapLookup id env `orElse` id

    txnode :: CmmNode e x -> CmmNode e x
    txnode (CmmBranch bid) = CmmBranch (lookup bid)
    txnode (CmmCondBranch p t f l) =
        -- mkCmmCondBranch collapses the conditional when both targets
        -- become the same label after renaming
        mkCmmCondBranch (exp p) (lookup t) (lookup f) l
    txnode (CmmSwitch e ids) =
        CmmSwitch (exp e) (mapSwitchTargets lookup ids)
    txnode (CmmCall t k rg a res r) =
        CmmCall (exp t) (liftM lookup k) rg a res r
    txnode fc@CmmForeignCall{} =
        fc{ args = map exp (args fc), succ = lookup (succ fc) }
    -- all other nodes: only rename labels buried inside expressions
    txnode other = mapExpDeep exp other

    -- Rename labels that occur inside expressions.
    exp :: CmmExpr -> CmmExpr
    exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid))
    exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
    exp e = e
+
-- Build a conditional branch, degenerating to an unconditional one
-- when both targets coincide.
mkCmmCondBranch :: CmmExpr -> Label -> Label -> Maybe Bool -> CmmNode O C
mkCmmCondBranch p t f l
  | t == f    = CmmBranch t
  | otherwise = CmmCondBranch p t f l
+
-- Build a map from each block to the number of its predecessors.
predMap :: [CmmBlock] -> LabelMap Int
predMap = foldr count_succs mapEmpty
  where
    -- bump the count for every successor of this block
    count_succs block env =
        foldr (\lbl -> mapInsertWith (+) lbl 1) env (successors block)
+
-- | Remove blocks (and their info tables) that are not reachable from
-- the graph's entry point.  If every block is reachable, the original
-- proc is returned unchanged (preserving sharing).  Non-proc
-- declarations are returned as-is, mirroring 'cmmCfgOptsProc'.
removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
  | used_blocks `lengthLessThan` mapSize (toBlockMap g)
  = CmmProc info' lbl live g'
  | otherwise
  = proc
  where
    g' = ofBlockList (g_entry g) used_blocks
    info' = info { info_tbls = keep_used (info_tbls info) }
    -- Remove any info_tbls for unreachable blocks

    keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable
    keep_used bs = mapFoldlWithKey keep mapEmpty bs

    keep :: LabelMap CmmInfoTable -> Label -> CmmInfoTable -> LabelMap CmmInfoTable
    keep env l i | l `setMember` used_lbls = mapInsert l i env
                 | otherwise               = env

    -- Blocks reachable from the entry, in reverse postorder.
    used_blocks :: [CmmBlock]
    used_blocks = revPostorder g

    used_lbls :: LabelSet
    used_lbls = setFromList $ map entryLabel used_blocks
-- Previously this function was partial: it had no equation for CmmData
-- and relied on -fno-warn-incomplete-patterns.  Return non-proc
-- declarations unchanged instead of crashing.
removeUnreachableBlocksProc top = top