Diffstat (limited to 'compiler/cmm/CmmContFlowOpt.hs')
-rw-r--r--  compiler/cmm/CmmContFlowOpt.hs  | 83
1 file changed, 54 insertions(+), 29 deletions(-)
diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs
index 219b68e42a..92dd7abba5 100644
--- a/compiler/cmm/CmmContFlowOpt.hs
+++ b/compiler/cmm/CmmContFlowOpt.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE GADTs #-}
+{-# LANGUAGE BangPatterns #-}
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
module CmmContFlowOpt
( cmmCfgOpts
@@ -8,6 +9,8 @@ module CmmContFlowOpt
)
where
+import GhcPrelude hiding (succ, unzip, zip)
+
import Hoopl.Block
import Hoopl.Collections
import Hoopl.Graph
@@ -21,7 +24,6 @@ import Panic
import Util
import Control.Monad
-import Prelude hiding (succ, unzip, zip)
-- Note [What is shortcutting]
@@ -53,7 +55,7 @@ import Prelude hiding (succ, unzip, zip)
--
-- This optimisation does three things:
--
--- - If a block finishes in an unconditonal branch to another block
+-- - If a block finishes in an unconditional branch to another block
-- and that is the only jump to that block, we concatenate the
-- destination block at the end of the current one.
--
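As an illustration of the concatenation case in the bullet above, here is a toy sketch of the idea. It is not GHC's code: Block, Term, concatPass and the Data.Map graph are made-up stand-ins for CmmBlock, CmmNode and LabelMap, and the pass is deliberately simplified to a single sweep.

{-# LANGUAGE LambdaCase #-}
-- Toy sketch: splice a block into its single unconditional predecessor.
import qualified Data.Map.Strict as M

type Label = Int

data Term = Branch Label | Cond Label Label | Ret deriving (Eq, Show)

data Block = Block { stmts :: [String], term :: Term } deriving Show

succsOf :: Term -> [Label]
succsOf = \case
  Branch l -> [l]
  Cond t f -> [t, f]
  Ret      -> []

-- Predecessor counts, computed once over the whole graph.
predCount :: M.Map Label Block -> M.Map Label Int
predCount g = M.fromListWith (+)
  [ (s, 1) | b <- M.elems g, s <- succsOf (term b) ]

-- One simplified pass: if a block ends in an unconditional branch to a
-- non-entry block with exactly one predecessor, append that block's
-- statements, take over its terminator, and drop it from the graph.
concatPass :: Label -> M.Map Label Block -> M.Map Label Block
concatPass entry g = foldr step g (M.keys g)
  where
    preds = predCount g
    step bid acc
      | Just (Block ss (Branch l)) <- M.lookup bid acc
      , l /= entry, l /= bid
      , M.findWithDefault 0 l preds == 1
      , Just (Block ss' t') <- M.lookup l acc
      = M.insert bid (Block (ss ++ ss') t') (M.delete l acc)
      | otherwise = acc

main :: IO ()
main = mapM_ print (M.toList (concatPass 0 g0))
  where
    g0 = M.fromList
      [ (0, Block ["x = 1"]     (Branch 1))  -- the only jump to block 1
      , (1, Block ["x = x + 1"] Ret) ]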
@@ -171,11 +173,10 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
| otherwise
= (entry_id, shortcut_map)
- -- blocks is a list of blocks in DFS postorder, while blockmap is
- -- a map of blocks. We process each element from blocks and update
- -- blockmap accordingly
- blocks = postorderDfs g
- blockmap = foldr addBlock emptyBody blocks
+ -- blocks are sorted in reverse postorder, but we want to go from the exit
+ -- towards the beginning, so we use foldr below.
+ blocks = revPostorder g
+ blockmap = foldl' (flip addBlock) emptyBody blocks
-- Accumulator contains three components:
-- * map of blocks in a graph
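The switch above from foldr addBlock to foldl' (flip addBlock) is easier to see in isolation. A small self-contained sketch (not GHC code; addBlock and the Int labels are stand-ins): with distinct labels both folds build the same map, while foldl' keeps the accumulator evaluated as it goes.

import Data.Foldable (foldl')
import qualified Data.Map.Strict as M

-- Hypothetical stand-in for addBlock/emptyBody, just to compare the folds.
addBlock :: (Int, String) -> M.Map Int String -> M.Map Int String
addBlock (lbl, blk) = M.insert lbl blk

blocks :: [(Int, String)]
blocks = [(1, "entry"), (2, "loop"), (3, "exit")]

main :: IO ()
main = do
  let viaFoldr  = foldr addBlock M.empty blocks
      viaFoldl' = foldl' (flip addBlock) M.empty blocks
  -- With distinct labels both folds build the same map; foldl' simply
  -- forces the accumulator at every step instead of stacking thunks.
  print (viaFoldr == viaFoldl')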
@@ -194,7 +195,7 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
maybe_concat :: CmmBlock
-> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
-> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
- maybe_concat block (blocks, shortcut_map, backEdges)
+ maybe_concat block (!blocks, !shortcut_map, !backEdges)
-- If:
-- (1) current block ends with unconditional branch to b' and
-- (2) it has exactly one predecessor (namely, current block)
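The bang patterns added to maybe_concat's accumulator (together with the new BangPatterns pragma at the top of the file) force the three components at each step of the fold. A toy example of the same idiom, with made-up names:

{-# LANGUAGE BangPatterns #-}
-- Strict tuple accumulator in a fold: the bangs force each component as
-- the fold proceeds, so no chain of thunks builds up inside the pair.

sumAndCount :: [Int] -> (Int, Int)
sumAndCount = foldr step (0, 0)
  where
    step x (!s, !n) = (x + s, n + 1)

main :: IO ()
main = print (sumAndCount [1 .. 100000])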
@@ -252,8 +253,8 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
-- unconditional jump to a block that can be shortcut.
| Nothing <- callContinuation_maybe last
= let oldSuccs = successors last
- newSuccs = successors swapcond_last
- in ( mapInsert bid (blockJoinTail head swapcond_last) blocks
+ newSuccs = successors rewrite_last
+ in ( mapInsert bid (blockJoinTail head rewrite_last) blocks
, shortcut_map
, if oldSuccs == newSuccs
then backEdges
@@ -281,34 +282,58 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
Just b | Just dest <- canShortcut b -> dest
_otherwise -> l
- -- For a conditional, we invert the conditional if that would make it
- -- more likely that the branch-not-taken case becomes a fallthrough.
- -- This helps the native codegen a little bit, and probably has no
- -- effect on LLVM. It's convenient to do it here, where we have the
- -- information about predecessors.
- swapcond_last
+ rewrite_last
+ -- Sometimes we can get rid of the conditional completely.
+ | CmmCondBranch _cond t f _l <- shortcut_last
+ , t == f
+ = CmmBranch t
+
+ -- See Note [Invert Cmm conditionals]
| CmmCondBranch cond t f l <- shortcut_last
- , likelyFalse l
- , numPreds f > 1
- , hasOnePredecessor t
+ , hasOnePredecessor t -- inverting will make t a fallthrough
+ , likelyTrue l || (numPreds f > 1)
, Just cond' <- maybeInvertCmmExpr cond
= CmmCondBranch cond' f t (invertLikeliness l)
| otherwise
= shortcut_last
- likelyFalse (Just False) = True
- likelyFalse Nothing = True
- likelyFalse _ = False
+ likelyTrue (Just True) = True
+ likelyTrue _ = False
- invertLikeliness (Just b) = Just (not b)
- invertLikeliness Nothing = Nothing
+ invertLikeliness :: Maybe Bool -> Maybe Bool
+ invertLikeliness = fmap not
-- Number of predecessors for a block
numPreds bid = mapLookup bid backEdges `orElse` 0
hasOnePredecessor b = numPreds b == 1
+{-
+ Note [Invert Cmm conditionals]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The native code generator always produces jumps to the true branch.
+ Falling through to the false branch is, however, faster. So we try to
+ arrange for that to happen.
+ This means we invert the condition if:
+ * The likely path will become a fallthrough.
+ * We can't guarantee a fallthrough for the false branch, but we can
+ for the true branch.
+
+ In some cases it's faster to avoid inverting when the false branch is likely.
+ However, determining when that is the case is neither easy nor cheap, so
+ for now we always invert, as this produces smaller binaries and code that
+ is equally fast on average (measured on an i7-6700K).
+
+ TODO:
+ There is also the edge case when both branches have multiple predecessors.
+ In that case we can assume we will end up with a jump for BOTH branches,
+ so it might be best to put the likely path in the true branch, especially
+ if there is a large number of predecessors, as this saves us the jump that
+ is not taken. However, I haven't tested this, and as of early 2018 we
+ almost never generate Cmm where this would apply.
+-}
+
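A runnable paraphrase of the rule in the Note may help. The sketch below is only an approximation: shouldInvert, the Label type and the predecessor map are invented here, and it omits the extra requirement that maybeInvertCmmExpr can actually invert the condition.

import qualified Data.Map.Strict as M

type Label = Int

-- Approximation of the guard on rewrite_last: invert when the true target
-- would become a fallthrough and either the branch is likely taken or the
-- false target cannot be a fallthrough anyway (several predecessors).
shouldInvert :: Maybe Bool        -- likeliness hint (Just True = likely taken)
             -> Label -> Label    -- true / false targets
             -> M.Map Label Int   -- predecessor counts
             -> Bool
shouldInvert likely t f preds =
     numPreds t == 1
  && (likely == Just True || numPreds f > 1)
  where
    numPreds l = M.findWithDefault 0 l preds

main :: IO ()
main = do
  let preds = M.fromList [(10, 1), (20, 3)]
  print (shouldInvert Nothing 10 20 preds)  -- True: invert, 10 falls through
  print (shouldInvert (Just False) 10 20 (M.fromList [(10, 1), (20, 1)]))
                                            -- False: keep the branch as is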
-- Functions for incrementing and decrementing number of predecessors. If
-- decrementing would set the predecessor count to 0, we remove entry from the
-- map.
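The increment/decrement behaviour described in this comment can be sketched with plain Data.Map operations; incPred and decPred below are made-up names, not the helpers defined in this module.

import qualified Data.Map.Strict as M

type Label = Int

incPred :: Label -> M.Map Label Int -> M.Map Label Int
incPred l = M.insertWith (+) l 1

-- Decrementing a count of 1 deletes the entry, so "absent from the map"
-- and "zero predecessors" mean the same thing.
decPred :: Label -> M.Map Label Int -> M.Map Label Int
decPred l m = case M.lookup l m of
  Just n | n > 1 -> M.insert l (n - 1) m
  Just _         -> M.delete l m
  Nothing        -> m

main :: IO ()
main = do
  let m = incPred 5 (incPred 5 M.empty)   -- fromList [(5,2)]
  print (decPred 5 m)                     -- fromList [(5,1)]
  print (decPred 5 (decPred 5 m))         -- fromList []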
@@ -406,14 +431,14 @@ removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
-- Remove any info_tbls for unreachable blocks
keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable
- keep_used bs = mapFoldWithKey keep mapEmpty bs
+ keep_used bs = mapFoldlWithKey keep mapEmpty bs
- keep :: Label -> CmmInfoTable -> LabelMap CmmInfoTable -> LabelMap CmmInfoTable
- keep l i env | l `setMember` used_lbls = mapInsert l i env
+ keep :: LabelMap CmmInfoTable -> Label -> CmmInfoTable -> LabelMap CmmInfoTable
+ keep env l i | l `setMember` used_lbls = mapInsert l i env
| otherwise = env
used_blocks :: [CmmBlock]
- used_blocks = postorderDfs g
+ used_blocks = revPostorder g
used_lbls :: LabelSet
- used_lbls = foldr (setInsert . entryLabel) setEmpty used_blocks
+ used_lbls = setFromList $ map entryLabel used_blocks
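The change from mapFoldWithKey to mapFoldlWithKey swaps the folding function to accumulator-first argument order, which is why keep now takes env before l and i. A rough Data.Map/Data.Set analogue (hypothetical names, not the LabelMap API):

import qualified Data.Map.Strict as M
import qualified Data.Set as S

-- Keep only the entries whose key is in the 'used' set, accumulating with
-- a strict left fold; the folding function takes the accumulator first.
keepUsed :: S.Set Int -> M.Map Int String -> M.Map Int String
keepUsed used = M.foldlWithKey' keep M.empty
  where
    keep env l i
      | l `S.member` used = M.insert l i env
      | otherwise         = env

main :: IO ()
main = print (keepUsed (S.fromList [1, 3])
                       (M.fromList [(1, "a"), (2, "b"), (3, "c")]))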