diff options
Diffstat (limited to 'compiler/cmm/CmmContFlowOpt.hs')
-rw-r--r-- | compiler/cmm/CmmContFlowOpt.hs | 83 |
1 files changed, 54 insertions, 29 deletions
diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs index 219b68e42a..92dd7abba5 100644 --- a/compiler/cmm/CmmContFlowOpt.hs +++ b/compiler/cmm/CmmContFlowOpt.hs @@ -1,4 +1,5 @@ {-# LANGUAGE GADTs #-} +{-# LANGUAGE BangPatterns #-} {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} module CmmContFlowOpt ( cmmCfgOpts @@ -8,6 +9,8 @@ module CmmContFlowOpt ) where +import GhcPrelude hiding (succ, unzip, zip) + import Hoopl.Block import Hoopl.Collections import Hoopl.Graph @@ -21,7 +24,6 @@ import Panic import Util import Control.Monad -import Prelude hiding (succ, unzip, zip) -- Note [What is shortcutting] @@ -53,7 +55,7 @@ import Prelude hiding (succ, unzip, zip) -- -- This optimisation does three things: -- --- - If a block finishes in an unconditonal branch to another block +-- - If a block finishes in an unconditional branch to another block -- and that is the only jump to that block we concatenate the -- destination block at the end of the current one. -- @@ -171,11 +173,10 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } | otherwise = (entry_id, shortcut_map) - -- blocks is a list of blocks in DFS postorder, while blockmap is - -- a map of blocks. We process each element from blocks and update - -- blockmap accordingly - blocks = postorderDfs g - blockmap = foldr addBlock emptyBody blocks + -- blocks are sorted in reverse postorder, but we want to go from the exit + -- towards beginning, so we use foldr below. + blocks = revPostorder g + blockmap = foldl' (flip addBlock) emptyBody blocks -- Accumulator contains three components: -- * map of blocks in a graph @@ -194,7 +195,7 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } maybe_concat :: CmmBlock -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int) -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int) - maybe_concat block (blocks, shortcut_map, backEdges) + maybe_concat block (!blocks, !shortcut_map, !backEdges) -- If: -- (1) current block ends with unconditional branch to b' and -- (2) it has exactly one predecessor (namely, current block) @@ -252,8 +253,8 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } -- unconditional jump to a block that can be shortcut. | Nothing <- callContinuation_maybe last = let oldSuccs = successors last - newSuccs = successors swapcond_last - in ( mapInsert bid (blockJoinTail head swapcond_last) blocks + newSuccs = successors rewrite_last + in ( mapInsert bid (blockJoinTail head rewrite_last) blocks , shortcut_map , if oldSuccs == newSuccs then backEdges @@ -281,34 +282,58 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } Just b | Just dest <- canShortcut b -> dest _otherwise -> l - -- For a conditional, we invert the conditional if that would make it - -- more likely that the branch-not-taken case becomes a fallthrough. - -- This helps the native codegen a little bit, and probably has no - -- effect on LLVM. It's convenient to do it here, where we have the - -- information about predecessors. - swapcond_last + rewrite_last + -- Sometimes we can get rid of the conditional completely. + | CmmCondBranch _cond t f _l <- shortcut_last + , t == f + = CmmBranch t + + -- See Note [Invert Cmm conditionals] | CmmCondBranch cond t f l <- shortcut_last - , likelyFalse l - , numPreds f > 1 - , hasOnePredecessor t + , hasOnePredecessor t -- inverting will make t a fallthrough + , likelyTrue l || (numPreds f > 1) , Just cond' <- maybeInvertCmmExpr cond = CmmCondBranch cond' f t (invertLikeliness l) | otherwise = shortcut_last - likelyFalse (Just False) = True - likelyFalse Nothing = True - likelyFalse _ = False + likelyTrue (Just True) = True + likelyTrue _ = False - invertLikeliness (Just b) = Just (not b) - invertLikeliness Nothing = Nothing + invertLikeliness :: Maybe Bool -> Maybe Bool + invertLikeliness = fmap not -- Number of predecessors for a block numPreds bid = mapLookup bid backEdges `orElse` 0 hasOnePredecessor b = numPreds b == 1 +{- + Note [Invert Cmm conditionals] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + The native code generator always produces jumps to the true branch. + Falling through to the false branch is however faster. So we try to + arrange for that to happen. + This means we invert the condition if: + * The likely path will become a fallthrough. + * We can't guarantee a fallthrough for the false branch but for the + true branch. + + In some cases it's faster to avoid inverting when the false branch is likely. + However determining when that is the case is neither easy nor cheap so for + now we always invert as this produces smaller binaries and code that is + equally fast on average. (On an i7-6700K) + + TODO: + There is also the edge case when both branches have multiple predecessors. + In this case we could assume that we will end up with a jump for BOTH + branches. In this case it might be best to put the likely path in the true + branch especially if there are large numbers of predecessors as this saves + us the jump thats not taken. However I haven't tested this and as of early + 2018 we almost never generate cmm where this would apply. +-} + -- Functions for incrementing and decrementing number of predecessors. If -- decrementing would set the predecessor count to 0, we remove entry from the -- map. @@ -406,14 +431,14 @@ removeUnreachableBlocksProc proc@(CmmProc info lbl live g) -- Remove any info_tbls for unreachable keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable - keep_used bs = mapFoldWithKey keep mapEmpty bs + keep_used bs = mapFoldlWithKey keep mapEmpty bs - keep :: Label -> CmmInfoTable -> LabelMap CmmInfoTable -> LabelMap CmmInfoTable - keep l i env | l `setMember` used_lbls = mapInsert l i env + keep :: LabelMap CmmInfoTable -> Label -> CmmInfoTable -> LabelMap CmmInfoTable + keep env l i | l `setMember` used_lbls = mapInsert l i env | otherwise = env used_blocks :: [CmmBlock] - used_blocks = postorderDfs g + used_blocks = revPostorder g used_lbls :: LabelSet - used_lbls = foldr (setInsert . entryLabel) setEmpty used_blocks + used_lbls = setFromList $ map entryLabel used_blocks |