-rw-r--r--  compiler/cmm/CmmContFlowOpt.hs    | 45
-rw-r--r--  compiler/nativeGen/X86/CodeGen.hs |  3
2 files changed, 35 insertions, 13 deletions
diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs
index 5f34a913d6..1efc3e6e54 100644
--- a/compiler/cmm/CmmContFlowOpt.hs
+++ b/compiler/cmm/CmmContFlowOpt.hs
@@ -283,34 +283,53 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
           Just b | Just dest <- canShortcut b -> dest
           _otherwise -> l
 
-      -- For a conditional, we invert the conditional if that would make it
-      -- more likely that the branch-not-taken case becomes a fallthrough.
-      -- This helps the native codegen a little bit, and probably has no
-      -- effect on LLVM. It's convenient to do it here, where we have the
-      -- information about predecessors.
+      -- See Note [Invert Cmm conditionals]
       swapcond_last
         | CmmCondBranch cond t f l <- shortcut_last
-        , likelyFalse l
-        , numPreds f > 1
-        , hasOnePredecessor t
+        , hasOnePredecessor t -- inverting will make t a fallthrough
+        , likelyTrue l || (numPreds f > 1)
         , Just cond' <- maybeInvertCmmExpr cond
         = CmmCondBranch cond' f t (invertLikeliness l)
 
         | otherwise = shortcut_last
 
-      likelyFalse (Just False) = True
-      likelyFalse Nothing = True
-      likelyFalse _ = False
+      likelyTrue (Just True) = True
+      likelyTrue _ = False
 
-      invertLikeliness (Just b) = Just (not b)
-      invertLikeliness Nothing = Nothing
+      invertLikeliness :: Maybe Bool -> Maybe Bool
+      invertLikeliness = fmap not
 
       -- Number of predecessors for a block
       numPreds bid = mapLookup bid backEdges `orElse` 0
 
       hasOnePredecessor b = numPreds b == 1
 
+{-
+  Note [Invert Cmm conditionals]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  The native code generator always produces jumps to the true branch.
+  Falling through to the false branch is, however, faster. So we try to
+  arrange for that to happen.
+  This means we invert the condition if:
+    * the likely path will become a fallthrough, or
+    * we can't guarantee a fallthrough for the false branch, but we can
+      for the true branch.
+
+  In some cases it's faster to avoid inverting when the false branch is
+  likely. However, determining when that is the case is neither easy nor
+  cheap, so for now we always invert, as this produces smaller binaries
+  and code that is equally fast on average (measured on an i7-6700K).
+
+  TODO:
+  There is also the edge case where both branches have multiple
+  predecessors. In that case we can assume we will end up with a jump
+  for BOTH branches, so it might be best to put the likely path in the
+  true branch, especially if there are large numbers of predecessors,
+  as this saves us the jump that's not taken. However, this hasn't been
+  tested, and as of early 2018 we almost never generate Cmm where it
+  would apply.
+-}
+
 -- Functions for incrementing and decrementing number of predecessors. If
 -- decrementing would set the predecessor count to 0, we remove entry from the
 -- map.
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index eb6af1ff41..09757e769e 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -210,6 +210,9 @@ stmtToInstrs stmt = do
         -> genCCall dflags is32Bit target result_regs args
       CmmBranch id -> genBranch id
+
+      -- We try to arrange blocks such that the likely branch is the fallthrough
+      -- in CmmContFlowOpt. So we can assume the condition is likely false here.
      CmmCondBranch arg true false _ -> do
          b1 <- genCondJump true arg
          b2 <- genBranch false
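
To make the new guard in swapcond_last concrete, here is a minimal standalone
sketch of the decision rule from Note [Invert Cmm conditionals]. The names
(BlockId as a plain Int, a String condition, CondBranch, swapCond) are
simplified stand-ins for illustration, not GHC's actual Cmm types:

    import qualified Data.Map.Strict as Map

    type BlockId = Int

    -- A conditional branch: condition, true target, false target, and an
    -- optional likeliness hint (Just True = the true branch is likely).
    data CondBranch = CondBranch String BlockId BlockId (Maybe Bool)
      deriving Show

    -- Invert when the true target would become a fallthrough (it has
    -- exactly one predecessor) and either the true branch is the likely
    -- one or the false target cannot be a fallthrough anyway (more than
    -- one predecessor).
    swapCond :: Map.Map BlockId Int -> CondBranch -> CondBranch
    swapCond preds br@(CondBranch cond t f l)
      | numPreds t == 1
      , l == Just True || numPreds f > 1
      = CondBranch ("!(" ++ cond ++ ")") f t (fmap not l)
      | otherwise = br
      where
        numPreds b = Map.findWithDefault 0 b preds

For example, if block 2 has three predecessors (so it can never be a
fallthrough) and block 1 has exactly one, the branch is inverted even
without a likeliness hint:

    swapCond (Map.fromList [(1,1),(2,3)]) (CondBranch "x > 0" 1 2 Nothing)
    ==> CondBranch "!(x > 0)" 2 1 Nothing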
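
On the X86 side, the new comment records the invariant this pass
establishes. As a hedged sketch of the lowering shape it describes (Instr,
JCC, JMP and lowerCondBranch are hypothetical stand-ins, not the real
nativeGen API), a Cmm conditional becomes a conditional jump to the true
block followed by an unconditional jump to the false block:

    data Instr
      = JCC String Int  -- jump to the given block if the condition holds
      | JMP Int         -- unconditional jump to the given block
      deriving Show

    lowerCondBranch :: String -> Int -> Int -> [Instr]
    lowerCondBranch cond true false =
      [ JCC cond true  -- taken only in the (assumed unlikely) true case
      , JMP false      -- redundant if 'false' is the next block in layout
      ]

When block layout places the false block directly after this one, the
trailing JMP can be dropped, so the likely path runs straight-line code
past an untaken conditional jump, which is the cheap case this change
arranges for.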