summaryrefslogtreecommitdiff
path: root/compiler/nativeGen/BlockLayout.hs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/nativeGen/BlockLayout.hs')
-rw-r--r--compiler/nativeGen/BlockLayout.hs57
1 files changed, 54 insertions, 3 deletions
diff --git a/compiler/nativeGen/BlockLayout.hs b/compiler/nativeGen/BlockLayout.hs
index 56e3177dd8..f5faba2d2e 100644
--- a/compiler/nativeGen/BlockLayout.hs
+++ b/compiler/nativeGen/BlockLayout.hs
@@ -239,7 +239,44 @@ import Control.Monad (foldM)
Assuming that Lwork is large the chance that the "call" ends up
in the same cache line is also fairly small.
--}
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~~~ Note [Layout relevant edge weights]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ The input to the chain based code layout algorithm is a CFG
+ with edges annotated with their frequency. The frequency
+ of traversal corresponds quite well to the cost of not placing
+ the connected blocks next to each other.
+
+ However even if having the same frequency certain edges are
+ inherently more or less relevant to code layout.
+
+ In particular:
+
+ * Edges which cross an info table are less relevant than others.
+
+ If we place the blocks across this edge next to each other
+ they are still separated by the info table which negates
+ much of the benefit. It makes it less likely both blocks
+ will share a cache line reducing the benefits from locality.
+ But it also prevents us from eliminating jump instructions.
+
+ * Conditional branches and switches are slightly less relevant.
+
+ We can completely remove unconditional jumps by placing them
+ next to each other. This is not true for conditional branch edges.
+ We apply a small modifier to them to ensure edges for which we can
+ eliminate the overhead completely are considered first. See also #18053.
+
+ * Edges constituted by a call are ignored.
+
+ Considering these hardly helped with performance and ignoring
+ them helps quite a bit to improve compiler performance.
+
+ So we perform a preprocessing step where we apply a multiplicator
+ to these kinds of edges.
+
+ -}
-- | Look at X number of blocks in two chains to determine
@@ -653,17 +690,31 @@ sequenceChain info weights' blocks@((BasicBlock entry _):_) =
directEdges :: [CfgEdge]
directEdges = sortBy (flip compare) $ catMaybes . map relevantWeight $ (infoEdgeList weights)
where
+ -- Apply modifiers to turn edge frequencies into useable weights
+ -- for computing code layout.
+ -- See also Note [Layout relevant edge weights]
relevantWeight :: CfgEdge -> Maybe CfgEdge
relevantWeight edge@(CfgEdge from to edgeInfo)
| (EdgeInfo CmmSource { trans_cmmNode = CmmCall {} } _) <- edgeInfo
- -- Ignore edges across calls
+ -- Ignore edges across calls.
= Nothing
| mapMember to info
, w <- edgeWeight edgeInfo
- -- The payoff is small if we jump over an info table
+ -- The payoff is quite small if we jump over an info table
= Just (CfgEdge from to edgeInfo { edgeWeight = w/8 })
+ | (EdgeInfo CmmSource { trans_cmmNode = exitNode } _) <- edgeInfo
+ , cantEliminate exitNode
+ , w <- edgeWeight edgeInfo
+ -- A small penalty to edge types which
+ -- we can't optimize away by layout.
+ -- w * 0.96875 == w - w/32
+ = Just (CfgEdge from to edgeInfo { edgeWeight = w * 0.96875 })
| otherwise
= Just edge
+ where
+ cantEliminate CmmCondBranch {} = True
+ cantEliminate CmmSwitch {} = True
+ cantEliminate _ = False
blockMap :: LabelMap (GenBasicBlock i)
blockMap