summaryrefslogtreecommitdiff
path: root/compiler/nativeGen/BlockLayout.hs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/nativeGen/BlockLayout.hs')
-rw-r--r--compiler/nativeGen/BlockLayout.hs635
1 files changed, 387 insertions, 248 deletions
diff --git a/compiler/nativeGen/BlockLayout.hs b/compiler/nativeGen/BlockLayout.hs
index 7a39071541..56e3177dd8 100644
--- a/compiler/nativeGen/BlockLayout.hs
+++ b/compiler/nativeGen/BlockLayout.hs
@@ -6,6 +6,8 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE FlexibleContexts #-}
module BlockLayout
( sequenceTop )
@@ -22,7 +24,6 @@ import BlockId
import Cmm
import Hoopl.Collections
import Hoopl.Label
-import Hoopl.Block
import DynFlags (gopt, GeneralFlag(..), DynFlags, backendMaintainsCfg)
import UniqFM
@@ -41,11 +42,30 @@ import ListSetOps (removeDups)
import OrdList
import Data.List
import Data.Foldable (toList)
-import Hoopl.Graph
import qualified Data.Set as Set
+import Data.STRef
+import Control.Monad.ST.Strict
+import Control.Monad (foldM)
{-
+ Note [CFG based code layout]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ The major steps in placing blocks are as follow:
+ * Compute a CFG based on the Cmm AST, see getCfgProc.
+ This CFG will have edge weights representing a guess
+ on how important they are.
+ * After we convert Cmm to Asm we run `optimizeCFG` which
+ adds a few more "educated guesses" to the equation.
+ * Then we run loop analysis on the CFG (`loopInfo`) which tells us
+ about loop headers, loop nesting levels and the sort.
+ * Based on the CFG and loop information refine the edge weights
+ in the CFG and normalize them relative to the most often visited
+ node. (See `mkGlobalWeights`)
+ * Feed this CFG into the block layout code (`sequenceTop`) in this
+ module. Which will then produce a code layout based on the input weights.
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~ Note [Chain based CFG serialization]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -60,8 +80,8 @@ import qualified Data.Set as Set
but also how much a block would benefit from being placed sequentially after
it's predecessor.
For example blocks which are preceeded by an info table are more likely to end
- up in a different cache line than their predecessor. So there is less benefit
- in placing them sequentially.
+ up in a different cache line than their predecessor and we can't eliminate the jump
+ so there is less benefit to placing them sequentially.
For example consider this example:
@@ -81,56 +101,83 @@ import qualified Data.Set as Set
Eg for our example we might end up with two chains like:
[A->B->C->X],[D]. Blocks inside chains will always be placed sequentially.
However there is no particular order in which chains are placed since
- (hopefully) the blocks for which sequentially is important have already
+ (hopefully) the blocks for which sequentiality is important have already
been placed in the same chain.
-----------------------------------------------------------------------------
- First try to create a lists of good chains.
+ 1) First try to create a list of good chains.
-----------------------------------------------------------------------------
- We do so by taking a block not yet placed in a chain and
- looking at these cases:
+ Good chains are these which allow us to eliminate jump instructions.
+ Which further eliminate often executed jumps first.
+
+ We do so by:
+
+ *) Ignore edges which represent instructions which can not be replaced
+ by fall through control flow. Primarily calls and edges to blocks which
+ are prefixed by a info table we have to jump across.
+
+ *) Then process remaining edges in order of frequency taken and:
+
+ +) If source and target have not been placed build a new chain from them.
+
+ +) If source and target have been placed, and are ends of differing chains
+ try to merge the two chains.
- *) Check if the best predecessor of the block is at the end of a chain.
- If so add the current block to the end of that chain.
+ +) If one side of the edge is a end/front of a chain, add the other block of
+ to edge to the same chain
- Eg if we look at block C and already have the chain (A -> B)
- then we extend the chain to (A -> B -> C).
+ Eg if we look at edge (B -> C) and already have the chain (A -> B)
+ then we extend the chain to (A -> B -> C).
- Combined with the fact that we process blocks in reverse post order
- this means loop bodies and trivially sequential control flow already
- ends up as a single chain.
+ +) If the edge was used to modify or build a new chain remove the edge from
+ our working list.
- *) Otherwise we create a singleton chain from the block we are looking at.
- Eg if we have from the example above already constructed (A->B)
- and look at D we create the chain (D) resulting in the chains [A->B, D]
+ *) If there any blocks not being placed into a chain after these steps we place
+ them into a chain consisting of only this block.
+
+ Ranking edges by their taken frequency, if
+ two edges compete for fall through on the same target block, the one taken
+ more often will automatically win out. Resulting in fewer instructions being
+ executed.
+
+ Creating singleton chains is required for situations where we have code of the
+ form:
+
+ A: goto B:
+ <infoTable>
+ B: goto C:
+ <infoTable>
+ C: ...
+
+ As the code in block B is only connected to the rest of the program via edges
+ which will be ignored in this step we make sure that B still ends up in a chain
+ this way.
-----------------------------------------------------------------------------
- We then try to fuse chains.
+ 2) We also try to fuse chains.
-----------------------------------------------------------------------------
- There are edge cases which result in two chains being created which trivially
- represent linear control flow. For example we might have the chains
- [(A-B-C),(D-E)] with an cfg triangle:
+ As a result from the above step we still end up with multiple chains which
+ represent sequential control flow chunks. But they are not yet suitable for
+ code layout as we need to place *all* blocks into a single sequence.
- A----->C->D->E
- \->B-/
+ In this step we combine chains result from the above step via these steps:
- We also get three independent chains if two branches end with a jump
- to a common successor.
+ *) Look at the ranked list of *all* edges, including calls/jumps across info tables
+ and the like.
- We take care of these cases by fusing chains which are connected by an
- edge.
+ *) Look at each edge and
- We do so by looking at the list of edges sorted by weight.
- Given the edge (C -> D) we try to find two chains such that:
- * C is at the end of chain one.
- * D is in front of chain two.
- * If two such chains exist we fuse them.
- We then remove the edge and repeat the process for the rest of the edges.
+ +) Given an edge (A -> B) try to find two chains for which
+ * Block A is at the end of one chain
+ * Block B is at the front of the other chain.
+ +) If we find such a chain we "fuse" them into a single chain, remove the
+ edge from working set and continue.
+ +) If we can't find such chains we skip the edge and continue.
-----------------------------------------------------------------------------
- Place indirect successors (neighbours) after each other
+ 3) Place indirect successors (neighbours) after each other
-----------------------------------------------------------------------------
We might have chains [A,B,C,X],[E] in a CFG of the sort:
@@ -141,15 +188,11 @@ import qualified Data.Set as Set
While E does not follow X it's still beneficial to place them near each other.
This can be advantageous if eg C,X,E will end up in the same cache line.
- TODO: If we remove edges as we use them (eg if we build up A->B remove A->B
- from the list) we could save some more work in later phases.
-
-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~ Note [Triangle Control Flow]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Checking if an argument is already evaluating leads to a somewhat
+ Checking if an argument is already evaluated leads to a somewhat
special case which looks like this:
A:
@@ -204,11 +247,6 @@ import qualified Data.Set as Set
neighbourOverlapp :: Int
neighbourOverlapp = 2
--- | Only edges heavier than this are considered
--- for fusing two chains into a single chain.
-fuseEdgeThreshold :: EdgeWeight
-fuseEdgeThreshold = 0
-
-- | Maps blocks near the end of a chain to it's chain AND
-- the other blocks near the end.
-- [A,B,C,D,E] Gives entries like (B -> ([A,B], [A,B,C,D,E]))
@@ -224,40 +262,24 @@ type FrontierMap = LabelMap ([BlockId],BlockChain)
newtype BlockChain
= BlockChain { chainBlocks :: (OrdList BlockId) }
-instance Eq (BlockChain) where
- (BlockChain blks1) == (BlockChain blks2)
- = fromOL blks1 == fromOL blks2
+-- All chains are constructed the same way so comparison
+-- including structure is faster.
+instance Eq BlockChain where
+ BlockChain b1 == BlockChain b2 = strictlyEqOL b1 b2
-- Useful for things like sets and debugging purposes, sorts by blocks
-- in the chain.
instance Ord (BlockChain) where
(BlockChain lbls1) `compare` (BlockChain lbls2)
- = (fromOL lbls1) `compare` (fromOL lbls2)
+ = ASSERT(toList lbls1 /= toList lbls2 || lbls1 `strictlyEqOL` lbls2)
+ strictlyOrdOL lbls1 lbls2
instance Outputable (BlockChain) where
ppr (BlockChain blks) =
parens (text "Chain:" <+> ppr (fromOL $ blks) )
-data WeightedEdge = WeightedEdge !BlockId !BlockId EdgeWeight deriving (Eq)
-
-
--- | Non deterministic! (Uniques) Sorts edges by weight and nodes.
-instance Ord WeightedEdge where
- compare (WeightedEdge from1 to1 weight1)
- (WeightedEdge from2 to2 weight2)
- | weight1 < weight2 || weight1 == weight2 && from1 < from2 ||
- weight1 == weight2 && from1 == from2 && to1 < to2
- = LT
- | from1 == from2 && to1 == to2 && weight1 == weight2
- = EQ
- | otherwise
- = GT
-
-instance Outputable WeightedEdge where
- ppr (WeightedEdge from to info) =
- ppr from <> text "->" <> ppr to <> brackets (ppr info)
-
-type WeightedEdgeList = [WeightedEdge]
+chainFoldl :: (b -> BlockId -> b) -> b -> BlockChain -> b
+chainFoldl f z (BlockChain blocks) = foldl' f z blocks
noDups :: [BlockChain] -> Bool
noDups chains =
@@ -270,19 +292,21 @@ inFront :: BlockId -> BlockChain -> Bool
inFront bid (BlockChain seq)
= headOL seq == bid
-chainMember :: BlockId -> BlockChain -> Bool
-chainMember bid chain
- = elem bid $ fromOL . chainBlocks $ chain
--- = setMember bid . chainMembers $ chain
-
chainSingleton :: BlockId -> BlockChain
chainSingleton lbl
= BlockChain (unitOL lbl)
+chainFromList :: [BlockId] -> BlockChain
+chainFromList = BlockChain . toOL
+
chainSnoc :: BlockChain -> BlockId -> BlockChain
chainSnoc (BlockChain blks) lbl
= BlockChain (blks `snocOL` lbl)
+chainCons :: BlockId -> BlockChain -> BlockChain
+chainCons lbl (BlockChain blks)
+ = BlockChain (lbl `consOL` blks)
+
chainConcat :: BlockChain -> BlockChain -> BlockChain
chainConcat (BlockChain blks1) (BlockChain blks2)
= BlockChain (blks1 `appOL` blks2)
@@ -311,52 +335,14 @@ takeL :: Int -> BlockChain -> [BlockId]
takeL n (BlockChain blks) =
take n . fromOL $ blks
--- | For a given list of chains try to fuse chains with strong
--- edges between them into a single chain.
--- Returns the list of fused chains together with a set of
--- used edges. The set of edges is indirectly encoded in the
--- chains so doesn't need to be considered for later passes.
-fuseChains :: WeightedEdgeList -> LabelMap BlockChain
- -> (LabelMap BlockChain, Set.Set WeightedEdge)
-fuseChains weights chains
- = let fronts = mapFromList $
- map (\chain -> (headOL . chainBlocks $ chain,chain)) $
- mapElems chains :: LabelMap BlockChain
- (chains', used, _) = applyEdges weights chains fronts Set.empty
- in (chains', used)
- where
- applyEdges :: WeightedEdgeList -> LabelMap BlockChain
- -> LabelMap BlockChain -> Set.Set WeightedEdge
- -> (LabelMap BlockChain, Set.Set WeightedEdge, LabelMap BlockChain)
- applyEdges [] chainsEnd chainsFront used
- = (chainsEnd, used, chainsFront)
- applyEdges (edge@(WeightedEdge from to w):edges) chainsEnd chainsFront used
- --Since we order edges descending by weight we can stop here
- | w <= fuseEdgeThreshold
- = ( chainsEnd, used, chainsFront)
- --Fuse the two chains
- | Just c1 <- mapLookup from chainsEnd
- , Just c2 <- mapLookup to chainsFront
- , c1 /= c2
- = let newChain = chainConcat c1 c2
- front = headOL . chainBlocks $ newChain
- end = lastOL . chainBlocks $ newChain
- chainsFront' = mapInsert front newChain $
- mapDelete to chainsFront
- chainsEnd' = mapInsert end newChain $
- mapDelete from chainsEnd
- in applyEdges edges chainsEnd' chainsFront'
- (Set.insert edge used)
- | otherwise
- --Check next edge
- = applyEdges edges chainsEnd chainsFront used
-
+-- Note [Combining neighborhood chains]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- See also Note [Chain based CFG serialization]
-- We have the chains (A-B-C-D) and (E-F) and an Edge C->E.
--
--- While placing the later after the former doesn't result in sequential
--- control flow it is still be benefical since block C and E might end
+-- While placing the latter after the former doesn't result in sequential
+-- control flow it is still benefical. As block C and E might end
-- up in the same cache line.
--
-- So we place these chains next to each other even if we can't fuse them.
@@ -365,7 +351,7 @@ fuseChains weights chains
-- v
-- - -> E -> F ...
--
--- Simple heuristic to chose which chains we want to combine:
+-- A simple heuristic to chose which chains we want to combine:
-- * Process edges in descending priority.
-- * Check if there is a edge near the end of one chain which goes
-- to a block near the start of another edge.
@@ -375,14 +361,22 @@ fuseChains weights chains
-- us to find all edges between two chains, check the distance for all edges,
-- rank them based on the distance and and only then we can select two chains
-- to combine. Which would add a lot of complexity for little gain.
+--
+-- So instead we just rank by the strength of the edge and use the first pair we
+-- find.
-- | For a given list of chains and edges try to combine chains with strong
-- edges between them.
-combineNeighbourhood :: WeightedEdgeList -> [BlockChain]
- -> [BlockChain]
+combineNeighbourhood :: [CfgEdge] -- ^ Edges to consider
+ -> [BlockChain] -- ^ Current chains of blocks
+ -> ([BlockChain], Set.Set (BlockId,BlockId))
+ -- ^ Resulting list of block chains, and a set of edges which
+ -- were used to fuse chains and as such no longer need to be
+ -- considered.
combineNeighbourhood edges chains
= -- pprTraceIt "Neigbours" $
- applyEdges edges endFrontier startFrontier
+ -- pprTrace "combineNeighbours" (ppr edges) $
+ applyEdges edges endFrontier startFrontier (Set.empty)
where
--Build maps from chain ends to chains
endFrontier, startFrontier :: FrontierMap
@@ -396,14 +390,14 @@ combineNeighbourhood edges chains
let front = getFronts chain
entry = (front,chain)
in map (\x -> (x,entry)) front) chains
- applyEdges :: WeightedEdgeList -> FrontierMap -> FrontierMap
- -> [BlockChain]
- applyEdges [] chainEnds _chainFronts =
- ordNub $ map snd $ mapElems chainEnds
- applyEdges ((WeightedEdge from to _w):edges) chainEnds chainFronts
+ applyEdges :: [CfgEdge] -> FrontierMap -> FrontierMap -> Set.Set (BlockId, BlockId)
+ -> ([BlockChain], Set.Set (BlockId,BlockId))
+ applyEdges [] chainEnds _chainFronts combined =
+ (ordNub $ map snd $ mapElems chainEnds, combined)
+ applyEdges ((CfgEdge from to _w):edges) chainEnds chainFronts combined
| Just (c1_e,c1) <- mapLookup from chainEnds
, Just (c2_f,c2) <- mapLookup to chainFronts
- , c1 /= c2 -- Avoid trying to concat a short chain with itself.
+ , c1 /= c2 -- Avoid trying to concat a chain with itself.
= let newChain = chainConcat c1 c2
newChainFrontier = getFronts newChain
newChainEnds = getEnds newChain
@@ -437,165 +431,299 @@ combineNeighbourhood edges chains
-- text "fronts" <+> ppr newFronts $$
-- text "ends" <+> ppr newEnds
-- )
- applyEdges edges newEnds newFronts
+ applyEdges edges newEnds newFronts (Set.insert (from,to) combined)
| otherwise
- = --pprTrace "noNeigbours" (ppr ()) $
- applyEdges edges chainEnds chainFronts
+ = applyEdges edges chainEnds chainFronts combined
where
getFronts chain = takeL neighbourOverlapp chain
getEnds chain = takeR neighbourOverlapp chain
-
-
--- See [Chain based CFG serialization]
-buildChains :: CFG -> [BlockId]
- -> ( LabelMap BlockChain -- Resulting chains.
+-- In the last stop we combine all chains into a single one.
+-- Trying to place chains with strong edges next to each other.
+mergeChains :: [CfgEdge] -> [BlockChain]
+ -> (BlockChain)
+mergeChains edges chains
+ = -- pprTrace "combine" (ppr edges) $
+ runST $ do
+ let addChain m0 chain = do
+ ref <- newSTRef chain
+ return $ chainFoldl (\m' b -> mapInsert b ref m') m0 chain
+ chainMap' <- foldM (\m0 c -> addChain m0 c) mapEmpty chains
+ merge edges chainMap'
+ where
+ -- We keep a map from ALL blocks to their respective chain (sigh)
+ -- This is required since when looking at an edge we need to find
+ -- the associated chains quickly.
+ -- We use a map of STRefs, maintaining a invariant of one STRef per chain.
+ -- When merging chains we can update the
+ -- STRef of one chain once (instead of writing to the map for each block).
+ -- We then overwrite the STRefs for the other chain so there is again only
+ -- a single STRef for the combined chain.
+ -- The difference in terms of allocations saved is ~0.2% with -O so actually
+ -- significant compared to using a regular map.
+
+ merge :: forall s. [CfgEdge] -> LabelMap (STRef s BlockChain) -> ST s BlockChain
+ merge [] chains = do
+ chains' <- ordNub <$> (mapM readSTRef $ mapElems chains) :: ST s [BlockChain]
+ return $ foldl' chainConcat (head chains') (tail chains')
+ merge ((CfgEdge from to _):edges) chains
+ -- | pprTrace "merge" (ppr (from,to) <> ppr chains) False
+ -- = undefined
+ | cFrom == cTo
+ = merge edges chains
+ | otherwise
+ = do
+ chains' <- mergeComb cFrom cTo
+ merge edges chains'
+ where
+ mergeComb :: STRef s BlockChain -> STRef s BlockChain -> ST s (LabelMap (STRef s BlockChain))
+ mergeComb refFrom refTo = do
+ cRight <- readSTRef refTo
+ chain <- pure chainConcat <*> readSTRef refFrom <*> pure cRight
+ writeSTRef refFrom chain
+ return $ chainFoldl (\m b -> mapInsert b refFrom m) chains cRight
+
+ cFrom = expectJust "mergeChains:chainMap:from" $ mapLookup from chains
+ cTo = expectJust "mergeChains:chainMap:to" $ mapLookup to chains
+
+
+-- See Note [Chain based CFG serialization] for the general idea.
+-- This creates and fuses chains at the same time for performance reasons.
+
+-- Try to build chains from a list of edges.
+-- Edges must be sorted **descending** by their priority.
+-- Returns the constructed chains, along with all edges which
+-- are irrelevant past this point, this information doesn't need
+-- to be complete - it's only used to speed up the process.
+-- An Edge is irrelevant if the ends are part of the same chain.
+-- We say these edges are already linked
+buildChains :: [CfgEdge] -> [BlockId]
+ -> ( LabelMap BlockChain -- Resulting chains, indexd by end if chain.
, Set.Set (BlockId, BlockId)) --List of fused edges.
-buildChains succWeights blocks
- = let (_, fusedEdges, chains) = buildNext setEmpty mapEmpty blocks Set.empty
- in (chains, fusedEdges)
+buildChains edges blocks
+ = runST $ buildNext setEmpty mapEmpty mapEmpty edges Set.empty
where
- -- We keep a map from the last block in a chain to the chain itself.
- -- This we we can easily check if an block should be appened to an
+ -- buildNext builds up chains from edges one at a time.
+
+ -- We keep a map from the ends of chains to the chains.
+ -- This we we can easily check if an block should be appended to an
-- existing chain!
- buildNext :: LabelSet
- -> LabelMap BlockChain -- Map from last element to chain.
- -> [BlockId] -- Blocks to place
- -> Set.Set (BlockId, BlockId)
- -> ( [BlockChain] -- Placed Blocks
- , Set.Set (BlockId, BlockId) --List of fused edges
- , LabelMap BlockChain
- )
- buildNext _placed chains [] linked =
- ([], linked, chains)
- buildNext placed chains (block:todo) linked
- | setMember block placed
- = buildNext placed chains todo linked
+ -- We store them using STRefs so we don't have to rebuild the spine of both
+ -- maps every time we update a chain.
+ buildNext :: forall s. LabelSet
+ -> LabelMap (STRef s BlockChain) -- Map from end of chain to chain.
+ -> LabelMap (STRef s BlockChain) -- Map from start of chain to chain.
+ -> [CfgEdge] -- Edges to check - ordered by decreasing weight
+ -> Set.Set (BlockId, BlockId) -- Used edges
+ -> ST s ( LabelMap BlockChain -- Chains by end
+ , Set.Set (BlockId, BlockId) --List of fused edges
+ )
+ buildNext placed _chainStarts chainEnds [] linked = do
+ ends' <- sequence $ mapMap readSTRef chainEnds :: ST s (LabelMap BlockChain)
+ -- Any remaining blocks have to be made to singleton chains.
+ -- They might be combined with other chains later on outside this function.
+ let unplaced = filter (\x -> not (setMember x placed)) blocks
+ singletons = map (\x -> (x,chainSingleton x)) unplaced :: [(BlockId,BlockChain)]
+ return (foldl' (\m (k,v) -> mapInsert k v m) ends' singletons , linked)
+ buildNext placed chainStarts chainEnds (edge:todo) linked
+ | from == to
+ -- We skip self edges
+ = buildNext placed chainStarts chainEnds todo (Set.insert (from,to) linked)
+ | not (alreadyPlaced from) &&
+ not (alreadyPlaced to)
+ = do
+ --pprTraceM "Edge-Chain:" (ppr edge)
+ chain' <- newSTRef $ chainFromList [from,to]
+ buildNext
+ (setInsert to (setInsert from placed))
+ (mapInsert from chain' chainStarts)
+ (mapInsert to chain' chainEnds)
+ todo
+ (Set.insert (from,to) linked)
+
+ | (alreadyPlaced from) &&
+ (alreadyPlaced to)
+ , Just predChain <- mapLookup from chainEnds
+ , Just succChain <- mapLookup to chainStarts
+ , predChain /= succChain -- Otherwise we try to create a cycle.
+ = do
+ -- pprTraceM "Fusing edge" (ppr edge)
+ fuseChain predChain succChain
+
+ | (alreadyPlaced from) &&
+ (alreadyPlaced to)
+ = --pprTraceM "Skipping:" (ppr edge) >>
+ buildNext placed chainStarts chainEnds todo linked
+
| otherwise
- = buildNext placed' chains' todo linked'
+ = do -- pprTraceM "Finding chain for:" (ppr edge $$
+ -- text "placed" <+> ppr placed)
+ findChain
where
- placed' = (foldl' (flip setInsert) placed placedBlocks)
- linked' = Set.union linked linkedEdges
- (placedBlocks, chains', linkedEdges) = findChain block
-
- --Add the block to a existing or new chain
- --Returns placed blocks, list of resulting chains
- --and fused edges
- findChain :: BlockId
- -> ([BlockId],LabelMap BlockChain, Set.Set (BlockId, BlockId))
- findChain block
- -- B) place block at end of existing chain if
- -- there is no better block to append.
- | (pred:_) <- preds
- , alreadyPlaced pred
- , Just predChain <- mapLookup pred chains
- , (best:_) <- filter (not . alreadyPlaced) $ getSuccs pred
- , best == lbl
- = --pprTrace "B.2)" (ppr (pred,lbl)) $
- let newChain = chainSnoc predChain block
- chainMap = mapInsert lbl newChain $ mapDelete pred chains
- in ( [lbl]
- , chainMap
- , Set.singleton (pred,lbl) )
-
+ from = edgeFrom edge
+ to = edgeTo edge
+ alreadyPlaced blkId = (setMember blkId placed)
+
+ -- Combine two chains into a single one.
+ fuseChain :: STRef s BlockChain -> STRef s BlockChain
+ -> ST s ( LabelMap BlockChain -- Chains by end
+ , Set.Set (BlockId, BlockId) --List of fused edges
+ )
+ fuseChain fromRef toRef = do
+ fromChain <- readSTRef fromRef
+ toChain <- readSTRef toRef
+ let newChain = chainConcat fromChain toChain
+ ref <- newSTRef newChain
+ let start = head $ takeL 1 newChain
+ let end = head $ takeR 1 newChain
+ -- chains <- sequence $ mapMap readSTRef chainStarts
+ -- pprTraceM "pre-fuse chains:" $ ppr chains
+ buildNext
+ placed
+ (mapInsert start ref $ mapDelete to $ chainStarts)
+ (mapInsert end ref $ mapDelete from $ chainEnds)
+ todo
+ (Set.insert (from,to) linked)
+
+
+ --Add the block to a existing chain or creates a new chain
+ findChain :: ST s ( LabelMap BlockChain -- Chains by end
+ , Set.Set (BlockId, BlockId) --List of fused edges
+ )
+ findChain
+ -- We can attach the block to the end of a chain
+ | alreadyPlaced from
+ , Just predChain <- mapLookup from chainEnds
+ = do
+ chain <- readSTRef predChain
+ let newChain = chainSnoc chain to
+ writeSTRef predChain newChain
+ let chainEnds' = mapInsert to predChain $ mapDelete from chainEnds
+ -- chains <- sequence $ mapMap readSTRef chainStarts
+ -- pprTraceM "from chains:" $ ppr chains
+ buildNext (setInsert to placed) chainStarts chainEnds' todo (Set.insert (from,to) linked)
+ -- We can attack it to the front of a chain
+ | alreadyPlaced to
+ , Just succChain <- mapLookup to chainStarts
+ = do
+ chain <- readSTRef succChain
+ let newChain = from `chainCons` chain
+ writeSTRef succChain newChain
+ let chainStarts' = mapInsert from succChain $ mapDelete to chainStarts
+ -- chains <- sequence $ mapMap readSTRef chainStarts'
+ -- pprTraceM "to chains:" $ ppr chains
+ buildNext (setInsert from placed) chainStarts' chainEnds todo (Set.insert (from,to) linked)
+ -- The placed end of the edge is part of a chain already and not an end.
| otherwise
- = --pprTrace "single" (ppr lbl)
- ( [lbl]
- , mapInsert lbl (chainSingleton lbl) chains
- , Set.empty)
+ = do
+ let block = if alreadyPlaced to then from else to
+ --pprTraceM "Singleton" $ ppr block
+ let newChain = chainSingleton block
+ ref <- newSTRef newChain
+ buildNext (setInsert block placed) (mapInsert block ref chainStarts)
+ (mapInsert block ref chainEnds) todo (linked)
where
alreadyPlaced blkId = (setMember blkId placed)
- lbl = block
- getSuccs = map fst . getSuccEdgesSorted succWeights
- preds = map fst $ getSuccEdgesSorted predWeights lbl
- --For efficiency we also create the map to look up predecessors here
- predWeights = reverseEdges succWeights
-
-
--- We make the CFG a Hoopl Graph, so we can reuse revPostOrder.
-newtype BlockNode (e :: Extensibility) (x :: Extensibility) = BN (BlockId,[BlockId])
-instance NonLocal (BlockNode) where
- entryLabel (BN (lbl,_)) = lbl
- successors (BN (_,succs)) = succs
-
-fromNode :: BlockNode C C -> BlockId
-fromNode (BN x) = fst x
-
-sequenceChain :: forall a i. (Instruction i, Outputable i) => LabelMap a -> CFG
- -> [GenBasicBlock i] -> [GenBasicBlock i]
+-- | Place basic blocks based on the given CFG.
+-- See Note [Chain based CFG serialization]
+sequenceChain :: forall a i. (Instruction i, Outputable i)
+ => LabelMap a -- ^ Keys indicate an info table on the block.
+ -> CFG -- ^ Control flow graph and some meta data.
+ -> [GenBasicBlock i] -- ^ List of basic blocks to be placed.
+ -> [GenBasicBlock i] -- ^ Blocks placed in sequence.
sequenceChain _info _weights [] = []
sequenceChain _info _weights [x] = [x]
sequenceChain info weights' blocks@((BasicBlock entry _):_) =
- --Optimization, delete edges of weight <= 0.
- --This significantly improves performance whenever
- --we iterate over all edges, which is a few times!
let weights :: CFG
- weights
- = filterEdges (\_f _t edgeInfo -> edgeWeight edgeInfo > 0) weights'
+ weights = --pprTrace "cfg'" (pprEdgeWeights cfg')
+ cfg'
+ where
+ (_, globalEdgeWeights) = {-# SCC mkGlobalWeights #-} mkGlobalWeights entry weights'
+ cfg' = {-# SCC rewriteEdges #-}
+ mapFoldlWithKey
+ (\cfg from m ->
+ mapFoldlWithKey
+ (\cfg to w -> setEdgeWeight cfg (EdgeWeight w) from to )
+ cfg m )
+ weights'
+ globalEdgeWeights
+
+ directEdges :: [CfgEdge]
+ directEdges = sortBy (flip compare) $ catMaybes . map relevantWeight $ (infoEdgeList weights)
+ where
+ relevantWeight :: CfgEdge -> Maybe CfgEdge
+ relevantWeight edge@(CfgEdge from to edgeInfo)
+ | (EdgeInfo CmmSource { trans_cmmNode = CmmCall {} } _) <- edgeInfo
+ -- Ignore edges across calls
+ = Nothing
+ | mapMember to info
+ , w <- edgeWeight edgeInfo
+ -- The payoff is small if we jump over an info table
+ = Just (CfgEdge from to edgeInfo { edgeWeight = w/8 })
+ | otherwise
+ = Just edge
+
blockMap :: LabelMap (GenBasicBlock i)
blockMap
= foldl' (\m blk@(BasicBlock lbl _ins) ->
mapInsert lbl blk m)
mapEmpty blocks
- toNode :: BlockId -> BlockNode C C
- toNode bid =
- -- sorted such that heavier successors come first.
- BN (bid,map fst . getSuccEdgesSorted weights' $ bid)
-
- orderedBlocks :: [BlockId]
- orderedBlocks
- = map fromNode $
- revPostorderFrom (fmap (toNode . blockId) blockMap) entry
-
(builtChains, builtEdges)
= {-# SCC "buildChains" #-}
--pprTraceIt "generatedChains" $
- --pprTrace "orderedBlocks" (ppr orderedBlocks) $
- buildChains weights orderedBlocks
+ --pprTrace "blocks" (ppr (mapKeys blockMap)) $
+ buildChains directEdges (mapKeys blockMap)
- rankedEdges :: WeightedEdgeList
- -- Sort edges descending, remove fused eges
+ rankedEdges :: [CfgEdge]
+ -- Sort descending by weight, remove fused edges
rankedEdges =
- map (\(from, to, weight) -> WeightedEdge from to weight) .
- filter (\(from, to, _)
- -> not (Set.member (from,to) builtEdges)) .
- sortWith (\(_,_,w) -> - w) $ weightedEdgeList weights
+ filter (\edge -> not (Set.member (edgeFrom edge,edgeTo edge) builtEdges)) $
+ directEdges
- (fusedChains, fusedEdges)
+ (neighbourChains, combined)
= ASSERT(noDups $ mapElems builtChains)
- {-# SCC "fuseChains" #-}
- --(pprTrace "RankedEdges" $ ppr rankedEdges) $
- --pprTraceIt "FusedChains" $
- fuseChains rankedEdges builtChains
-
- rankedEdges' =
- filter (\edge -> not $ Set.member edge fusedEdges) $ rankedEdges
-
- neighbourChains
- = ASSERT(noDups $ mapElems fusedChains)
{-# SCC "groupNeighbourChains" #-}
- --pprTraceIt "ResultChains" $
- combineNeighbourhood rankedEdges' (mapElems fusedChains)
+ -- pprTraceIt "NeighbourChains" $
+ combineNeighbourhood rankedEdges (mapElems builtChains)
+
+
+ allEdges :: [CfgEdge]
+ allEdges = {-# SCC allEdges #-}
+ sortOn (relevantWeight) $ filter (not . deadEdge) $ (infoEdgeList weights)
+ where
+ deadEdge :: CfgEdge -> Bool
+ deadEdge (CfgEdge from to _) = let e = (from,to) in Set.member e combined || Set.member e builtEdges
+ relevantWeight :: CfgEdge -> EdgeWeight
+ relevantWeight (CfgEdge _ _ edgeInfo)
+ | EdgeInfo (CmmSource { trans_cmmNode = CmmCall {}}) _ <- edgeInfo
+ -- Penalize edges across calls
+ = weight/(64.0)
+ | otherwise
+ = weight
+ where
+ -- negate to sort descending
+ weight = negate (edgeWeight edgeInfo)
+
+ masterChain =
+ {-# SCC "mergeChains" #-}
+ -- pprTraceIt "MergedChains" $
+ mergeChains allEdges neighbourChains
--Make sure the first block stays first
- ([entryChain],chains')
- = ASSERT(noDups $ neighbourChains)
- partition (chainMember entry) neighbourChains
- (entryChain':entryRest)
- | inFront entry entryChain = [entryChain]
- | (rest,entry) <- breakChainAt entry entryChain
+ prepedChains
+ | inFront entry masterChain
+ = [masterChain]
+ | (rest,entry) <- breakChainAt entry masterChain
= [entry,rest]
| otherwise = pprPanic "Entry point eliminated" $
- ppr ([entryChain],chains')
+ ppr masterChain
- prepedChains
- = entryChain':(entryRest++chains') :: [BlockChain]
blockList
- -- = (concatMap chainToBlocks prepedChains)
- = (concatMap fromOL $ map chainBlocks prepedChains)
+ = ASSERT(noDups [masterChain])
+ (concatMap fromOL $ map chainBlocks prepedChains)
--chainPlaced = setFromList $ map blockId blockList :: LabelSet
chainPlaced = setFromList $ blockList :: LabelSet
@@ -605,14 +733,22 @@ sequenceChain info weights' blocks@((BasicBlock entry _):_) =
in filter (\block -> not (isPlaced block)) blocks
placedBlocks =
+ -- We want debug builds to catch this as it's a good indicator for
+ -- issues with CFG invariants. But we don't want to blow up production
+ -- builds if something slips through.
+ ASSERT(null unplaced)
--pprTraceIt "placedBlocks" $
- blockList ++ unplaced
+ -- ++ [] is stil kinda expensive
+ if null unplaced then blockList else blockList ++ unplaced
getBlock bid = expectJust "Block placment" $ mapLookup bid blockMap
in
--Assert we placed all blocks given as input
ASSERT(all (\bid -> mapMember bid blockMap) placedBlocks)
dropJumps info $ map getBlock placedBlocks
+{-# SCC dropJumps #-}
+-- | Remove redundant jumps between blocks when we can rely on
+-- fall through.
dropJumps :: forall a i. Instruction i => LabelMap a -> [GenBasicBlock i]
-> [GenBasicBlock i]
dropJumps _ [] = []
@@ -641,7 +777,8 @@ sequenceTop
=> DynFlags -- Determine which layout algo to use
-> NcgImpl statics instr jumpDest
-> Maybe CFG -- ^ CFG if we have one.
- -> NatCmmDecl statics instr -> NatCmmDecl statics instr
+ -> NatCmmDecl statics instr -- ^ Function to serialize
+ -> NatCmmDecl statics instr
sequenceTop _ _ _ top@(CmmData _ _) = top
sequenceTop dflags ncgImpl edgeWeights
@@ -650,11 +787,13 @@ sequenceTop dflags ncgImpl edgeWeights
--Use chain based algorithm
, Just cfg <- edgeWeights
= CmmProc info lbl live ( ListGraph $ ncgMakeFarBranches ncgImpl info $
+ {-# SCC layoutBlocks #-}
sequenceChain info cfg blocks )
| otherwise
--Use old algorithm
= let cfg = if dontUseCfg then Nothing else edgeWeights
in CmmProc info lbl live ( ListGraph $ ncgMakeFarBranches ncgImpl info $
+ {-# SCC layoutBlocks #-}
sequenceBlocks cfg info blocks)
where
dontUseCfg = gopt Opt_WeightlessBlocklayout dflags ||