summaryrefslogtreecommitdiff
path: root/compiler/nativeGen/BlockLayout.hs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/nativeGen/BlockLayout.hs')
-rw-r--r--compiler/nativeGen/BlockLayout.hs819
1 files changed, 819 insertions, 0 deletions
diff --git a/compiler/nativeGen/BlockLayout.hs b/compiler/nativeGen/BlockLayout.hs
new file mode 100644
index 0000000000..72aea5bf10
--- /dev/null
+++ b/compiler/nativeGen/BlockLayout.hs
@@ -0,0 +1,819 @@
+--
+-- Copyright (c) 2018 Andreas Klebinger
+--
+
+{-# LANGUAGE TypeFamilies, ScopedTypeVariables, CPP #-}
+
+{-# OPTIONS_GHC -fprof-auto #-}
+--{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -ddump-cmm #-}
+
+module BlockLayout
+ ( sequenceTop )
+where
+
+#include "HsVersions.h"
+import GhcPrelude
+
+import Instruction
+import NCGMonad
+import CFG
+
+import BlockId
+import Cmm
+import Hoopl.Collections
+import Hoopl.Label
+import Hoopl.Block
+
+import DynFlags (gopt, GeneralFlag(..), DynFlags, backendMaintainsCfg)
+import UniqFM
+import Util
+import Unique
+
+import Digraph
+import Outputable
+import Maybes
+
+-- DEBUGGING ONLY
+--import Debug
+--import Debug.Trace
+import ListSetOps (removeDups)
+import PprCmm ()
+
+import OrdList
+import Data.List
+import Data.Foldable (toList)
+import Hoopl.Graph
+
+import qualified Data.Set as Set
+import Control.Applicative
+
+{-
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~~~ Note [Chain based CFG serialization]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ For additional information also look at
+ https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/CodeLayout
+
+ We have a CFG with edge weights based on which we try to place blocks next to
+ each other.
+
+ Edge weights not only represent likelyhood of control transfer between blocks
+ but also how much a block would benefit from being placed sequentially after
+ it's predecessor.
+ For example blocks which are preceeded by an info table are more likely to end
+ up in a different cache line than their predecessor. So there is less benefit
+ in placing them sequentially.
+
+ For example consider this example:
+
+ A: ...
+ jmp cond D (weak successor)
+ jmp B
+ B: ...
+ jmp C
+ C: ...
+ jmp X
+ D: ...
+ jmp B (weak successor)
+
+ We determine a block layout by building up chunks (calling them chains) of
+ possible control flows for which blocks will be placed sequentially.
+
+ Eg for our example we might end up with two chains like:
+ [A->B->C->X],[D]. Blocks inside chains will always be placed sequentially.
+ However there is no particular order in which chains are placed since
+ (hopefully) the blocks for which sequentially is important have already
+ been placed in the same chain.
+
+ -----------------------------------------------------------------------------
+ First try to create a lists of good chains.
+ -----------------------------------------------------------------------------
+
+ We do so by taking a block not yet placed in a chain and
+ looking at these cases:
+
+ *) Check if the best predecessor of the block is at the end of a chain.
+ If so add the current block to the end of that chain.
+
+ Eg if we look at block C and already have the chain (A -> B)
+ then we extend the chain to (A -> B -> C).
+
+ Combined with the fact that we process blocks in reverse post order
+ this means loop bodies and trivially sequential control flow already
+ ends up as a single chain.
+
+ *) Otherwise we create a singleton chain from the block we are looking at.
+ Eg if we have from the example above already constructed (A->B)
+ and look at D we create the chain (D) resulting in the chains [A->B, D]
+
+ -----------------------------------------------------------------------------
+ We then try to fuse chains.
+ -----------------------------------------------------------------------------
+
+ There are edge cases which result in two chains being created which trivially
+ represent linear control flow. For example we might have the chains
+ [(A-B-C),(D-E)] with an cfg triangle:
+
+ A----->C->D->E
+ \->B-/
+
+ We also get three independent chains if two branches end with a jump
+ to a common successor.
+
+ We take care of these cases by fusing chains which are connected by an
+ edge.
+
+ We do so by looking at the list of edges sorted by weight.
+ Given the edge (C -> D) we try to find two chains such that:
+ * C is at the end of chain one.
+ * D is in front of chain two.
+ * If two such chains exist we fuse them.
+ We then remove the edge and repeat the process for the rest of the edges.
+
+ -----------------------------------------------------------------------------
+ Place indirect successors (neighbours) after each other
+ -----------------------------------------------------------------------------
+
+ We might have chains [A,B,C,X],[E] in a CFG of the sort:
+
+ A ---> B ---> C --------> X(exit)
+ \- ->E- -/
+
+ While E does not follow X it's still beneficial to place them near each other.
+ This can be advantageous if eg C,X,E will end up in the same cache line.
+
+ TODO: If we remove edges as we use them (eg if we build up A->B remove A->B
+ from the list) we could save some more work in later phases.
+
+
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~~~ Note [Triangle Control Flow]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Checking if an argument is already evaluating leads to a somewhat
+ special case which looks like this:
+
+ A:
+ if (R1 & 7 != 0) goto Leval; else goto Lwork;
+ Leval: // global
+ call (I64[R1])(R1) returns to Lwork, args: 8, res: 8, upd: 8;
+ Lwork: // global
+ ...
+
+ A
+ |\
+ | Leval
+ |/ - (This edge can be missing because of optimizations)
+ Lwork
+
+ Once we hit the metal the call instruction is just 2-3 bytes large
+ depending on the register used. So we lay out the assembly like this:
+
+ movq %rbx,%rax
+ andl $7,%eax
+ cmpq $1,%rax
+ jne Lwork
+ Leval:
+ jmp *(%rbx) # encoded in 2-3 bytes.
+ <info table>
+ Lwork:
+ ...
+
+ We could explicitly check for this control flow pattern.
+
+ This is advantageous because:
+ * It's optimal if the argument isn't evaluated.
+ * If it's evaluated we only have the extra cost of jumping over
+ the 2-3 bytes for the call.
+ * Guarantees the smaller encoding for the conditional jump.
+
+ However given that Lwork usually has an info table we
+ penalize this edge. So Leval should get placed first
+ either way and things work out for the best.
+
+ Optimizing for the evaluated case instead would penalize
+ the other code path. It adds an jump as we can't fall through
+ to Lwork because of the info table.
+ Assuming that Lwork is large the chance that the "call" ends up
+ in the same cache line is also fairly small.
+
+-}
+
+
+-- | Look at X number of blocks in two chains to determine
+-- if they are "neighbours".
+neighbourOverlapp :: Int
+neighbourOverlapp = 2
+
+-- | Only edges heavier than this are considered
+-- for fusing two chains into a single chain.
+fuseEdgeThreshold :: EdgeWeight
+fuseEdgeThreshold = 0
+
+
+-- | A non empty ordered sequence of basic blocks.
+-- It is suitable for serialization in this order.
+data BlockChain
+ = BlockChain
+ { chainMembers :: !LabelSet
+ , chainBlocks :: !BlockSequence
+ }
+
+instance Eq (BlockChain) where
+ (BlockChain s1 _) == (BlockChain s2 _)
+ = s1 == s2
+
+instance Outputable (BlockChain) where
+ ppr (BlockChain _ blks) =
+ parens (text "Chain:" <+> ppr (seqToList $ blks) )
+
+data WeightedEdge = WeightedEdge !BlockId !BlockId EdgeWeight deriving (Eq)
+
+-- Useful for things like sets and debugging purposes, sorts by blocks
+-- in the chain.
+instance Ord (BlockChain) where
+ (BlockChain lbls1 _) `compare` (BlockChain lbls2 _)
+ = lbls1 `compare` lbls2
+
+-- | Non deterministic! (Uniques) Sorts edges by weight and nodes.
+instance Ord WeightedEdge where
+ compare (WeightedEdge from1 to1 weight1)
+ (WeightedEdge from2 to2 weight2)
+ | weight1 < weight2 || weight1 == weight2 && from1 < from2 ||
+ weight1 == weight2 && from1 == from2 && to1 < to2
+ = LT
+ | from1 == from2 && to1 == to2 && weight1 == weight2
+ = EQ
+ | otherwise
+ = GT
+
+instance Outputable WeightedEdge where
+ ppr (WeightedEdge from to info) =
+ ppr from <> text "->" <> ppr to <> brackets (ppr info)
+
+type WeightedEdgeList = [WeightedEdge]
+
+noDups :: [BlockChain] -> Bool
+noDups chains =
+ let chainBlocks = concatMap chainToBlocks chains :: [BlockId]
+ (_blocks, dups) = removeDups compare chainBlocks
+ in if null dups then True
+ else pprTrace "Duplicates:" (ppr (map toList dups) $$ text "chains" <+> ppr chains ) False
+
+inFront :: BlockId -> BlockChain -> Bool
+inFront bid (BlockChain _ seq)
+ = seqFront seq == bid
+
+chainMember :: BlockId -> BlockChain -> Bool
+chainMember bid chain
+ = setMember bid . chainMembers $ chain
+
+chainSingleton :: BlockId -> BlockChain
+chainSingleton lbl
+ = BlockChain (setSingleton lbl) (Singleton lbl)
+
+chainSnoc :: BlockChain -> BlockId -> BlockChain
+chainSnoc (BlockChain lbls blks) lbl
+ = BlockChain (setInsert lbl lbls) (seqSnoc blks lbl)
+
+chainConcat :: BlockChain -> BlockChain -> BlockChain
+chainConcat (BlockChain lbls1 blks1) (BlockChain lbls2 blks2)
+ = BlockChain (setUnion lbls1 lbls2) (blks1 `seqConcat` blks2)
+
+chainToBlocks :: BlockChain -> [BlockId]
+chainToBlocks (BlockChain _ blks) = seqToList blks
+
+-- | Given the Chain A -> B -> C -> D and we break at C
+-- we get the two Chains (A -> B, C -> D) as result.
+breakChainAt :: BlockId -> BlockChain
+ -> (BlockChain,BlockChain)
+breakChainAt bid (BlockChain lbls blks)
+ | not (setMember bid lbls)
+ = panic "Block not in chain"
+ | otherwise
+ = let (lblks, rblks) = break (\lbl -> lbl == bid)
+ (seqToList blks)
+ --TODO: Remove old
+ --lblSet :: [GenBasicBlock i] -> BlockChain
+ --lblSet blks =
+ -- setFromList
+ --(map (\(BasicBlock lbl _) -> lbl) $ toList blks)
+ in
+ (BlockChain (setFromList lblks) (seqFromBids lblks),
+ BlockChain (setFromList rblks) (seqFromBids rblks))
+
+takeR :: Int -> BlockChain -> [BlockId]
+takeR n (BlockChain _ blks) =
+ take n . seqToRList $ blks
+
+
+takeL :: Int -> BlockChain -> [BlockId]
+takeL n (BlockChain _ blks) = --error "TODO: takeLn"
+ take n . seqToList $ blks
+
+-- | For a given list of chains try to fuse chains with strong
+-- edges between them into a single chain.
+-- Returns the list of fused chains together with a set of
+-- used edges. The set of edges is indirectly encoded in the
+-- chains so doesn't need to be considered for later passes.
+fuseChains :: WeightedEdgeList -> LabelMap BlockChain
+ -> (LabelMap BlockChain, Set.Set WeightedEdge)
+fuseChains weights chains
+ = let fronts = mapFromList $
+ map (\chain -> (head $ takeL 1 chain,chain)) $
+ mapElems chains :: LabelMap BlockChain
+ (chains', used, _) = applyEdges weights chains fronts Set.empty
+ in (chains', used)
+ where
+ applyEdges :: WeightedEdgeList -> LabelMap BlockChain
+ -> LabelMap BlockChain -> Set.Set WeightedEdge
+ -> (LabelMap BlockChain, Set.Set WeightedEdge, LabelMap BlockChain)
+ applyEdges [] chainsEnd chainsFront used
+ = (chainsEnd, used, chainsFront)
+ applyEdges (edge@(WeightedEdge from to w):edges) chainsEnd chainsFront used
+ --Since we order edges descending by weight we can stop here
+ | w <= fuseEdgeThreshold
+ = ( chainsEnd, used, chainsFront)
+ --Fuse the two chains
+ | Just c1 <- mapLookup from chainsEnd
+ , Just c2 <- mapLookup to chainsFront
+ , c1 /= c2
+ = let newChain = chainConcat c1 c2
+ front = head $ takeL 1 newChain
+ end = head $ takeR 1 newChain
+ chainsFront' = mapInsert front newChain $
+ mapDelete to chainsFront
+ chainsEnd' = mapInsert end newChain $
+ mapDelete from chainsEnd
+ in applyEdges edges chainsEnd' chainsFront'
+ (Set.insert edge used)
+ | otherwise
+ --Check next edge
+ = applyEdges edges chainsEnd chainsFront used
+
+
+-- See also Note [Chain based CFG serialization]
+-- We have the chains (A-B-C-D) and (E-F) and an Edge C->E.
+--
+-- While placing the later after the former doesn't result in sequential
+-- control flow it is still be benefical since block C and E might end
+-- up in the same cache line.
+--
+-- So we place these chains next to each other even if we can't fuse them.
+--
+-- A -> B -> C -> D
+-- v
+-- - -> E -> F ...
+--
+-- Simple heuristic to chose which chains we want to combine:
+-- * Process edges in descending priority.
+-- * Check if there is a edge near the end of one chain which goes
+-- to a block near the start of another edge.
+--
+-- While we could take into account the space between the two blocks which
+-- share an edge this blows up compile times quite a bit. It requires
+-- us to find all edges between two chains, check the distance for all edges,
+-- rank them based on the distance and and only then we can select two chains
+-- to combine. Which would add a lot of complexity for little gain.
+
+-- | For a given list of chains and edges try to combine chains with strong
+-- edges between them.
+combineNeighbourhood :: WeightedEdgeList -> [BlockChain]
+ -> [BlockChain]
+combineNeighbourhood edges chains
+ = -- pprTraceIt "Neigbours" $
+ applyEdges edges endFrontier startFrontier
+ where
+ --Build maps from chain ends to chains
+ endFrontier, startFrontier :: FrontierMap
+ endFrontier =
+ mapFromList $ concatMap (\chain ->
+ let ends = getEnds chain
+ entry = (ends,chain)
+ in map (\x -> (x,entry)) ends ) chains
+ startFrontier =
+ mapFromList $ concatMap (\chain ->
+ let front = getFronts chain
+ entry = (front,chain)
+ in map (\x -> (x,entry)) front) chains
+ applyEdges :: WeightedEdgeList -> FrontierMap -> FrontierMap
+ -> [BlockChain]
+ applyEdges [] chainEnds _chainFronts =
+ ordNub $ map snd $ mapElems chainEnds
+ applyEdges ((WeightedEdge from to _w):edges) chainEnds chainFronts
+ | Just (c1_e,c1) <- mapLookup from chainEnds
+ , Just (c2_f,c2) <- mapLookup to chainFronts
+ , c1 /= c2 -- Avoid trying to concat a short chain with itself.
+ = let newChain = chainConcat c1 c2
+ newChainFrontier = getFronts newChain
+ newChainEnds = getEnds newChain
+ newFronts :: FrontierMap
+ newFronts =
+ let withoutOld =
+ foldl' (\m b -> mapDelete b m :: FrontierMap) chainFronts (c2_f ++ getFronts c1)
+ entry =
+ (newChainFrontier,newChain) --let bound to ensure sharing
+ in foldl' (\m x -> mapInsert x entry m)
+ withoutOld newChainFrontier
+
+ newEnds =
+ let withoutOld = foldl' (\m b -> mapDelete b m) chainEnds (c1_e ++ getEnds c2)
+ entry = (newChainEnds,newChain) --let bound to ensure sharing
+ in foldl' (\m x -> mapInsert x entry m)
+ withoutOld newChainEnds
+ in
+ -- pprTrace "ApplyEdges"
+ -- (text "before" $$
+ -- text "fronts" <+> ppr chainFronts $$
+ -- text "ends" <+> ppr chainEnds $$
+
+ -- text "various" $$
+ -- text "newChain" <+> ppr newChain $$
+ -- text "newChainFrontier" <+> ppr newChainFrontier $$
+ -- text "newChainEnds" <+> ppr newChainEnds $$
+ -- text "drop" <+> ppr ((c2_f ++ getFronts c1) ++ (c1_e ++ getEnds c2)) $$
+
+ -- text "after" $$
+ -- text "fronts" <+> ppr newFronts $$
+ -- text "ends" <+> ppr newEnds
+ -- )
+ applyEdges edges newEnds newFronts
+ | otherwise
+ = --pprTrace "noNeigbours" (ppr ()) $
+ applyEdges edges chainEnds chainFronts
+ where
+
+ getFronts chain = takeL neighbourOverlapp chain
+ getEnds chain = takeR neighbourOverlapp chain
+
+
+
+-- See [Chain based CFG serialization]
+buildChains :: CFG -> [BlockId]
+ -> ( LabelMap BlockChain -- Resulting chains.
+ , Set.Set (BlockId, BlockId)) --List of fused edges.
+buildChains succWeights blocks
+ = let (_, fusedEdges, chains) = buildNext setEmpty mapEmpty blocks Set.empty
+ in (chains, fusedEdges)
+ where
+ -- We keep a map from the last block in a chain to the chain itself.
+ -- This we we can easily check if an block should be appened to an
+ -- existing chain!
+ buildNext :: LabelSet
+ -> LabelMap BlockChain -- Map from last element to chain.
+ -> [BlockId] -- Blocks to place
+ -> Set.Set (BlockId, BlockId)
+ -> ( [BlockChain] -- Placed Blocks
+ , Set.Set (BlockId, BlockId) --List of fused edges
+ , LabelMap BlockChain
+ )
+ buildNext _placed chains [] linked =
+ ([], linked, chains)
+ buildNext placed chains (block:todo) linked
+ | setMember block placed
+ = buildNext placed chains todo linked
+ | otherwise
+ = buildNext placed' chains' todo linked'
+ where
+ placed' = (foldl' (flip setInsert) placed placedBlocks)
+ linked' = Set.union linked linkedEdges
+ (placedBlocks, chains', linkedEdges) = findChain block
+
+ --Add the block to a existing or new chain
+ --Returns placed blocks, list of resulting chains
+ --and fused edges
+ findChain :: BlockId
+ -> ([BlockId],LabelMap BlockChain, Set.Set (BlockId, BlockId))
+ findChain block
+ -- B) place block at end of existing chain if
+ -- there is no better block to append.
+ | (pred:_) <- preds
+ , alreadyPlaced pred
+ , Just predChain <- mapLookup pred chains
+ , (best:_) <- filter (not . alreadyPlaced) $ getSuccs pred
+ , best == lbl
+ = --pprTrace "B.2)" (ppr (pred,lbl)) $
+ let newChain = chainSnoc predChain block
+ chainMap = mapInsert lbl newChain $ mapDelete pred chains
+ in ( [lbl]
+ , chainMap
+ , Set.singleton (pred,lbl) )
+
+ | otherwise
+ = --pprTrace "single" (ppr lbl)
+ ( [lbl]
+ , mapInsert lbl (chainSingleton lbl) chains
+ , Set.empty)
+ where
+ alreadyPlaced blkId = (setMember blkId placed)
+ lbl = block
+ getSuccs = map fst . getSuccEdgesSorted succWeights
+ preds = map fst $ getSuccEdgesSorted predWeights lbl
+ --For efficiency we also create the map to look up predecessors here
+ predWeights = reverseEdges succWeights
+
+
+
+-- We make the CFG a Hoopl Graph, so we can reuse revPostOrder.
+newtype BlockNode e x = BN (BlockId,[BlockId])
+instance NonLocal (BlockNode) where
+ entryLabel (BN (lbl,_)) = lbl
+ successors (BN (_,succs)) = succs
+
+fromNode :: BlockNode C C -> BlockId
+fromNode (BN x) = fst x
+
+sequenceChain :: forall a i. (Instruction i, Outputable i) => LabelMap a -> CFG
+ -> [GenBasicBlock i] -> [GenBasicBlock i]
+sequenceChain _info _weights [] = []
+sequenceChain _info _weights [x] = [x]
+sequenceChain info weights' blocks@((BasicBlock entry _):_) =
+ --Optimization, delete edges of weight <= 0.
+ --This significantly improves performance whenever
+ --we iterate over all edges, which is a few times!
+ let weights :: CFG
+ weights
+ = filterEdges (\_f _t edgeInfo -> edgeWeight edgeInfo > 0) weights'
+ blockMap :: LabelMap (GenBasicBlock i)
+ blockMap
+ = foldl' (\m blk@(BasicBlock lbl _ins) ->
+ mapInsert lbl blk m)
+ mapEmpty blocks
+
+ toNode :: BlockId -> BlockNode C C
+ toNode bid =
+ -- sorted such that heavier successors come first.
+ BN (bid,map fst . getSuccEdgesSorted weights' $ bid)
+
+ orderedBlocks :: [BlockId]
+ orderedBlocks
+ = map fromNode $
+ revPostorderFrom (fmap (toNode . blockId) blockMap) entry
+
+ (builtChains, builtEdges)
+ = {-# SCC "buildChains" #-}
+ --pprTraceIt "generatedChains" $
+ --pprTrace "orderedBlocks" (ppr orderedBlocks) $
+ buildChains weights orderedBlocks
+
+ rankedEdges :: WeightedEdgeList
+ -- Sort edges descending, remove fused eges
+ rankedEdges =
+ map (\(from, to, weight) -> WeightedEdge from to weight) .
+ filter (\(from, to, _)
+ -> not (Set.member (from,to) builtEdges)) .
+ sortWith (\(_,_,w) -> - w) $ weightedEdgeList weights
+
+ (fusedChains, fusedEdges)
+ = ASSERT(noDups $ mapElems builtChains)
+ {-# SCC "fuseChains" #-}
+ --(pprTrace "RankedEdges" $ ppr rankedEdges) $
+ --pprTraceIt "FusedChains" $
+ fuseChains rankedEdges builtChains
+
+ rankedEdges' =
+ filter (\edge -> not $ Set.member edge fusedEdges) $ rankedEdges
+
+ neighbourChains
+ = ASSERT(noDups $ mapElems fusedChains)
+ {-# SCC "groupNeighbourChains" #-}
+ --pprTraceIt "ResultChains" $
+ combineNeighbourhood rankedEdges' (mapElems fusedChains)
+
+ --Make sure the first block stays first
+ ([entryChain],chains')
+ = ASSERT(noDups $ neighbourChains)
+ partition (chainMember entry) neighbourChains
+ (entryChain':entryRest)
+ | inFront entry entryChain = [entryChain]
+ | (rest,entry) <- breakChainAt entry entryChain
+ = [entry,rest]
+ | otherwise = pprPanic "Entry point eliminated" $
+ ppr ([entryChain],chains')
+
+ prepedChains
+ = entryChain':(entryRest++chains') :: [BlockChain]
+ blockList
+ -- = (concatMap chainToBlocks prepedChains)
+ = (concatMap seqToList $ map chainBlocks prepedChains)
+
+ --chainPlaced = setFromList $ map blockId blockList :: LabelSet
+ chainPlaced = setFromList $ blockList :: LabelSet
+ unplaced =
+ let blocks = mapKeys blockMap
+ isPlaced b = setMember (b) chainPlaced
+ in filter (\block -> not (isPlaced block)) blocks
+
+ placedBlocks =
+ --pprTraceIt "placedBlocks" $
+ blockList ++ unplaced
+ getBlock bid = expectJust "Block placment" $ mapLookup bid blockMap
+ in
+ --Assert we placed all blocks given as input
+ ASSERT(all (\bid -> mapMember bid blockMap) placedBlocks)
+ dropJumps info $ map getBlock placedBlocks
+
+dropJumps :: forall a i. Instruction i => LabelMap a -> [GenBasicBlock i]
+ -> [GenBasicBlock i]
+dropJumps _ [] = []
+dropJumps info ((BasicBlock lbl ins):todo)
+ | not . null $ ins --This can happen because of shortcutting
+ , [dest] <- jumpDestsOfInstr (last ins)
+ , ((BasicBlock nextLbl _) : _) <- todo
+ , not (mapMember dest info)
+ , nextLbl == dest
+ = BasicBlock lbl (init ins) : dropJumps info todo
+ | otherwise
+ = BasicBlock lbl ins : dropJumps info todo
+
+
+-- -----------------------------------------------------------------------------
+-- Sequencing the basic blocks
+
+-- Cmm BasicBlocks are self-contained entities: they always end in a
+-- jump, either non-local or to another basic block in the same proc.
+-- In this phase, we attempt to place the basic blocks in a sequence
+-- such that as many of the local jumps as possible turn into
+-- fallthroughs.
+
+sequenceTop
+ :: (Instruction instr, Outputable instr)
+ => DynFlags --Use new layout code
+ -> NcgImpl statics instr jumpDest -> CFG
+ -> NatCmmDecl statics instr -> NatCmmDecl statics instr
+
+sequenceTop _ _ _ top@(CmmData _ _) = top
+sequenceTop dflags ncgImpl edgeWeights
+ (CmmProc info lbl live (ListGraph blocks))
+ | (gopt Opt_CfgBlocklayout dflags) && backendMaintainsCfg dflags
+ --Use chain based algorithm
+ = CmmProc info lbl live ( ListGraph $ ncgMakeFarBranches ncgImpl info $
+ sequenceChain info edgeWeights blocks )
+ | otherwise
+ --Use old algorithm
+ = CmmProc info lbl live ( ListGraph $ ncgMakeFarBranches ncgImpl info $
+ sequenceBlocks cfg info blocks)
+ where
+ cfg
+ | (gopt Opt_WeightlessBlocklayout dflags) ||
+ (not $ backendMaintainsCfg dflags)
+ -- Don't make use of cfg in the old algorithm
+ = Nothing
+ -- Use cfg in the old algorithm
+ | otherwise = Just edgeWeights
+
+-- The old algorithm:
+-- It is very simple (and stupid): We make a graph out of
+-- the blocks where there is an edge from one block to another iff the
+-- first block ends by jumping to the second. Then we topologically
+-- sort this graph. Then traverse the list: for each block, we first
+-- output the block, then if it has an out edge, we move the
+-- destination of the out edge to the front of the list, and continue.
+
+-- FYI, the classic layout for basic blocks uses postorder DFS; this
+-- algorithm is implemented in Hoopl.
+
+sequenceBlocks :: Instruction inst => Maybe CFG -> LabelMap a
+ -> [GenBasicBlock inst] -> [GenBasicBlock inst]
+sequenceBlocks _edgeWeight _ [] = []
+sequenceBlocks edgeWeights infos (entry:blocks) =
+ let entryNode = mkNode edgeWeights entry
+ bodyNodes = reverse
+ (flattenSCCs (sccBlocks edgeWeights blocks))
+ in dropJumps infos . seqBlocks infos $ ( entryNode : bodyNodes)
+ -- the first block is the entry point ==> it must remain at the start.
+
+sccBlocks
+ :: Instruction instr
+ => Maybe CFG -> [NatBasicBlock instr]
+ -> [SCC (Node BlockId (NatBasicBlock instr))]
+sccBlocks edgeWeights blocks =
+ stronglyConnCompFromEdgedVerticesUniqR
+ (map (mkNode edgeWeights) blocks)
+
+mkNode :: (Instruction t)
+ => Maybe CFG -> GenBasicBlock t
+ -> Node BlockId (GenBasicBlock t)
+mkNode edgeWeights block@(BasicBlock id instrs) =
+ DigraphNode block id outEdges
+ where
+ outEdges :: [BlockId]
+ outEdges
+ --Select the heaviest successor, ignore weights <= zero
+ = successor
+ where
+ successor
+ | Just successors <- fmap (`getSuccEdgesSorted` id)
+ edgeWeights -- :: Maybe [(Label, EdgeInfo)]
+ = case successors of
+ [] -> []
+ ((target,info):_)
+ | length successors > 2 || edgeWeight info <= 0 -> []
+ | otherwise -> [target]
+ | otherwise
+ = case jumpDestsOfInstr (last instrs) of
+ [one] -> [one]
+ _many -> []
+
+
+seqBlocks :: LabelMap i -> [Node BlockId (GenBasicBlock t1)]
+ -> [GenBasicBlock t1]
+seqBlocks infos blocks = placeNext pullable0 todo0
+ where
+ -- pullable: Blocks that are not yet placed
+ -- todo: Original order of blocks, to be followed if we have no good
+ -- reason not to;
+ -- may include blocks that have already been placed, but then
+ -- these are not in pullable
+ pullable0 = listToUFM [ (i,(b,n)) | DigraphNode b i n <- blocks ]
+ todo0 = map node_key blocks
+
+ placeNext _ [] = []
+ placeNext pullable (i:rest)
+ | Just (block, pullable') <- lookupDeleteUFM pullable i
+ = place pullable' rest block
+ | otherwise
+ -- We already placed this block, so ignore
+ = placeNext pullable rest
+
+ place pullable todo (block,[])
+ = block : placeNext pullable todo
+ place pullable todo (block@(BasicBlock id instrs),[next])
+ | mapMember next infos
+ = block : placeNext pullable todo
+ | Just (nextBlock, pullable') <- lookupDeleteUFM pullable next
+ = BasicBlock id instrs : place pullable' todo nextBlock
+ | otherwise
+ = block : placeNext pullable todo
+ place _ _ (_,tooManyNextNodes)
+ = pprPanic "seqBlocks" (ppr tooManyNextNodes)
+
+
+lookupDeleteUFM :: Uniquable key => UniqFM elt -> key
+ -> Maybe (elt, UniqFM elt)
+lookupDeleteUFM m k = do -- Maybe monad
+ v <- lookupUFM m k
+ return (v, delFromUFM m k)
+
+-- -------------------------------------------------------------------
+-- Some specialized data structures to speed things up:
+-- * BlockSequence: A specialized version of Data.Sequence.
+-- Better at indexing at the front/end but lacks ability
+-- to do lookup by position.
+
+type FrontierMap = LabelMap ([BlockId],BlockChain)
+
+-- | A "reverse zipper" of sorts.
+-- We store a list of blocks in two parts, the initial part from left to right
+-- and the remaining part stored in reverse order. This makes it easy to look
+-- the last/first element and append on both sides.
+data BlockSequence
+ = Singleton !BlockId
+ | Pair (OrdList BlockId) (OrdList BlockId)
+ -- ^ For a non empty pair there is at least one element in the left part.
+ | Empty
+
+seqFront :: BlockSequence -> BlockId
+seqFront Empty = panic "Empty sequence"
+seqFront (Singleton bid) = bid
+seqFront (Pair lefts rights) = expectJust "Seq invariant" $
+ listToMaybe (fromOL lefts) <|> listToMaybe (fromOL $ reverseOL rights)
+
+-- seqEnd :: BlockSequence -> BlockId
+-- seqEnd Empty = panic "Empty sequence"
+-- seqEnd (Singleton bid) = bid
+-- seqEnd (Pair lefts rights) = expectJust "Seq invariant" $
+-- listToMaybe (fromOL rights) <|> listToMaybe (fromOL $ reverseOL lefts)
+
+seqToList :: BlockSequence -> [BlockId]
+seqToList Empty = []
+seqToList (Singleton bid) = [bid]
+seqToList (Pair lefts rights) = fromOL $ lefts `appOL` reverseOL rights
+
+
+seqToRList :: BlockSequence -> [BlockId]
+seqToRList Empty = []
+seqToRList (Singleton bid) = [bid]
+seqToRList (Pair lefts rights) = fromOL $ rights `appOL` reverseOL lefts
+
+seqSnoc :: BlockSequence -> BlockId -> BlockSequence
+seqSnoc (Empty) bid = Singleton bid
+seqSnoc (Singleton s) bid= Pair (unitOL s) (unitOL bid)
+seqSnoc (Pair lefts rights) bid = Pair lefts (bid `consOL` rights)
+
+seqConcat :: BlockSequence -> BlockSequence -> BlockSequence
+seqConcat (Empty) x2 = x2
+seqConcat (Singleton b1) (Singleton b2) = Pair (unitOL b1) (unitOL b2)
+seqConcat x1 (Empty) = x1
+seqConcat (Singleton b1) (Pair lefts rights) = Pair (b1 `consOL` lefts) rights
+seqConcat (Pair lefts rights) (Singleton b2) = Pair lefts (b2 `consOL` rights)
+seqConcat (Pair lefts1 rights1) (Pair lefts2 rights2) =
+ Pair (lefts1 `appOL` (reverseOL rights1) `appOL` lefts2) rights2
+
+seqFromBids :: [BlockId] -> BlockSequence
+seqFromBids [] = Empty
+seqFromBids [b1] = Singleton b1
+seqFromBids [b1,b2] = Pair (unitOL b1) (unitOL b2)
+seqFromBids [b1,b2,b3] = Pair (consOL b1 $ unitOL b2) (unitOL b3)
+seqFromBids (b1:b2:b3:bs) = Pair (toOL [b1,b2,b3]) (toOL bs)