Diffstat (limited to 'compiler/nativeGen/BlockLayout.hs')
-rw-r--r-- | compiler/nativeGen/BlockLayout.hs | 819 |
1 files changed, 819 insertions, 0 deletions
diff --git a/compiler/nativeGen/BlockLayout.hs b/compiler/nativeGen/BlockLayout.hs
new file mode 100644
index 0000000000..72aea5bf10
--- /dev/null
+++ b/compiler/nativeGen/BlockLayout.hs
@@ -0,0 +1,819 @@
+--
+-- Copyright (c) 2018 Andreas Klebinger
+--
+
+{-# LANGUAGE TypeFamilies, ScopedTypeVariables, CPP #-}
+
+{-# OPTIONS_GHC -fprof-auto #-}
+--{-# OPTIONS_GHC -ddump-simpl -ddump-to-file -ddump-cmm #-}
+
+module BlockLayout
+    ( sequenceTop )
+where
+
+#include "HsVersions.h"
+import GhcPrelude
+
+import Instruction
+import NCGMonad
+import CFG
+
+import BlockId
+import Cmm
+import Hoopl.Collections
+import Hoopl.Label
+import Hoopl.Block
+
+import DynFlags (gopt, GeneralFlag(..), DynFlags, backendMaintainsCfg)
+import UniqFM
+import Util
+import Unique
+
+import Digraph
+import Outputable
+import Maybes
+
+-- DEBUGGING ONLY
+--import Debug
+--import Debug.Trace
+import ListSetOps (removeDups)
+import PprCmm ()
+
+import OrdList
+import Data.List
+import Data.Foldable (toList)
+import Hoopl.Graph
+
+import qualified Data.Set as Set
+import Control.Applicative
+
+{-
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  ~~~ Note [Chain based CFG serialization]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+  For additional information also look at
+  https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/CodeLayout
+
+  We have a CFG with edge weights based on which we try to place blocks next to
+  each other.
+
+  Edge weights not only represent the likelihood of control transfer between
+  blocks but also how much a block would benefit from being placed sequentially
+  after its predecessor.
+  For example, blocks which are preceded by an info table are more likely to
+  end up in a different cache line than their predecessor, so there is less
+  benefit in placing them sequentially.
+
+  Consider this example:
+
+  A:  ...
+      jmp cond D (weak successor)
+      jmp B
+  B:  ...
+      jmp C
+  C:  ...
+      jmp X
+  D:  ...
+      jmp B (weak successor)
+
+  We determine a block layout by building up chunks (called chains) of
+  possible control flows for which blocks will be placed sequentially.
+
+  Eg for our example we might end up with two chains like:
+  [A->B->C->X],[D]. Blocks inside chains will always be placed sequentially.
+  However there is no particular order in which chains are placed since
+  (hopefully) the blocks for which sequential placement is important have
+  already been placed in the same chain.
+
+  -----------------------------------------------------------------------------
+     First try to create a list of good chains.
+  -----------------------------------------------------------------------------
+
+  We do so by taking a block not yet placed in a chain and
+  looking at these cases:
+
+  *) Check if the best predecessor of the block is at the end of a chain.
+     If so add the current block to the end of that chain.
+
+     Eg if we look at block C and already have the chain (A -> B)
+     then we extend the chain to (A -> B -> C).
+
+     Combined with the fact that we process blocks in reverse postorder
+     this means loop bodies and trivially sequential control flow already
+     end up as a single chain.
+
+  *) Otherwise we create a singleton chain from the block we are looking at.
+     Eg if we have already constructed (A->B) from the example above
+     and look at D we create the chain (D), resulting in the chains [A->B, D].
+
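  As a purely illustrative sketch (not part of this module), this
  chain-building step can be modelled on a toy CFG where blocks are plain
  Ints and we only know each block's most likely predecessor. The names
  buildChainsToy and bestPred are made up, Data.Map stands in for LabelMap,
  and the real code additionally checks that the block is its predecessor's
  best unplaced successor:

    import qualified Data.Map.Strict as M

    type Block = Int
    type Chain = [Block]            -- first .. last block of the chain

    -- Walk the blocks in reverse postorder, keeping a map from the last
    -- block of every chain to that chain.
    buildChainsToy :: M.Map Block Block -> [Block] -> [Chain]
    buildChainsToy bestPred revPostOrder = M.elems (go M.empty revPostOrder)
      where
        go ends []       = ends
        go ends (b:rest) =
          case M.lookup b bestPred of
            -- Best predecessor currently ends a chain: extend that chain.
            Just p | Just chain <- M.lookup p ends
              -> go (M.insert b (chain ++ [b]) (M.delete p ends)) rest
            -- Otherwise start a new singleton chain.
            _ -> go (M.insert b [b] ends) rest

  For the example above, with bestPred = [(B,A),(C,B),(X,C),(D,A)] and the
  reverse postorder [A,B,C,X,D], this indeed yields the chains
  [A->B->C->X] and [D].
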
+  -----------------------------------------------------------------------------
+     We then try to fuse chains.
+  -----------------------------------------------------------------------------
+
+  There are edge cases which result in two chains being created which trivially
+  represent linear control flow. For example we might have the chains
+  [(A-B-C),(D-E)] with a CFG triangle:
+
+      A----->C->D->E
+       \->B-/
+
+  We also get three independent chains if two branches end with a jump
+  to a common successor.
+
+  We take care of these cases by fusing chains which are connected by an
+  edge.
+
+  We do so by looking at the list of edges sorted by weight.
+  Given the edge (C -> D) we try to find two chains such that:
+  * C is at the end of chain one.
+  * D is at the front of chain two.
+  If two such chains exist we fuse them.
+  We then remove the edge and repeat the process for the rest of the edges.
+
+  -----------------------------------------------------------------------------
+     Place indirect successors (neighbours) after each other
+  -----------------------------------------------------------------------------
+
+  We might have chains [A,B,C,X],[E] in a CFG of the sort:
+
+    A ---> B ---> C --------> X(exit)
+                   \- ->E- -/
+
+  While E does not follow X it's still beneficial to place them near each other.
+  This can be advantageous if eg C,X,E will end up in the same cache line.
+
+  TODO: If we remove edges as we use them (eg if we build up A->B remove A->B
+  from the list) we could save some more work in later phases.
+
+
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  ~~~ Note [Triangle Control Flow]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+  Checking if an argument is already evaluated leads to a somewhat
+  special case which looks like this:
+
+    A:
+        if (R1 & 7 != 0) goto Leval; else goto Lwork;
+    Leval: // global
+        call (I64[R1])(R1) returns to Lwork, args: 8, res: 8, upd: 8;
+    Lwork: // global
+        ...
+
+        A
+        |\
+        | Leval
+        |/ - (This edge can be missing because of optimizations)
+        Lwork
+
+  Once we hit the metal the call instruction is just 2-3 bytes long,
+  depending on the register used. So we lay out the assembly like this:
+
+        movq %rbx,%rax
+        andl $7,%eax
+        cmpq $1,%rax
+        jne Lwork
+    Leval:
+        jmp *(%rbx) # encoded in 2-3 bytes.
+    <info table>
+    Lwork:
+        ...
+
+  We could explicitly check for this control flow pattern.
+
+  This is advantageous because:
+  * It's optimal if the argument isn't evaluated.
+  * If it's evaluated we only have the extra cost of jumping over
+    the 2-3 bytes for the call.
+  * It guarantees the smaller encoding for the conditional jump.
+
+  However, given that Lwork usually has an info table we
+  penalize this edge. So Leval should get placed first
+  either way and things work out for the best.
+
+  Optimizing for the evaluated case instead would penalize
+  the other code path. It adds a jump as we can't fall through
+  to Lwork because of the info table.
+  Assuming that Lwork is large the chance that the "call" ends up
+  in the same cache line is also fairly small.
+
+-}
+
+
+-- | Look at the last/first X blocks of two chains to determine
+--   if they are "neighbours".
+neighbourOverlapp :: Int
+neighbourOverlapp = 2
+
+-- | Only edges heavier than this are considered
+--   for fusing two chains into a single chain.
+fuseEdgeThreshold :: EdgeWeight
+fuseEdgeThreshold = 0
+
+
+-- | A non-empty ordered sequence of basic blocks.
+--   It is suitable for serialization in this order.
+data BlockChain + = BlockChain + { chainMembers :: !LabelSet + , chainBlocks :: !BlockSequence + } + +instance Eq (BlockChain) where + (BlockChain s1 _) == (BlockChain s2 _) + = s1 == s2 + +instance Outputable (BlockChain) where + ppr (BlockChain _ blks) = + parens (text "Chain:" <+> ppr (seqToList $ blks) ) + +data WeightedEdge = WeightedEdge !BlockId !BlockId EdgeWeight deriving (Eq) + +-- Useful for things like sets and debugging purposes, sorts by blocks +-- in the chain. +instance Ord (BlockChain) where + (BlockChain lbls1 _) `compare` (BlockChain lbls2 _) + = lbls1 `compare` lbls2 + +-- | Non deterministic! (Uniques) Sorts edges by weight and nodes. +instance Ord WeightedEdge where + compare (WeightedEdge from1 to1 weight1) + (WeightedEdge from2 to2 weight2) + | weight1 < weight2 || weight1 == weight2 && from1 < from2 || + weight1 == weight2 && from1 == from2 && to1 < to2 + = LT + | from1 == from2 && to1 == to2 && weight1 == weight2 + = EQ + | otherwise + = GT + +instance Outputable WeightedEdge where + ppr (WeightedEdge from to info) = + ppr from <> text "->" <> ppr to <> brackets (ppr info) + +type WeightedEdgeList = [WeightedEdge] + +noDups :: [BlockChain] -> Bool +noDups chains = + let chainBlocks = concatMap chainToBlocks chains :: [BlockId] + (_blocks, dups) = removeDups compare chainBlocks + in if null dups then True + else pprTrace "Duplicates:" (ppr (map toList dups) $$ text "chains" <+> ppr chains ) False + +inFront :: BlockId -> BlockChain -> Bool +inFront bid (BlockChain _ seq) + = seqFront seq == bid + +chainMember :: BlockId -> BlockChain -> Bool +chainMember bid chain + = setMember bid . chainMembers $ chain + +chainSingleton :: BlockId -> BlockChain +chainSingleton lbl + = BlockChain (setSingleton lbl) (Singleton lbl) + +chainSnoc :: BlockChain -> BlockId -> BlockChain +chainSnoc (BlockChain lbls blks) lbl + = BlockChain (setInsert lbl lbls) (seqSnoc blks lbl) + +chainConcat :: BlockChain -> BlockChain -> BlockChain +chainConcat (BlockChain lbls1 blks1) (BlockChain lbls2 blks2) + = BlockChain (setUnion lbls1 lbls2) (blks1 `seqConcat` blks2) + +chainToBlocks :: BlockChain -> [BlockId] +chainToBlocks (BlockChain _ blks) = seqToList blks + +-- | Given the Chain A -> B -> C -> D and we break at C +-- we get the two Chains (A -> B, C -> D) as result. +breakChainAt :: BlockId -> BlockChain + -> (BlockChain,BlockChain) +breakChainAt bid (BlockChain lbls blks) + | not (setMember bid lbls) + = panic "Block not in chain" + | otherwise + = let (lblks, rblks) = break (\lbl -> lbl == bid) + (seqToList blks) + --TODO: Remove old + --lblSet :: [GenBasicBlock i] -> BlockChain + --lblSet blks = + -- setFromList + --(map (\(BasicBlock lbl _) -> lbl) $ toList blks) + in + (BlockChain (setFromList lblks) (seqFromBids lblks), + BlockChain (setFromList rblks) (seqFromBids rblks)) + +takeR :: Int -> BlockChain -> [BlockId] +takeR n (BlockChain _ blks) = + take n . seqToRList $ blks + + +takeL :: Int -> BlockChain -> [BlockId] +takeL n (BlockChain _ blks) = --error "TODO: takeLn" + take n . seqToList $ blks + +-- | For a given list of chains try to fuse chains with strong +-- edges between them into a single chain. +-- Returns the list of fused chains together with a set of +-- used edges. The set of edges is indirectly encoded in the +-- chains so doesn't need to be considered for later passes. 
+fuseChains :: WeightedEdgeList -> LabelMap BlockChain
+           -> (LabelMap BlockChain, Set.Set WeightedEdge)
+fuseChains weights chains
+    = let fronts = mapFromList $
+                    map (\chain -> (head $ takeL 1 chain,chain)) $
+                    mapElems chains :: LabelMap BlockChain
+          (chains', used, _) = applyEdges weights chains fronts Set.empty
+      in (chains', used)
+    where
+        applyEdges :: WeightedEdgeList -> LabelMap BlockChain
+                   -> LabelMap BlockChain -> Set.Set WeightedEdge
+                   -> (LabelMap BlockChain, Set.Set WeightedEdge, LabelMap BlockChain)
+        applyEdges [] chainsEnd chainsFront used
+            = (chainsEnd, used, chainsFront)
+        applyEdges (edge@(WeightedEdge from to w):edges) chainsEnd chainsFront used
+            --Since we order edges descending by weight we can stop here
+            | w <= fuseEdgeThreshold
+            = ( chainsEnd, used, chainsFront)
+            --Fuse the two chains
+            | Just c1 <- mapLookup from chainsEnd
+            , Just c2 <- mapLookup to chainsFront
+            , c1 /= c2
+            = let newChain = chainConcat c1 c2
+                  front = head $ takeL 1 newChain
+                  end = head $ takeR 1 newChain
+                  chainsFront' = mapInsert front newChain $
+                                 mapDelete to chainsFront
+                  chainsEnd'   = mapInsert end newChain $
+                                 mapDelete from chainsEnd
+              in applyEdges edges chainsEnd' chainsFront'
+                            (Set.insert edge used)
+            | otherwise
+            --Check next edge
+            = applyEdges edges chainsEnd chainsFront used
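
-- Illustrative sketch (not part of this patch): the fusing step above,
-- stripped down to plain Int block ids and list-of-list chains, which are
-- assumed non-empty just like BlockChain. The real fuseChains keeps
-- LabelMaps keyed by chain fronts/ends and records the set of used edges;
-- it also only looks at edges above fuseEdgeThreshold, sorted by
-- descending weight. fuseToy is a made-up name.
fuseToy :: [(Int, Int)] -> [[Int]] -> [[Int]]
fuseToy edges chains = foldl step chains edges
  where
    -- An edge (from, to) fuses a chain ending in `from` with a
    -- different chain starting in `to`.
    step cs (from, to)
      | (c1:_) <- [c | c <- cs, last c == from]
      , (c2:_) <- [c | c <- cs, head c == to]
      , c1 /= c2
      = (c1 ++ c2) : filter (`notElem` [c1, c2]) cs
      | otherwise
      = cs

-- For example: fuseToy [(3,4)] [[1,2,3],[4,5]] == [[1,2,3,4,5]]
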
+-- See also Note [Chain based CFG serialization]
+-- We have the chains (A-B-C-D) and (E-F) and an Edge C->E.
+--
+-- While placing the latter after the former doesn't result in sequential
+-- control flow it is still beneficial since blocks C and E might end
+-- up in the same cache line.
+--
+-- So we place these chains next to each other even if we can't fuse them.
+--
+--   A -> B -> C -> D
+--             v
+--       - -> E -> F ...
+--
+-- Simple heuristic to choose which chains we want to combine:
+--   * Process edges in descending priority.
+--   * Check if there is an edge near the end of one chain which goes
+--     to a block near the start of another chain.
+--
+-- While we could take into account the space between the two blocks which
+-- share an edge this blows up compile times quite a bit. It requires
+-- us to find all edges between two chains, check the distance for all edges,
+-- rank them based on the distance and only then select two chains
+-- to combine, which would add a lot of complexity for little gain.
+
+-- | For a given list of chains and edges try to combine chains with strong
+--   edges between them.
+combineNeighbourhood :: WeightedEdgeList -> [BlockChain]
+                     -> [BlockChain]
+combineNeighbourhood edges chains
+    = -- pprTraceIt "Neigbours" $
+      applyEdges edges endFrontier startFrontier
+    where
+        --Build maps from chain ends to chains
+        endFrontier, startFrontier :: FrontierMap
+        endFrontier =
+            mapFromList $ concatMap (\chain ->
+                                let ends = getEnds chain
+                                    entry = (ends,chain)
+                                in map (\x -> (x,entry)) ends ) chains
+        startFrontier =
+            mapFromList $ concatMap (\chain ->
+                                let front = getFronts chain
+                                    entry = (front,chain)
+                                in map (\x -> (x,entry)) front) chains
+        applyEdges :: WeightedEdgeList -> FrontierMap -> FrontierMap
+                   -> [BlockChain]
+        applyEdges [] chainEnds _chainFronts =
+            ordNub $ map snd $ mapElems chainEnds
+        applyEdges ((WeightedEdge from to _w):edges) chainEnds chainFronts
+            | Just (c1_e,c1) <- mapLookup from chainEnds
+            , Just (c2_f,c2) <- mapLookup to chainFronts
+            , c1 /= c2 -- Avoid trying to concat a short chain with itself.
+            = let newChain = chainConcat c1 c2
+                  newChainFrontier = getFronts newChain
+                  newChainEnds = getEnds newChain
+                  newFronts :: FrontierMap
+                  newFronts =
+                    let withoutOld =
+                            foldl' (\m b -> mapDelete b m :: FrontierMap) chainFronts (c2_f ++ getFronts c1)
+                        entry =
+                            (newChainFrontier,newChain) --let bound to ensure sharing
+                    in foldl' (\m x -> mapInsert x entry m)
+                              withoutOld newChainFrontier
+
+                  newEnds =
+                    let withoutOld = foldl' (\m b -> mapDelete b m) chainEnds (c1_e ++ getEnds c2)
+                        entry = (newChainEnds,newChain) --let bound to ensure sharing
+                    in foldl' (\m x -> mapInsert x entry m)
+                              withoutOld newChainEnds
+              in
+                -- pprTrace "ApplyEdges"
+                -- (text "before" $$
+                --  text "fronts" <+> ppr chainFronts $$
+                --  text "ends" <+> ppr chainEnds $$
+
+                --  text "various" $$
+                --  text "newChain" <+> ppr newChain $$
+                --  text "newChainFrontier" <+> ppr newChainFrontier $$
+                --  text "newChainEnds" <+> ppr newChainEnds $$
+                --  text "drop" <+> ppr ((c2_f ++ getFronts c1) ++ (c1_e ++ getEnds c2)) $$
+
+                --  text "after" $$
+                --  text "fronts" <+> ppr newFronts $$
+                --  text "ends" <+> ppr newEnds
+                --  )
+                applyEdges edges newEnds newFronts
+            | otherwise
+            = --pprTrace "noNeigbours" (ppr ()) $
+              applyEdges edges chainEnds chainFronts
+            where
+
+        getFronts chain = takeL neighbourOverlapp chain
+        getEnds chain = takeR neighbourOverlapp chain
+
+
+
+-- See [Chain based CFG serialization]
+buildChains :: CFG -> [BlockId]
+            -> ( LabelMap BlockChain  -- Resulting chains.
+               , Set.Set (BlockId, BlockId)) --List of fused edges.
+buildChains succWeights blocks
+    = let (_, fusedEdges, chains) = buildNext setEmpty mapEmpty blocks Set.empty
+      in (chains, fusedEdges)
+    where
+        -- We keep a map from the last block in a chain to the chain itself.
+        -- This way we can easily check if a block should be appended to an
+        -- existing chain!
+        buildNext :: LabelSet
+                  -> LabelMap BlockChain -- Map from last element to chain.
+                  -> [BlockId] -- Blocks to place
+                  -> Set.Set (BlockId, BlockId)
+                  -> ( [BlockChain]  -- Placed Blocks
+                     , Set.Set (BlockId, BlockId) --List of fused edges
+                     , LabelMap BlockChain
+                     )
+        buildNext _placed chains [] linked =
+            ([], linked, chains)
+        buildNext placed chains (block:todo) linked
+            | setMember block placed
+            = buildNext placed chains todo linked
+            | otherwise
+            = buildNext placed' chains' todo linked'
+            where
+                placed' = (foldl' (flip setInsert) placed placedBlocks)
+                linked' = Set.union linked linkedEdges
+                (placedBlocks, chains', linkedEdges) = findChain block
+
+                --Add the block to an existing or new chain
+                --Returns placed blocks, list of resulting chains
+                --and fused edges
+                findChain :: BlockId
+                          -> ([BlockId],LabelMap BlockChain, Set.Set (BlockId, BlockId))
+                findChain block
+                  -- B) place block at end of existing chain if
+                  --    there is no better block to append.
+                  | (pred:_) <- preds
+                  , alreadyPlaced pred
+                  , Just predChain <- mapLookup pred chains
+                  , (best:_) <- filter (not . alreadyPlaced) $ getSuccs pred
+                  , best == lbl
+                  = --pprTrace "B.2)" (ppr (pred,lbl)) $
+                    let newChain = chainSnoc predChain block
+                        chainMap = mapInsert lbl newChain $ mapDelete pred chains
+                    in  ( [lbl]
+                        , chainMap
+                        , Set.singleton (pred,lbl) )
+
+                  | otherwise
+                  = --pprTrace "single" (ppr lbl)
+                    ( [lbl]
+                    , mapInsert lbl (chainSingleton lbl) chains
+                    , Set.empty)
+                    where
+                      alreadyPlaced blkId = (setMember blkId placed)
+                      lbl = block
+                      getSuccs = map fst .
getSuccEdgesSorted succWeights + preds = map fst $ getSuccEdgesSorted predWeights lbl + --For efficiency we also create the map to look up predecessors here + predWeights = reverseEdges succWeights + + + +-- We make the CFG a Hoopl Graph, so we can reuse revPostOrder. +newtype BlockNode e x = BN (BlockId,[BlockId]) +instance NonLocal (BlockNode) where + entryLabel (BN (lbl,_)) = lbl + successors (BN (_,succs)) = succs + +fromNode :: BlockNode C C -> BlockId +fromNode (BN x) = fst x + +sequenceChain :: forall a i. (Instruction i, Outputable i) => LabelMap a -> CFG + -> [GenBasicBlock i] -> [GenBasicBlock i] +sequenceChain _info _weights [] = [] +sequenceChain _info _weights [x] = [x] +sequenceChain info weights' blocks@((BasicBlock entry _):_) = + --Optimization, delete edges of weight <= 0. + --This significantly improves performance whenever + --we iterate over all edges, which is a few times! + let weights :: CFG + weights + = filterEdges (\_f _t edgeInfo -> edgeWeight edgeInfo > 0) weights' + blockMap :: LabelMap (GenBasicBlock i) + blockMap + = foldl' (\m blk@(BasicBlock lbl _ins) -> + mapInsert lbl blk m) + mapEmpty blocks + + toNode :: BlockId -> BlockNode C C + toNode bid = + -- sorted such that heavier successors come first. + BN (bid,map fst . getSuccEdgesSorted weights' $ bid) + + orderedBlocks :: [BlockId] + orderedBlocks + = map fromNode $ + revPostorderFrom (fmap (toNode . blockId) blockMap) entry + + (builtChains, builtEdges) + = {-# SCC "buildChains" #-} + --pprTraceIt "generatedChains" $ + --pprTrace "orderedBlocks" (ppr orderedBlocks) $ + buildChains weights orderedBlocks + + rankedEdges :: WeightedEdgeList + -- Sort edges descending, remove fused eges + rankedEdges = + map (\(from, to, weight) -> WeightedEdge from to weight) . + filter (\(from, to, _) + -> not (Set.member (from,to) builtEdges)) . + sortWith (\(_,_,w) -> - w) $ weightedEdgeList weights + + (fusedChains, fusedEdges) + = ASSERT(noDups $ mapElems builtChains) + {-# SCC "fuseChains" #-} + --(pprTrace "RankedEdges" $ ppr rankedEdges) $ + --pprTraceIt "FusedChains" $ + fuseChains rankedEdges builtChains + + rankedEdges' = + filter (\edge -> not $ Set.member edge fusedEdges) $ rankedEdges + + neighbourChains + = ASSERT(noDups $ mapElems fusedChains) + {-# SCC "groupNeighbourChains" #-} + --pprTraceIt "ResultChains" $ + combineNeighbourhood rankedEdges' (mapElems fusedChains) + + --Make sure the first block stays first + ([entryChain],chains') + = ASSERT(noDups $ neighbourChains) + partition (chainMember entry) neighbourChains + (entryChain':entryRest) + | inFront entry entryChain = [entryChain] + | (rest,entry) <- breakChainAt entry entryChain + = [entry,rest] + | otherwise = pprPanic "Entry point eliminated" $ + ppr ([entryChain],chains') + + prepedChains + = entryChain':(entryRest++chains') :: [BlockChain] + blockList + -- = (concatMap chainToBlocks prepedChains) + = (concatMap seqToList $ map chainBlocks prepedChains) + + --chainPlaced = setFromList $ map blockId blockList :: LabelSet + chainPlaced = setFromList $ blockList :: LabelSet + unplaced = + let blocks = mapKeys blockMap + isPlaced b = setMember (b) chainPlaced + in filter (\block -> not (isPlaced block)) blocks + + placedBlocks = + --pprTraceIt "placedBlocks" $ + blockList ++ unplaced + getBlock bid = expectJust "Block placment" $ mapLookup bid blockMap + in + --Assert we placed all blocks given as input + ASSERT(all (\bid -> mapMember bid blockMap) placedBlocks) + dropJumps info $ map getBlock placedBlocks + +dropJumps :: forall a i. 
Instruction i => LabelMap a -> [GenBasicBlock i] + -> [GenBasicBlock i] +dropJumps _ [] = [] +dropJumps info ((BasicBlock lbl ins):todo) + | not . null $ ins --This can happen because of shortcutting + , [dest] <- jumpDestsOfInstr (last ins) + , ((BasicBlock nextLbl _) : _) <- todo + , not (mapMember dest info) + , nextLbl == dest + = BasicBlock lbl (init ins) : dropJumps info todo + | otherwise + = BasicBlock lbl ins : dropJumps info todo + + +-- ----------------------------------------------------------------------------- +-- Sequencing the basic blocks + +-- Cmm BasicBlocks are self-contained entities: they always end in a +-- jump, either non-local or to another basic block in the same proc. +-- In this phase, we attempt to place the basic blocks in a sequence +-- such that as many of the local jumps as possible turn into +-- fallthroughs. + +sequenceTop + :: (Instruction instr, Outputable instr) + => DynFlags --Use new layout code + -> NcgImpl statics instr jumpDest -> CFG + -> NatCmmDecl statics instr -> NatCmmDecl statics instr + +sequenceTop _ _ _ top@(CmmData _ _) = top +sequenceTop dflags ncgImpl edgeWeights + (CmmProc info lbl live (ListGraph blocks)) + | (gopt Opt_CfgBlocklayout dflags) && backendMaintainsCfg dflags + --Use chain based algorithm + = CmmProc info lbl live ( ListGraph $ ncgMakeFarBranches ncgImpl info $ + sequenceChain info edgeWeights blocks ) + | otherwise + --Use old algorithm + = CmmProc info lbl live ( ListGraph $ ncgMakeFarBranches ncgImpl info $ + sequenceBlocks cfg info blocks) + where + cfg + | (gopt Opt_WeightlessBlocklayout dflags) || + (not $ backendMaintainsCfg dflags) + -- Don't make use of cfg in the old algorithm + = Nothing + -- Use cfg in the old algorithm + | otherwise = Just edgeWeights + +-- The old algorithm: +-- It is very simple (and stupid): We make a graph out of +-- the blocks where there is an edge from one block to another iff the +-- first block ends by jumping to the second. Then we topologically +-- sort this graph. Then traverse the list: for each block, we first +-- output the block, then if it has an out edge, we move the +-- destination of the out edge to the front of the list, and continue. + +-- FYI, the classic layout for basic blocks uses postorder DFS; this +-- algorithm is implemented in Hoopl. + +sequenceBlocks :: Instruction inst => Maybe CFG -> LabelMap a + -> [GenBasicBlock inst] -> [GenBasicBlock inst] +sequenceBlocks _edgeWeight _ [] = [] +sequenceBlocks edgeWeights infos (entry:blocks) = + let entryNode = mkNode edgeWeights entry + bodyNodes = reverse + (flattenSCCs (sccBlocks edgeWeights blocks)) + in dropJumps infos . seqBlocks infos $ ( entryNode : bodyNodes) + -- the first block is the entry point ==> it must remain at the start. 
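
-- Illustrative sketch (not part of this patch): the essence of seqBlocks
-- above on a toy representation, ignoring info tables and the SCC pass.
-- A block is (label, optional jump target); we walk the original order,
-- and whenever a block's target is still unplaced we pull it forward so
-- the jump can later be dropped (cf. dropJumps). placeToy is a made-up
-- name, Data.Map stands in for UniqFM, and it assumes
-- "import qualified Data.Map.Strict as M" at the top of the module.
placeToy :: [(Int, Maybe Int)] -> [Int]
placeToy blocks = placeNext (M.fromList blocks) (map fst blocks)
  where
    placeNext _ [] = []
    placeNext pullable (i : rest)
      | Just target <- M.lookup i pullable   -- not placed yet
      = place (M.delete i pullable) rest (i, target)
      | otherwise                            -- already pulled forward earlier
      = placeNext pullable rest
    place pullable rest (i, Nothing) = i : placeNext pullable rest
    place pullable rest (i, Just t)
      | Just target' <- M.lookup t pullable  -- pull the jump target forward
      = i : place (M.delete t pullable) rest (t, target')
      | otherwise
      = i : placeNext pullable rest

-- For example: placeToy [(1, Just 3), (2, Nothing), (3, Just 2)] == [1,3,2],
-- so both jumps become fallthroughs.
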
+ +sccBlocks + :: Instruction instr + => Maybe CFG -> [NatBasicBlock instr] + -> [SCC (Node BlockId (NatBasicBlock instr))] +sccBlocks edgeWeights blocks = + stronglyConnCompFromEdgedVerticesUniqR + (map (mkNode edgeWeights) blocks) + +mkNode :: (Instruction t) + => Maybe CFG -> GenBasicBlock t + -> Node BlockId (GenBasicBlock t) +mkNode edgeWeights block@(BasicBlock id instrs) = + DigraphNode block id outEdges + where + outEdges :: [BlockId] + outEdges + --Select the heaviest successor, ignore weights <= zero + = successor + where + successor + | Just successors <- fmap (`getSuccEdgesSorted` id) + edgeWeights -- :: Maybe [(Label, EdgeInfo)] + = case successors of + [] -> [] + ((target,info):_) + | length successors > 2 || edgeWeight info <= 0 -> [] + | otherwise -> [target] + | otherwise + = case jumpDestsOfInstr (last instrs) of + [one] -> [one] + _many -> [] + + +seqBlocks :: LabelMap i -> [Node BlockId (GenBasicBlock t1)] + -> [GenBasicBlock t1] +seqBlocks infos blocks = placeNext pullable0 todo0 + where + -- pullable: Blocks that are not yet placed + -- todo: Original order of blocks, to be followed if we have no good + -- reason not to; + -- may include blocks that have already been placed, but then + -- these are not in pullable + pullable0 = listToUFM [ (i,(b,n)) | DigraphNode b i n <- blocks ] + todo0 = map node_key blocks + + placeNext _ [] = [] + placeNext pullable (i:rest) + | Just (block, pullable') <- lookupDeleteUFM pullable i + = place pullable' rest block + | otherwise + -- We already placed this block, so ignore + = placeNext pullable rest + + place pullable todo (block,[]) + = block : placeNext pullable todo + place pullable todo (block@(BasicBlock id instrs),[next]) + | mapMember next infos + = block : placeNext pullable todo + | Just (nextBlock, pullable') <- lookupDeleteUFM pullable next + = BasicBlock id instrs : place pullable' todo nextBlock + | otherwise + = block : placeNext pullable todo + place _ _ (_,tooManyNextNodes) + = pprPanic "seqBlocks" (ppr tooManyNextNodes) + + +lookupDeleteUFM :: Uniquable key => UniqFM elt -> key + -> Maybe (elt, UniqFM elt) +lookupDeleteUFM m k = do -- Maybe monad + v <- lookupUFM m k + return (v, delFromUFM m k) + +-- ------------------------------------------------------------------- +-- Some specialized data structures to speed things up: +-- * BlockSequence: A specialized version of Data.Sequence. +-- Better at indexing at the front/end but lacks ability +-- to do lookup by position. + +type FrontierMap = LabelMap ([BlockId],BlockChain) + +-- | A "reverse zipper" of sorts. +-- We store a list of blocks in two parts, the initial part from left to right +-- and the remaining part stored in reverse order. This makes it easy to look +-- the last/first element and append on both sides. +data BlockSequence + = Singleton !BlockId + | Pair (OrdList BlockId) (OrdList BlockId) + -- ^ For a non empty pair there is at least one element in the left part. 
+    | Empty
+
+seqFront :: BlockSequence -> BlockId
+seqFront Empty = panic "Empty sequence"
+seqFront (Singleton bid) = bid
+seqFront (Pair lefts rights) = expectJust "Seq invariant" $
+    listToMaybe (fromOL lefts) <|> listToMaybe (fromOL $ reverseOL rights)
+
+-- seqEnd :: BlockSequence -> BlockId
+-- seqEnd Empty = panic "Empty sequence"
+-- seqEnd (Singleton bid) = bid
+-- seqEnd (Pair lefts rights) = expectJust "Seq invariant" $
+--     listToMaybe (fromOL rights) <|> listToMaybe (fromOL $ reverseOL lefts)
+
+seqToList :: BlockSequence -> [BlockId]
+seqToList Empty = []
+seqToList (Singleton bid) = [bid]
+seqToList (Pair lefts rights) = fromOL $ lefts `appOL` reverseOL rights
+
+
+seqToRList :: BlockSequence -> [BlockId]
+seqToRList Empty = []
+seqToRList (Singleton bid) = [bid]
+seqToRList (Pair lefts rights) = fromOL $ rights `appOL` reverseOL lefts
+
+seqSnoc :: BlockSequence -> BlockId -> BlockSequence
+seqSnoc (Empty) bid = Singleton bid
+seqSnoc (Singleton s) bid = Pair (unitOL s) (unitOL bid)
+seqSnoc (Pair lefts rights) bid = Pair lefts (bid `consOL` rights)
+
+seqConcat :: BlockSequence -> BlockSequence -> BlockSequence
+seqConcat (Empty) x2 = x2
+seqConcat (Singleton b1) (Singleton b2) = Pair (unitOL b1) (unitOL b2)
+seqConcat x1 (Empty) = x1
+seqConcat (Singleton b1) (Pair lefts rights) = Pair (b1 `consOL` lefts) rights
+seqConcat (Pair lefts rights) (Singleton b2) = Pair lefts (b2 `consOL` rights)
+seqConcat (Pair lefts1 rights1) (Pair lefts2 rights2) =
+    Pair (lefts1 `appOL` (reverseOL rights1) `appOL` lefts2) rights2
+
+seqFromBids :: [BlockId] -> BlockSequence
+seqFromBids [] = Empty
+seqFromBids [b1] = Singleton b1
+seqFromBids [b1,b2] = Pair (unitOL b1) (unitOL b2)
+seqFromBids [b1,b2,b3] = Pair (consOL b1 $ unitOL b2) (unitOL b3)
+seqFromBids (b1:b2:b3:bs) = Pair (toOL [b1,b2,b3]) (toOL bs)
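
-- Illustrative sketch (not part of this patch): the "reverse zipper" idea
-- behind BlockSequence, with plain lists instead of OrdList. The front
-- part is stored in order and the back part reversed, so appending a
-- block (cf. seqSnoc) and reading the first block (cf. seqFront) are both
-- cheap. TwoList, twoSnoc and twoToList are made-up names.
data TwoList a = TwoList [a] [a]   -- front (in order), back (reversed)

twoSnoc :: TwoList a -> a -> TwoList a
twoSnoc (TwoList front back) x = TwoList front (x : back)

twoToList :: TwoList a -> [a]
twoToList (TwoList front back) = front ++ reverse back

-- For example: twoToList (twoSnoc (TwoList [1,2] [3]) 4) == [1,2,3,4]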