Diffstat (limited to 'compiler/GHC/CmmToAsm/BlockLayout.hs')
-rw-r--r-- | compiler/GHC/CmmToAsm/BlockLayout.hs | 895 |
1 file changed, 895 insertions, 0 deletions
diff --git a/compiler/GHC/CmmToAsm/BlockLayout.hs b/compiler/GHC/CmmToAsm/BlockLayout.hs
new file mode 100644
index 0000000000..01a1388b5f
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/BlockLayout.hs
@@ -0,0 +1,895 @@
+--
+-- Copyright (c) 2018 Andreas Klebinger
+--
+
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE FlexibleContexts #-}
+
+module GHC.CmmToAsm.BlockLayout
+  ( sequenceTop )
+where
+
+#include "HsVersions.h"
+import GhcPrelude
+
+import GHC.CmmToAsm.Instr
+import GHC.CmmToAsm.Monad
+import GHC.CmmToAsm.CFG
+
+import GHC.Cmm.BlockId
+import GHC.Cmm
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Label
+
+import GHC.Driver.Session (gopt, GeneralFlag(..), DynFlags, backendMaintainsCfg)
+import UniqFM
+import Util
+import Unique
+
+import Digraph
+import Outputable
+import Maybes
+
+-- DEBUGGING ONLY
+--import GHC.Cmm.DebugBlock
+--import Debug.Trace
+import ListSetOps (removeDups)
+
+import OrdList
+import Data.List
+import Data.Foldable (toList)
+
+import qualified Data.Set as Set
+import Data.STRef
+import Control.Monad.ST.Strict
+import Control.Monad (foldM)
+
+{-
+  Note [CFG based code layout]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+  The major steps in placing blocks are as follows:
+  * Compute a CFG based on the Cmm AST, see getCfgProc.
+    This CFG will have edge weights representing a guess
+    on how important they are.
+  * After we convert Cmm to Asm we run `optimizeCFG` which
+    adds a few more "educated guesses" to the equation.
+  * Then we run loop analysis on the CFG (`loopInfo`) which tells us
+    about loop headers, loop nesting levels and the like.
+  * Based on the CFG and loop information we refine the edge weights
+    in the CFG and normalize them relative to the most often visited
+    node. (See `mkGlobalWeights`.)
+  * Feed this CFG into the block layout code (`sequenceTop`) in this
+    module, which will then produce a code layout based on the input weights.
+
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  ~~~ Note [Chain based CFG serialization]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+  For additional information also look at
+  https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/code-layout
+
+  We have a CFG with edge weights, based on which we try to place blocks
+  next to each other.
+
+  Edge weights not only represent the likelihood of control transfer between
+  blocks but also how much a block would benefit from being placed
+  sequentially after its predecessor.
+  For example, blocks which are preceded by an info table are more likely to
+  end up in a different cache line than their predecessor, and we can't
+  eliminate the jump, so there is less benefit to placing them sequentially.
+
+  Consider this example:
+
+  A:  ...
+      jmp cond D (weak successor)
+      jmp B
+  B:  ...
+      jmp C
+  C:  ...
+      jmp X
+  D:  ...
+      jmp B (weak successor)
+
+  We determine a block layout by building up chunks (called chains) of
+  possible control flows for which blocks will be placed sequentially.
+
+  E.g. for our example we might end up with two chains like:
+  [A->B->C->X],[D]. Blocks inside chains will always be placed sequentially.
+  However there is no particular order in which chains are placed, since
+  (hopefully) the blocks for which sequentiality is important have already
+  been placed in the same chain.
+
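+  As a rough, purely illustrative sketch of that idea (this is not the
+  representation used below -- the real code works on OrdList-backed
+  BlockChains and weighted CFG edges; `Block`, `Chain`, `exampleChains` and
+  `layoutFromChains` are invented for this note):
+
+      type Block = Char
+      type Chain = [Block]            -- blocks to be placed sequentially
+
+      exampleChains :: [Chain]
+      exampleChains = ["ABCX", "D"]   -- the two chains from the example above
+
+      -- A layout is the concatenation of the chains in some order; only the
+      -- order *within* each chain is guaranteed.
+      layoutFromChains :: [Chain] -> [Block]
+      layoutFromChains = concat
+
+      -- layoutFromChains exampleChains == "ABCXD"
+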
+  -----------------------------------------------------------------------------
+  1) First try to create a list of good chains.
+  -----------------------------------------------------------------------------
+
+  Good chains are those which allow us to eliminate jump instructions,
+  preferably eliminating often-executed jumps first.
+
+  We do so by:
+
+  *) Ignoring edges which represent instructions which cannot be replaced
+     by fall-through control flow. Primarily calls and edges to blocks which
+     are prefixed by an info table we have to jump across.
+
+  *) Then processing the remaining edges in order of taken frequency and:
+
+    +) If source and target have not been placed, build a new chain from them.
+
+    +) If source and target have been placed, and are ends of differing chains,
+       try to merge the two chains.
+
+    +) If one side of the edge is an end/front of a chain, add the other block
+       of the edge to the same chain.
+
+       E.g. if we look at the edge (B -> C) and already have the chain (A -> B)
+       then we extend the chain to (A -> B -> C).
+
+    +) If the edge was used to modify or build a new chain, remove the edge
+       from our working list.
+
+  *) If there are any blocks not placed into a chain after these steps, we
+     place each of them into a chain consisting of only that block.
+
+  Since we rank edges by their taken frequency, if two edges compete for the
+  fall through on the same target block, the one taken more often will
+  automatically win out, resulting in fewer instructions being executed.
+
+  Creating singleton chains is required for situations where we have code of
+  the form:
+
+    A: goto B:
+       <infoTable>
+    B: goto C:
+       <infoTable>
+    C: ...
+
+  As the code in block B is only connected to the rest of the program via
+  edges which will be ignored in this step, we make sure that B still ends up
+  in a chain this way.
+
+  -----------------------------------------------------------------------------
+  2) We also try to fuse chains.
+  -----------------------------------------------------------------------------
+
+  As a result of the above step we still end up with multiple chains which
+  represent sequential control flow chunks. But they are not yet suitable for
+  code layout, as we need to place *all* blocks into a single sequence.
+
+  In this step we combine the chains resulting from the above step as follows:
+
+  *) Look at the ranked list of *all* edges, including calls/jumps across info
+     tables and the like.
+
+  *) Look at each edge and:
+
+    +) Given an edge (A -> B), try to find two chains for which
+       * Block A is at the end of one chain
+       * Block B is at the front of the other chain.
+    +) If we find such chains we "fuse" them into a single chain, remove the
+       edge from the working set and continue.
+    +) If we can't find such chains we skip the edge and continue.
+
+  -----------------------------------------------------------------------------
+  3) Place indirect successors (neighbours) after each other
+  -----------------------------------------------------------------------------
+
+  We might have chains [A,B,C,X],[E] in a CFG of the sort:
+
+    A ---> B ---> C --------> X(exit)
+      \- ->E- -/
+
+  While E does not follow X, it's still beneficial to place them near each
+  other. This can be advantageous if e.g. C, X, E will end up in the same
+  cache line.
+
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  ~~~ Note [Triangle Control Flow]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+  Checking if an argument is already evaluated leads to a somewhat
+  special case which looks like this:
+
+    A:
+        if (R1 & 7 != 0) goto Leval; else goto Lwork;
+    Leval: // global
+        call (I64[R1])(R1) returns to Lwork, args: 8, res: 8, upd: 8;
+    Lwork: // global
+        ...
+ + A + |\ + | Leval + |/ - (This edge can be missing because of optimizations) + Lwork + + Once we hit the metal the call instruction is just 2-3 bytes large + depending on the register used. So we lay out the assembly like this: + + movq %rbx,%rax + andl $7,%eax + cmpq $1,%rax + jne Lwork + Leval: + jmp *(%rbx) # encoded in 2-3 bytes. + <info table> + Lwork: + ... + + We could explicitly check for this control flow pattern. + + This is advantageous because: + * It's optimal if the argument isn't evaluated. + * If it's evaluated we only have the extra cost of jumping over + the 2-3 bytes for the call. + * Guarantees the smaller encoding for the conditional jump. + + However given that Lwork usually has an info table we + penalize this edge. So Leval should get placed first + either way and things work out for the best. + + Optimizing for the evaluated case instead would penalize + the other code path. It adds an jump as we can't fall through + to Lwork because of the info table. + Assuming that Lwork is large the chance that the "call" ends up + in the same cache line is also fairly small. + +-} + + +-- | Look at X number of blocks in two chains to determine +-- if they are "neighbours". +neighbourOverlapp :: Int +neighbourOverlapp = 2 + +-- | Maps blocks near the end of a chain to it's chain AND +-- the other blocks near the end. +-- [A,B,C,D,E] Gives entries like (B -> ([A,B], [A,B,C,D,E])) +-- where [A,B] are blocks in the end region of a chain. +-- This is cheaper then recomputing the ends multiple times. +type FrontierMap = LabelMap ([BlockId],BlockChain) + +-- | A non empty ordered sequence of basic blocks. +-- It is suitable for serialization in this order. +-- +-- We use OrdList instead of [] to allow fast append on both sides +-- when combining chains. +newtype BlockChain + = BlockChain { chainBlocks :: (OrdList BlockId) } + +-- All chains are constructed the same way so comparison +-- including structure is faster. +instance Eq BlockChain where + BlockChain b1 == BlockChain b2 = strictlyEqOL b1 b2 + +-- Useful for things like sets and debugging purposes, sorts by blocks +-- in the chain. +instance Ord (BlockChain) where + (BlockChain lbls1) `compare` (BlockChain lbls2) + = ASSERT(toList lbls1 /= toList lbls2 || lbls1 `strictlyEqOL` lbls2) + strictlyOrdOL lbls1 lbls2 + +instance Outputable (BlockChain) where + ppr (BlockChain blks) = + parens (text "Chain:" <+> ppr (fromOL $ blks) ) + +chainFoldl :: (b -> BlockId -> b) -> b -> BlockChain -> b +chainFoldl f z (BlockChain blocks) = foldl' f z blocks + +noDups :: [BlockChain] -> Bool +noDups chains = + let chainBlocks = concatMap chainToBlocks chains :: [BlockId] + (_blocks, dups) = removeDups compare chainBlocks + in if null dups then True + else pprTrace "Duplicates:" (ppr (map toList dups) $$ text "chains" <+> ppr chains ) False + +inFront :: BlockId -> BlockChain -> Bool +inFront bid (BlockChain seq) + = headOL seq == bid + +chainSingleton :: BlockId -> BlockChain +chainSingleton lbl + = BlockChain (unitOL lbl) + +chainFromList :: [BlockId] -> BlockChain +chainFromList = BlockChain . 
toOL + +chainSnoc :: BlockChain -> BlockId -> BlockChain +chainSnoc (BlockChain blks) lbl + = BlockChain (blks `snocOL` lbl) + +chainCons :: BlockId -> BlockChain -> BlockChain +chainCons lbl (BlockChain blks) + = BlockChain (lbl `consOL` blks) + +chainConcat :: BlockChain -> BlockChain -> BlockChain +chainConcat (BlockChain blks1) (BlockChain blks2) + = BlockChain (blks1 `appOL` blks2) + +chainToBlocks :: BlockChain -> [BlockId] +chainToBlocks (BlockChain blks) = fromOL blks + +-- | Given the Chain A -> B -> C -> D and we break at C +-- we get the two Chains (A -> B, C -> D) as result. +breakChainAt :: BlockId -> BlockChain + -> (BlockChain,BlockChain) +breakChainAt bid (BlockChain blks) + | not (bid == head rblks) + = panic "Block not in chain" + | otherwise + = (BlockChain (toOL lblks), + BlockChain (toOL rblks)) + where + (lblks, rblks) = break (\lbl -> lbl == bid) (fromOL blks) + +takeR :: Int -> BlockChain -> [BlockId] +takeR n (BlockChain blks) = + take n . fromOLReverse $ blks + +takeL :: Int -> BlockChain -> [BlockId] +takeL n (BlockChain blks) = + take n . fromOL $ blks + +-- Note [Combining neighborhood chains] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +-- See also Note [Chain based CFG serialization] +-- We have the chains (A-B-C-D) and (E-F) and an Edge C->E. +-- +-- While placing the latter after the former doesn't result in sequential +-- control flow it is still beneficial. As block C and E might end +-- up in the same cache line. +-- +-- So we place these chains next to each other even if we can't fuse them. +-- +-- A -> B -> C -> D +-- v +-- - -> E -> F ... +-- +-- A simple heuristic to chose which chains we want to combine: +-- * Process edges in descending priority. +-- * Check if there is a edge near the end of one chain which goes +-- to a block near the start of another edge. +-- +-- While we could take into account the space between the two blocks which +-- share an edge this blows up compile times quite a bit. It requires +-- us to find all edges between two chains, check the distance for all edges, +-- rank them based on the distance and only then we can select two chains +-- to combine. Which would add a lot of complexity for little gain. +-- +-- So instead we just rank by the strength of the edge and use the first pair we +-- find. + +-- | For a given list of chains and edges try to combine chains with strong +-- edges between them. +combineNeighbourhood :: [CfgEdge] -- ^ Edges to consider + -> [BlockChain] -- ^ Current chains of blocks + -> ([BlockChain], Set.Set (BlockId,BlockId)) + -- ^ Resulting list of block chains, and a set of edges which + -- were used to fuse chains and as such no longer need to be + -- considered. 
+combineNeighbourhood edges chains + = -- pprTraceIt "Neighbours" $ + -- pprTrace "combineNeighbours" (ppr edges) $ + applyEdges edges endFrontier startFrontier (Set.empty) + where + --Build maps from chain ends to chains + endFrontier, startFrontier :: FrontierMap + endFrontier = + mapFromList $ concatMap (\chain -> + let ends = getEnds chain :: [BlockId] + entry = (ends,chain) + in map (\x -> (x,entry)) ends ) chains + startFrontier = + mapFromList $ concatMap (\chain -> + let front = getFronts chain + entry = (front,chain) + in map (\x -> (x,entry)) front) chains + applyEdges :: [CfgEdge] -> FrontierMap -> FrontierMap -> Set.Set (BlockId, BlockId) + -> ([BlockChain], Set.Set (BlockId,BlockId)) + applyEdges [] chainEnds _chainFronts combined = + (ordNub $ map snd $ mapElems chainEnds, combined) + applyEdges ((CfgEdge from to _w):edges) chainEnds chainFronts combined + | Just (c1_e,c1) <- mapLookup from chainEnds + , Just (c2_f,c2) <- mapLookup to chainFronts + , c1 /= c2 -- Avoid trying to concat a chain with itself. + = let newChain = chainConcat c1 c2 + newChainFrontier = getFronts newChain + newChainEnds = getEnds newChain + newFronts :: FrontierMap + newFronts = + let withoutOld = + foldl' (\m b -> mapDelete b m :: FrontierMap) chainFronts (c2_f ++ getFronts c1) + entry = + (newChainFrontier,newChain) --let bound to ensure sharing + in foldl' (\m x -> mapInsert x entry m) + withoutOld newChainFrontier + + newEnds = + let withoutOld = foldl' (\m b -> mapDelete b m) chainEnds (c1_e ++ getEnds c2) + entry = (newChainEnds,newChain) --let bound to ensure sharing + in foldl' (\m x -> mapInsert x entry m) + withoutOld newChainEnds + in + -- pprTrace "ApplyEdges" + -- (text "before" $$ + -- text "fronts" <+> ppr chainFronts $$ + -- text "ends" <+> ppr chainEnds $$ + + -- text "various" $$ + -- text "newChain" <+> ppr newChain $$ + -- text "newChainFrontier" <+> ppr newChainFrontier $$ + -- text "newChainEnds" <+> ppr newChainEnds $$ + -- text "drop" <+> ppr ((c2_f ++ getFronts c1) ++ (c1_e ++ getEnds c2)) $$ + + -- text "after" $$ + -- text "fronts" <+> ppr newFronts $$ + -- text "ends" <+> ppr newEnds + -- ) + applyEdges edges newEnds newFronts (Set.insert (from,to) combined) + | otherwise + = applyEdges edges chainEnds chainFronts combined + where + + getFronts chain = takeL neighbourOverlapp chain + getEnds chain = takeR neighbourOverlapp chain + +-- In the last stop we combine all chains into a single one. +-- Trying to place chains with strong edges next to each other. +mergeChains :: [CfgEdge] -> [BlockChain] + -> (BlockChain) +mergeChains edges chains + = -- pprTrace "combine" (ppr edges) $ + runST $ do + let addChain m0 chain = do + ref <- newSTRef chain + return $ chainFoldl (\m' b -> mapInsert b ref m') m0 chain + chainMap' <- foldM (\m0 c -> addChain m0 c) mapEmpty chains + merge edges chainMap' + where + -- We keep a map from ALL blocks to their respective chain (sigh) + -- This is required since when looking at an edge we need to find + -- the associated chains quickly. + -- We use a map of STRefs, maintaining a invariant of one STRef per chain. + -- When merging chains we can update the + -- STRef of one chain once (instead of writing to the map for each block). + -- We then overwrite the STRefs for the other chain so there is again only + -- a single STRef for the combined chain. + -- The difference in terms of allocations saved is ~0.2% with -O so actually + -- significant compared to using a regular map. + + merge :: forall s. 
[CfgEdge] -> LabelMap (STRef s BlockChain) -> ST s BlockChain + merge [] chains = do + chains' <- ordNub <$> (mapM readSTRef $ mapElems chains) :: ST s [BlockChain] + return $ foldl' chainConcat (head chains') (tail chains') + merge ((CfgEdge from to _):edges) chains + -- | pprTrace "merge" (ppr (from,to) <> ppr chains) False + -- = undefined + | cFrom == cTo + = merge edges chains + | otherwise + = do + chains' <- mergeComb cFrom cTo + merge edges chains' + where + mergeComb :: STRef s BlockChain -> STRef s BlockChain -> ST s (LabelMap (STRef s BlockChain)) + mergeComb refFrom refTo = do + cRight <- readSTRef refTo + chain <- pure chainConcat <*> readSTRef refFrom <*> pure cRight + writeSTRef refFrom chain + return $ chainFoldl (\m b -> mapInsert b refFrom m) chains cRight + + cFrom = expectJust "mergeChains:chainMap:from" $ mapLookup from chains + cTo = expectJust "mergeChains:chainMap:to" $ mapLookup to chains + + +-- See Note [Chain based CFG serialization] for the general idea. +-- This creates and fuses chains at the same time for performance reasons. + +-- Try to build chains from a list of edges. +-- Edges must be sorted **descending** by their priority. +-- Returns the constructed chains, along with all edges which +-- are irrelevant past this point, this information doesn't need +-- to be complete - it's only used to speed up the process. +-- An Edge is irrelevant if the ends are part of the same chain. +-- We say these edges are already linked +buildChains :: [CfgEdge] -> [BlockId] + -> ( LabelMap BlockChain -- Resulting chains, indexd by end if chain. + , Set.Set (BlockId, BlockId)) --List of fused edges. +buildChains edges blocks + = runST $ buildNext setEmpty mapEmpty mapEmpty edges Set.empty + where + -- buildNext builds up chains from edges one at a time. + + -- We keep a map from the ends of chains to the chains. + -- This we we can easily check if an block should be appended to an + -- existing chain! + -- We store them using STRefs so we don't have to rebuild the spine of both + -- maps every time we update a chain. + buildNext :: forall s. LabelSet + -> LabelMap (STRef s BlockChain) -- Map from end of chain to chain. + -> LabelMap (STRef s BlockChain) -- Map from start of chain to chain. + -> [CfgEdge] -- Edges to check - ordered by decreasing weight + -> Set.Set (BlockId, BlockId) -- Used edges + -> ST s ( LabelMap BlockChain -- Chains by end + , Set.Set (BlockId, BlockId) --List of fused edges + ) + buildNext placed _chainStarts chainEnds [] linked = do + ends' <- sequence $ mapMap readSTRef chainEnds :: ST s (LabelMap BlockChain) + -- Any remaining blocks have to be made to singleton chains. + -- They might be combined with other chains later on outside this function. 
+ let unplaced = filter (\x -> not (setMember x placed)) blocks + singletons = map (\x -> (x,chainSingleton x)) unplaced :: [(BlockId,BlockChain)] + return (foldl' (\m (k,v) -> mapInsert k v m) ends' singletons , linked) + buildNext placed chainStarts chainEnds (edge:todo) linked + | from == to + -- We skip self edges + = buildNext placed chainStarts chainEnds todo (Set.insert (from,to) linked) + | not (alreadyPlaced from) && + not (alreadyPlaced to) + = do + --pprTraceM "Edge-Chain:" (ppr edge) + chain' <- newSTRef $ chainFromList [from,to] + buildNext + (setInsert to (setInsert from placed)) + (mapInsert from chain' chainStarts) + (mapInsert to chain' chainEnds) + todo + (Set.insert (from,to) linked) + + | (alreadyPlaced from) && + (alreadyPlaced to) + , Just predChain <- mapLookup from chainEnds + , Just succChain <- mapLookup to chainStarts + , predChain /= succChain -- Otherwise we try to create a cycle. + = do + -- pprTraceM "Fusing edge" (ppr edge) + fuseChain predChain succChain + + | (alreadyPlaced from) && + (alreadyPlaced to) + = --pprTraceM "Skipping:" (ppr edge) >> + buildNext placed chainStarts chainEnds todo linked + + | otherwise + = do -- pprTraceM "Finding chain for:" (ppr edge $$ + -- text "placed" <+> ppr placed) + findChain + where + from = edgeFrom edge + to = edgeTo edge + alreadyPlaced blkId = (setMember blkId placed) + + -- Combine two chains into a single one. + fuseChain :: STRef s BlockChain -> STRef s BlockChain + -> ST s ( LabelMap BlockChain -- Chains by end + , Set.Set (BlockId, BlockId) --List of fused edges + ) + fuseChain fromRef toRef = do + fromChain <- readSTRef fromRef + toChain <- readSTRef toRef + let newChain = chainConcat fromChain toChain + ref <- newSTRef newChain + let start = head $ takeL 1 newChain + let end = head $ takeR 1 newChain + -- chains <- sequence $ mapMap readSTRef chainStarts + -- pprTraceM "pre-fuse chains:" $ ppr chains + buildNext + placed + (mapInsert start ref $ mapDelete to $ chainStarts) + (mapInsert end ref $ mapDelete from $ chainEnds) + todo + (Set.insert (from,to) linked) + + + --Add the block to a existing chain or creates a new chain + findChain :: ST s ( LabelMap BlockChain -- Chains by end + , Set.Set (BlockId, BlockId) --List of fused edges + ) + findChain + -- We can attach the block to the end of a chain + | alreadyPlaced from + , Just predChain <- mapLookup from chainEnds + = do + chain <- readSTRef predChain + let newChain = chainSnoc chain to + writeSTRef predChain newChain + let chainEnds' = mapInsert to predChain $ mapDelete from chainEnds + -- chains <- sequence $ mapMap readSTRef chainStarts + -- pprTraceM "from chains:" $ ppr chains + buildNext (setInsert to placed) chainStarts chainEnds' todo (Set.insert (from,to) linked) + -- We can attack it to the front of a chain + | alreadyPlaced to + , Just succChain <- mapLookup to chainStarts + = do + chain <- readSTRef succChain + let newChain = from `chainCons` chain + writeSTRef succChain newChain + let chainStarts' = mapInsert from succChain $ mapDelete to chainStarts + -- chains <- sequence $ mapMap readSTRef chainStarts' + -- pprTraceM "to chains:" $ ppr chains + buildNext (setInsert from placed) chainStarts' chainEnds todo (Set.insert (from,to) linked) + -- The placed end of the edge is part of a chain already and not an end. 
+ | otherwise + = do + let block = if alreadyPlaced to then from else to + --pprTraceM "Singleton" $ ppr block + let newChain = chainSingleton block + ref <- newSTRef newChain + buildNext (setInsert block placed) (mapInsert block ref chainStarts) + (mapInsert block ref chainEnds) todo (linked) + where + alreadyPlaced blkId = (setMember blkId placed) + +-- | Place basic blocks based on the given CFG. +-- See Note [Chain based CFG serialization] +sequenceChain :: forall a i. (Instruction i, Outputable i) + => LabelMap a -- ^ Keys indicate an info table on the block. + -> CFG -- ^ Control flow graph and some meta data. + -> [GenBasicBlock i] -- ^ List of basic blocks to be placed. + -> [GenBasicBlock i] -- ^ Blocks placed in sequence. +sequenceChain _info _weights [] = [] +sequenceChain _info _weights [x] = [x] +sequenceChain info weights' blocks@((BasicBlock entry _):_) = + let weights :: CFG + weights = --pprTrace "cfg'" (pprEdgeWeights cfg') + cfg' + where + (_, globalEdgeWeights) = {-# SCC mkGlobalWeights #-} mkGlobalWeights entry weights' + cfg' = {-# SCC rewriteEdges #-} + mapFoldlWithKey + (\cfg from m -> + mapFoldlWithKey + (\cfg to w -> setEdgeWeight cfg (EdgeWeight w) from to ) + cfg m ) + weights' + globalEdgeWeights + + directEdges :: [CfgEdge] + directEdges = sortBy (flip compare) $ catMaybes . map relevantWeight $ (infoEdgeList weights) + where + relevantWeight :: CfgEdge -> Maybe CfgEdge + relevantWeight edge@(CfgEdge from to edgeInfo) + | (EdgeInfo CmmSource { trans_cmmNode = CmmCall {} } _) <- edgeInfo + -- Ignore edges across calls + = Nothing + | mapMember to info + , w <- edgeWeight edgeInfo + -- The payoff is small if we jump over an info table + = Just (CfgEdge from to edgeInfo { edgeWeight = w/8 }) + | otherwise + = Just edge + + blockMap :: LabelMap (GenBasicBlock i) + blockMap + = foldl' (\m blk@(BasicBlock lbl _ins) -> + mapInsert lbl blk m) + mapEmpty blocks + + (builtChains, builtEdges) + = {-# SCC "buildChains" #-} + --pprTraceIt "generatedChains" $ + --pprTrace "blocks" (ppr (mapKeys blockMap)) $ + buildChains directEdges (mapKeys blockMap) + + rankedEdges :: [CfgEdge] + -- Sort descending by weight, remove fused edges + rankedEdges = + filter (\edge -> not (Set.member (edgeFrom edge,edgeTo edge) builtEdges)) $ + directEdges + + (neighbourChains, combined) + = ASSERT(noDups $ mapElems builtChains) + {-# SCC "groupNeighbourChains" #-} + -- pprTraceIt "NeighbourChains" $ + combineNeighbourhood rankedEdges (mapElems builtChains) + + + allEdges :: [CfgEdge] + allEdges = {-# SCC allEdges #-} + sortOn (relevantWeight) $ filter (not . 
deadEdge) $ (infoEdgeList weights) + where + deadEdge :: CfgEdge -> Bool + deadEdge (CfgEdge from to _) = let e = (from,to) in Set.member e combined || Set.member e builtEdges + relevantWeight :: CfgEdge -> EdgeWeight + relevantWeight (CfgEdge _ _ edgeInfo) + | EdgeInfo (CmmSource { trans_cmmNode = CmmCall {}}) _ <- edgeInfo + -- Penalize edges across calls + = weight/(64.0) + | otherwise + = weight + where + -- negate to sort descending + weight = negate (edgeWeight edgeInfo) + + masterChain = + {-# SCC "mergeChains" #-} + -- pprTraceIt "MergedChains" $ + mergeChains allEdges neighbourChains + + --Make sure the first block stays first + prepedChains + | inFront entry masterChain + = [masterChain] + | (rest,entry) <- breakChainAt entry masterChain + = [entry,rest] +#if __GLASGOW_HASKELL__ <= 810 + | otherwise = pprPanic "Entry point eliminated" $ + ppr masterChain +#endif + + blockList + = ASSERT(noDups [masterChain]) + (concatMap fromOL $ map chainBlocks prepedChains) + + --chainPlaced = setFromList $ map blockId blockList :: LabelSet + chainPlaced = setFromList $ blockList :: LabelSet + unplaced = + let blocks = mapKeys blockMap + isPlaced b = setMember (b) chainPlaced + in filter (\block -> not (isPlaced block)) blocks + + placedBlocks = + -- We want debug builds to catch this as it's a good indicator for + -- issues with CFG invariants. But we don't want to blow up production + -- builds if something slips through. + ASSERT(null unplaced) + --pprTraceIt "placedBlocks" $ + -- ++ [] is stil kinda expensive + if null unplaced then blockList else blockList ++ unplaced + getBlock bid = expectJust "Block placement" $ mapLookup bid blockMap + in + --Assert we placed all blocks given as input + ASSERT(all (\bid -> mapMember bid blockMap) placedBlocks) + dropJumps info $ map getBlock placedBlocks + +{-# SCC dropJumps #-} +-- | Remove redundant jumps between blocks when we can rely on +-- fall through. +dropJumps :: forall a i. Instruction i => LabelMap a -> [GenBasicBlock i] + -> [GenBasicBlock i] +dropJumps _ [] = [] +dropJumps info ((BasicBlock lbl ins):todo) + | not . null $ ins --This can happen because of shortcutting + , [dest] <- jumpDestsOfInstr (last ins) + , ((BasicBlock nextLbl _) : _) <- todo + , not (mapMember dest info) + , nextLbl == dest + = BasicBlock lbl (init ins) : dropJumps info todo + | otherwise + = BasicBlock lbl ins : dropJumps info todo + + +-- ----------------------------------------------------------------------------- +-- Sequencing the basic blocks + +-- Cmm BasicBlocks are self-contained entities: they always end in a +-- jump, either non-local or to another basic block in the same proc. +-- In this phase, we attempt to place the basic blocks in a sequence +-- such that as many of the local jumps as possible turn into +-- fallthroughs. + +sequenceTop + :: (Instruction instr, Outputable instr) + => DynFlags -- Determine which layout algo to use + -> NcgImpl statics instr jumpDest + -> Maybe CFG -- ^ CFG if we have one. 
+ -> NatCmmDecl statics instr -- ^ Function to serialize + -> NatCmmDecl statics instr + +sequenceTop _ _ _ top@(CmmData _ _) = top +sequenceTop dflags ncgImpl edgeWeights + (CmmProc info lbl live (ListGraph blocks)) + | (gopt Opt_CfgBlocklayout dflags) && backendMaintainsCfg dflags + --Use chain based algorithm + , Just cfg <- edgeWeights + = CmmProc info lbl live ( ListGraph $ ncgMakeFarBranches ncgImpl info $ + {-# SCC layoutBlocks #-} + sequenceChain info cfg blocks ) + | otherwise + --Use old algorithm + = let cfg = if dontUseCfg then Nothing else edgeWeights + in CmmProc info lbl live ( ListGraph $ ncgMakeFarBranches ncgImpl info $ + {-# SCC layoutBlocks #-} + sequenceBlocks cfg info blocks) + where + dontUseCfg = gopt Opt_WeightlessBlocklayout dflags || + (not $ backendMaintainsCfg dflags) + +-- The old algorithm: +-- It is very simple (and stupid): We make a graph out of +-- the blocks where there is an edge from one block to another iff the +-- first block ends by jumping to the second. Then we topologically +-- sort this graph. Then traverse the list: for each block, we first +-- output the block, then if it has an out edge, we move the +-- destination of the out edge to the front of the list, and continue. + +-- FYI, the classic layout for basic blocks uses postorder DFS; this +-- algorithm is implemented in Hoopl. + +sequenceBlocks :: Instruction inst => Maybe CFG -> LabelMap a + -> [GenBasicBlock inst] -> [GenBasicBlock inst] +sequenceBlocks _edgeWeight _ [] = [] +sequenceBlocks edgeWeights infos (entry:blocks) = + let entryNode = mkNode edgeWeights entry + bodyNodes = reverse + (flattenSCCs (sccBlocks edgeWeights blocks)) + in dropJumps infos . seqBlocks infos $ ( entryNode : bodyNodes) + -- the first block is the entry point ==> it must remain at the start. 
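+
+-- To illustrate the old algorithm described above with a toy example (this
+-- is only a sketch, not the code used here -- the real work is done by
+-- sequenceBlocks/seqBlocks on actual basic blocks; `toySeq` and its
+-- successor map are invented for this comment):
+--
+--     import qualified Data.Map as M
+--
+--     -- Each block has at most one interesting successor (its jump target).
+--     toySeq :: M.Map Char Char -> [Char] -> [Char]
+--     toySeq succs = go
+--       where
+--         go []       = []
+--         go (b:rest) = b : go (pull rest)
+--           where
+--             -- Move b's jump target (if any) to the front of the remaining
+--             -- blocks so it is placed next and the jump can become a
+--             -- fallthrough.
+--             pull bs = case M.lookup b succs of
+--               Just s | s `elem` bs -> s : filter (/= s) bs
+--               _                    -> bs
+--
+--     -- toySeq (M.fromList [('A','C'),('C','B')]) "ABC" == "ACB"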
+ +sccBlocks + :: Instruction instr + => Maybe CFG -> [NatBasicBlock instr] + -> [SCC (Node BlockId (NatBasicBlock instr))] +sccBlocks edgeWeights blocks = + stronglyConnCompFromEdgedVerticesUniqR + (map (mkNode edgeWeights) blocks) + +mkNode :: (Instruction t) + => Maybe CFG -> GenBasicBlock t + -> Node BlockId (GenBasicBlock t) +mkNode edgeWeights block@(BasicBlock id instrs) = + DigraphNode block id outEdges + where + outEdges :: [BlockId] + outEdges + --Select the heaviest successor, ignore weights <= zero + = successor + where + successor + | Just successors <- fmap (`getSuccEdgesSorted` id) + edgeWeights -- :: Maybe [(Label, EdgeInfo)] + = case successors of + [] -> [] + ((target,info):_) + | length successors > 2 || edgeWeight info <= 0 -> [] + | otherwise -> [target] + | otherwise + = case jumpDestsOfInstr (last instrs) of + [one] -> [one] + _many -> [] + + +seqBlocks :: LabelMap i -> [Node BlockId (GenBasicBlock t1)] + -> [GenBasicBlock t1] +seqBlocks infos blocks = placeNext pullable0 todo0 + where + -- pullable: Blocks that are not yet placed + -- todo: Original order of blocks, to be followed if we have no good + -- reason not to; + -- may include blocks that have already been placed, but then + -- these are not in pullable + pullable0 = listToUFM [ (i,(b,n)) | DigraphNode b i n <- blocks ] + todo0 = map node_key blocks + + placeNext _ [] = [] + placeNext pullable (i:rest) + | Just (block, pullable') <- lookupDeleteUFM pullable i + = place pullable' rest block + | otherwise + -- We already placed this block, so ignore + = placeNext pullable rest + + place pullable todo (block,[]) + = block : placeNext pullable todo + place pullable todo (block@(BasicBlock id instrs),[next]) + | mapMember next infos + = block : placeNext pullable todo + | Just (nextBlock, pullable') <- lookupDeleteUFM pullable next + = BasicBlock id instrs : place pullable' todo nextBlock + | otherwise + = block : placeNext pullable todo + place _ _ (_,tooManyNextNodes) + = pprPanic "seqBlocks" (ppr tooManyNextNodes) + + +lookupDeleteUFM :: Uniquable key => UniqFM elt -> key + -> Maybe (elt, UniqFM elt) +lookupDeleteUFM m k = do -- Maybe monad + v <- lookupUFM m k + return (v, delFromUFM m k) |
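
The chain-building step from Note [Chain based CFG serialization] can also be illustrated with a small, self-contained sketch. This is not the module's buildChains (which works on CfgEdges, LabelMaps and STRefs for performance); Block, Edge, buildToyChains and the helper names below are invented for illustration, and the edge list is assumed to arrive already sorted by descending weight:

    import Data.List (foldl')

    type Block = Char
    type Chain = [Block]
    type Edge  = (Block, Block)   -- (from, to), sorted by descending weight

    -- Greedily build chains: start, extend or fuse chains when an edge
    -- connects the end of one chain to the front of another; blocks left
    -- unplaced end up as singleton chains afterwards.
    buildToyChains :: [Block] -> [Edge] -> [Chain]
    buildToyChains blocks edges = chains ++ singletons
      where
        chains     = foldl' step [] edges
        placed     = concat chains
        singletons = [[b] | b <- blocks, b `notElem` placed]

        step cs (from, to)
          | from == to = cs                                  -- ignore self edges
          -- Neither block placed yet: start a new chain.
          | Nothing <- mFrom, Nothing <- mTo = [from, to] : cs
          -- `from` ends one chain and `to` starts another: fuse them.
          | Just c1 <- mFrom, Just c2 <- mTo
          , c1 /= c2, last c1 == from, head c2 == to
          = (c1 ++ c2) : filter (`notElem` [c1, c2]) cs
          -- `from` ends a chain and `to` is unplaced: append `to`.
          | Just c1 <- mFrom, Nothing <- mTo, last c1 == from
          = (c1 ++ [to]) : filter (/= c1) cs
          -- `to` starts a chain and `from` is unplaced: prepend `from`.
          | Nothing <- mFrom, Just c2 <- mTo, head c2 == to
          = (from : c2) : filter (/= c2) cs
          -- Otherwise the edge cannot become a fallthrough here: skip it.
          | otherwise = cs
          where
            mFrom = chainOf from
            mTo   = chainOf to
            chainOf b = foldr (\c acc -> if b `elem` c then Just c else acc) Nothing cs

    -- For the example from the note:
    --   buildToyChains "ABCDX" [('A','B'),('B','C'),('C','X'),('A','D'),('D','B')]
    --     == ["ABCX","D"]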