diff options
author | Michal Terepeta <michal.terepeta@gmail.com> | 2018-03-19 11:58:54 -0400 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2018-03-19 12:05:11 -0400 |
commit | bbcea13af845d41a9d51a932476eb841ba182ea5 (patch) | |
tree | d846f7d73f60e2bb5865a4bc258b986e63c8f3b7 | |
parent | 0db0e46c40a3a2af71f23033aa09a142d43b8538 (diff) | |
download | haskell-bbcea13af845d41a9d51a932476eb841ba182ea5.tar.gz |
Hoopl: improve postorder calculation
- Fix the naming and comments to indicate that we are calculating
*reverse* postorder (and not the standard postorder).
- Rewrite the calculation to avoid CPS code. I found it fairly
difficult to understand and the new one seems faster (according to
nofib, decreases compiler allocations by 0.2%)
- Remove `LabelsPtr`, which seems unnecessary and could be *really*
confusing. For instance, previously:
`postorder_dfs_from <block with label X>`
and
`postorder_dfs_from <label X>`
would actually mean quite different things (and give different
results).
- Change the `Dataflow` module to always use entry of the graph for
reverse postorder calculation. This should be the only change in
behavior of this commit.
Previously, if the caller provided initial facts for some of the
labels, we would use those labels for our postorder calculation.
However, I don't think that's correct in general - if the initial
facts did not contain the entry of the graph, we would never analyze
the blocks reachable from the entry but unreachable from the labels
provided with the initial facts. It seems that the only analysis that
used this was proc-point analysis, which I think would always include
the entry block (so I don't think there's any bug due to this).
Signed-off-by: Michal Terepeta <michal.terepeta@gmail.com>
Test Plan: ./validate
Reviewers: bgamari, simonmar
Reviewed By: simonmar
Subscribers: rwbarton, thomie, carter
Differential Revision: https://phabricator.haskell.org/D4464
-rw-r--r-- | compiler/cmm/CmmCommonBlockElim.hs | 4 | ||||
-rw-r--r-- | compiler/cmm/CmmContFlowOpt.hs | 9 | ||||
-rw-r--r-- | compiler/cmm/CmmLayoutStack.hs | 2 | ||||
-rw-r--r-- | compiler/cmm/CmmProcPoint.hs | 6 | ||||
-rw-r--r-- | compiler/cmm/CmmSink.hs | 2 | ||||
-rw-r--r-- | compiler/cmm/CmmUtils.hs | 7 | ||||
-rw-r--r-- | compiler/cmm/Hoopl/Dataflow.hs | 29 | ||||
-rw-r--r-- | compiler/cmm/Hoopl/Graph.hs | 118 | ||||
-rw-r--r-- | compiler/cmm/PprCmm.hs | 4 | ||||
-rw-r--r-- | testsuite/tests/cmm/Makefile | 3 | ||||
-rw-r--r-- | testsuite/tests/cmm/should_run/HooplPostorder.hs | 69 | ||||
-rw-r--r-- | testsuite/tests/cmm/should_run/HooplPostorder.stdout | 4 | ||||
-rw-r--r-- | testsuite/tests/cmm/should_run/Makefile | 3 | ||||
-rw-r--r-- | testsuite/tests/cmm/should_run/all.T | 4 |
14 files changed, 162 insertions, 102 deletions
diff --git a/compiler/cmm/CmmCommonBlockElim.hs b/compiler/cmm/CmmCommonBlockElim.hs index fce8f7dae8..c91d553c47 100644 --- a/compiler/cmm/CmmCommonBlockElim.hs +++ b/compiler/cmm/CmmCommonBlockElim.hs @@ -64,7 +64,9 @@ elimCommonBlocks :: CmmGraph -> CmmGraph elimCommonBlocks g = replaceLabels env $ copyTicks env g where env = iterate mapEmpty blocks_with_key - groups = groupByInt hash_block (postorderDfs g) + -- The order of blocks doesn't matter here, but revPostorder also drops any + -- unreachable blocks, which is useful. + groups = groupByInt hash_block (revPostorder g) blocks_with_key = [ [ (successors b, [b]) | b <- bs] | bs <- groups] -- Invariant: The blocks in the list are pairwise distinct diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs index da365cfe7f..9f091da8c2 100644 --- a/compiler/cmm/CmmContFlowOpt.hs +++ b/compiler/cmm/CmmContFlowOpt.hs @@ -174,10 +174,9 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } | otherwise = (entry_id, shortcut_map) - -- blocks is a list of blocks in DFS postorder, while blockmap is - -- a map of blocks. We process each element from blocks and update - -- blockmap accordingly - blocks = postorderDfs g + -- blocks are sorted in reverse postorder, but we want to go from the exit + -- towards beginning, so we use foldr below. + blocks = revPostorder g blockmap = foldl' (flip addBlock) emptyBody blocks -- Accumulator contains three components: @@ -435,7 +434,7 @@ removeUnreachableBlocksProc proc@(CmmProc info lbl live g) | otherwise = env used_blocks :: [CmmBlock] - used_blocks = postorderDfs g + used_blocks = revPostorder g used_lbls :: LabelSet used_lbls = setFromList $ map entryLabel used_blocks diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs index 3f1633404c..d2525d1ffd 100644 --- a/compiler/cmm/CmmLayoutStack.hs +++ b/compiler/cmm/CmmLayoutStack.hs @@ -244,7 +244,7 @@ cmmLayoutStack dflags procpoints entry_args -- We need liveness info. Dead assignments are removed later -- by the sinking pass. let liveness = cmmLocalLiveness dflags graph - blocks = postorderDfs graph + blocks = revPostorder graph (final_stackmaps, _final_high_sp, new_blocks) <- mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) -> diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs index eeae96083a..e3eb1dc45d 100644 --- a/compiler/cmm/CmmProcPoint.hs +++ b/compiler/cmm/CmmProcPoint.hs @@ -190,7 +190,7 @@ minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph -- Given the set of successors of calls (which must be proc-points) -- figure out the minimal set of necessary proc-points minimalProcPointSet platform callProcPoints g - = extendPPSet platform g (postorderDfs g) callProcPoints + = extendPPSet platform g (revPostorder g) callProcPoints extendPPSet :: Platform -> CmmGraph -> [CmmBlock] -> ProcPointSet -> UniqSM ProcPointSet @@ -374,8 +374,8 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap -- reversed later. let (_, block_order) = foldl' add_block_num (0::Int, mapEmpty :: LabelMap Int) - (postorderDfs g) - add_block_num (!i, !map) block = + (revPostorder g) + add_block_num (i, map) block = (i + 1, mapInsert (entryLabel block) i map) sort_fn (bid, _) (bid', _) = compare (expectJust "block_order" $ mapLookup bid block_order) diff --git a/compiler/cmm/CmmSink.hs b/compiler/cmm/CmmSink.hs index c939736939..43444639e1 100644 --- a/compiler/cmm/CmmSink.hs +++ b/compiler/cmm/CmmSink.hs @@ -173,7 +173,7 @@ cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks liveness = cmmLocalLiveness dflags graph getLive l = mapFindWithDefault Set.empty l liveness - blocks = postorderDfs graph + blocks = revPostorder graph join_pts = findJoinPoints blocks diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs index fcd0ec5d3f..aff16b3a19 100644 --- a/compiler/cmm/CmmUtils.hs +++ b/compiler/cmm/CmmUtils.hs @@ -59,7 +59,7 @@ module CmmUtils( ofBlockMap, toBlockMap, insertBlock, ofBlockList, toBlockList, bodyToBlockList, toBlockListEntryFirst, toBlockListEntryFirstFalseFallthrough, - foldlGraphBlocks, mapGraphNodes, postorderDfs, mapGraphNodes1, + foldlGraphBlocks, mapGraphNodes, revPostorder, mapGraphNodes1, -- * Ticks blockTicks @@ -566,8 +566,9 @@ mapGraphNodes1 f = modifyGraph (mapGraph f) foldlGraphBlocks :: (a -> CmmBlock -> a) -> a -> CmmGraph -> a foldlGraphBlocks k z g = mapFoldl k z $ toBlockMap g -postorderDfs :: CmmGraph -> [CmmBlock] -postorderDfs g = {-# SCC "postorderDfs" #-} postorder_dfs_from (toBlockMap g) (g_entry g) +revPostorder :: CmmGraph -> [CmmBlock] +revPostorder g = {-# SCC "revPostorder" #-} + revPostorderFrom (toBlockMap g) (g_entry g) ------------------------------------------------- -- Tick utilities diff --git a/compiler/cmm/Hoopl/Dataflow.hs b/compiler/cmm/Hoopl/Dataflow.hs index 0b0434bb36..2538b70ee3 100644 --- a/compiler/cmm/Hoopl/Dataflow.hs +++ b/compiler/cmm/Hoopl/Dataflow.hs @@ -111,8 +111,7 @@ analyzeCmm dir lattice transfer cmmGraph initFact = blockMap = case hooplGraph of GMany NothingO bm NothingO -> bm - entries = if mapNull initFact then [entry] else mapKeys initFact - in fixpointAnalysis dir lattice transfer entries blockMap initFact + in fixpointAnalysis dir lattice transfer entry blockMap initFact -- Fixpoint algorithm. fixpointAnalysis @@ -120,16 +119,16 @@ fixpointAnalysis Direction -> DataflowLattice f -> TransferFun f - -> [Label] + -> Label -> LabelMap CmmBlock -> FactBase f -> FactBase f -fixpointAnalysis direction lattice do_block entries blockmap = loop start +fixpointAnalysis direction lattice do_block entry blockmap = loop start where -- Sorting the blocks helps to minimize the number of times we need to -- process blocks. For instance, for forward analysis we want to look at -- blocks in reverse postorder. Also, see comments for sortBlocks. - blocks = sortBlocks direction entries blockmap + blocks = sortBlocks direction entry blockmap num_blocks = length blocks block_arr = {-# SCC "block_arr" #-} listArray (0, num_blocks - 1) blocks start = {-# SCC "start" #-} IntSet.fromDistinctAscList @@ -174,9 +173,8 @@ rewriteCmm dir lattice rwFun cmmGraph initFact = do blockMap1 = case hooplGraph of GMany NothingO bm NothingO -> bm - entries = if mapNull initFact then [entry] else mapKeys initFact (blockMap2, facts) <- - fixpointRewrite dir lattice rwFun entries blockMap1 initFact + fixpointRewrite dir lattice rwFun entry blockMap1 initFact return (cmmGraph {g_graph = GMany NothingO blockMap2 NothingO}, facts) fixpointRewrite @@ -184,16 +182,16 @@ fixpointRewrite Direction -> DataflowLattice f -> RewriteFun f - -> [Label] + -> Label -> LabelMap CmmBlock -> FactBase f -> UniqSM (LabelMap CmmBlock, FactBase f) -fixpointRewrite dir lattice do_block entries blockmap = loop start blockmap +fixpointRewrite dir lattice do_block entry blockmap = loop start blockmap where -- Sorting the blocks helps to minimize the number of times we need to -- process blocks. For instance, for forward analysis we want to look at -- blocks in reverse postorder. Also, see comments for sortBlocks. - blocks = sortBlocks dir entries blockmap + blocks = sortBlocks dir entry blockmap num_blocks = length blocks block_arr = {-# SCC "block_arr_rewrite" #-} listArray (0, num_blocks - 1) blocks @@ -268,20 +266,15 @@ we'll propagate (x=4) to L4, and nuke the otherwise-good rewriting of L4. -- | Sort the blocks into the right order for analysis. This means reverse -- postorder for a forward analysis. For the backward one, we simply reverse -- that (see Note [Backward vs forward analysis]). --- --- Note: We're using Hoopl's confusingly named `postorder_dfs_from` but AFAICS --- it returns the *reverse* postorder of the blocks (it visits blocks in the --- postorder and uses (:) to collect them, which gives the reverse of the --- visitation order). sortBlocks :: NonLocal n - => Direction -> [Label] -> LabelMap (Block n C C) -> [Block n C C] -sortBlocks direction entries blockmap = + => Direction -> Label -> LabelMap (Block n C C) -> [Block n C C] +sortBlocks direction entry blockmap = case direction of Fwd -> fwd Bwd -> reverse fwd where - fwd = postorder_dfs_from blockmap entries + fwd = revPostorderFrom blockmap entry -- Note [Backward vs forward analysis] -- diff --git a/compiler/cmm/Hoopl/Graph.hs b/compiler/cmm/Hoopl/Graph.hs index ca482ab4a8..df1ebe3ec1 100644 --- a/compiler/cmm/Hoopl/Graph.hs +++ b/compiler/cmm/Hoopl/Graph.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE BangPatterns #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE RankNTypes #-} @@ -14,7 +15,7 @@ module Hoopl.Graph , labelsDefined , mapGraph , mapGraphBlocks - , postorder_dfs_from + , revPostorderFrom ) where @@ -119,22 +120,10 @@ labelsDefined (GMany _ body x) = mapFoldlWithKey addEntry (exitLabel x) body ---------------------------------------------------------------- -class LabelsPtr l where - targetLabels :: l -> [Label] - -instance NonLocal n => LabelsPtr (n e C) where - targetLabels n = successors n - -instance LabelsPtr Label where - targetLabels l = [l] - -instance LabelsPtr LabelSet where - targetLabels = setElems - -instance LabelsPtr l => LabelsPtr [l] where - targetLabels = concatMap targetLabels - --- | This is the most important traversal over this data structure. It drops +-- | Returns a list of blocks reachable from the provided Labels in the reverse +-- postorder. +-- +-- This is the most important traversal over this data structure. It drops -- unreachable code and puts blocks in an order that is good for solving forward -- dataflow problems quickly. The reverse order is good for solving backward -- dataflow problems quickly. The forward order is also reasonably good for @@ -143,59 +132,52 @@ instance LabelsPtr l => LabelsPtr [l] where -- that you would need a more serious analysis, probably based on dominators, to -- identify loop headers. -- --- The ubiquity of 'postorder_dfs' is one reason for the ubiquity of the 'LGraph' --- representation, when for most purposes the plain 'Graph' representation is --- more mathematically elegant (but results in more complicated code). --- --- Here's an easy way to go wrong! Consider +-- For forward analyses we want reverse postorder visitation, consider: -- @ -- A -> [B,C] -- B -> D -- C -> D -- @ --- Then ordinary dfs would give [A,B,D,C] which has a back ref from C to D. --- Better to get [A,B,C,D] - - --- | Traversal: 'postorder_dfs' returns a list of blocks reachable --- from the entry of enterable graph. The entry and exit are *not* included. --- The list has the following property: --- --- Say a "back reference" exists if one of a block's --- control-flow successors precedes it in the output list --- --- Then there are as few back references as possible --- --- The output is suitable for use in --- a forward dataflow problem. For a backward problem, simply reverse --- the list. ('postorder_dfs' is sufficiently tricky to implement that --- one doesn't want to try and maintain both forward and backward --- versions.) - -postorder_dfs_from_except :: forall block e . (NonLocal block, LabelsPtr e) - => LabelMap (block C C) -> e -> LabelSet -> [block C C] -postorder_dfs_from_except blocks b visited = - vchildren (get_children b) (\acc _visited -> acc) [] visited - where - vnode :: block C C -> ([block C C] -> LabelSet -> a) -> [block C C] -> LabelSet -> a - vnode block cont acc visited = - if setMember id visited then - cont acc visited - else - let cont' acc visited = cont (block:acc) visited in - vchildren (get_children block) cont' acc (setInsert id visited) - where id = entryLabel block - vchildren :: forall a. [block C C] -> ([block C C] -> LabelSet -> a) -> [block C C] -> LabelSet -> a - vchildren bs cont acc visited = next bs acc visited - where next children acc visited = - case children of [] -> cont acc visited - (b:bs) -> vnode b (next bs) acc visited - get_children :: forall l. LabelsPtr l => l -> [block C C] - get_children block = foldr add_id [] $ targetLabels block - add_id id rst = case lookupFact id blocks of - Just b -> b : rst - Nothing -> rst - -postorder_dfs_from - :: (NonLocal block, LabelsPtr b) => LabelMap (block C C) -> b -> [block C C] -postorder_dfs_from blocks b = postorder_dfs_from_except blocks b setEmpty +-- Postorder: [D, C, B, A] (or [D, B, C, A]) +-- Reverse postorder: [A, B, C, D] (or [A, C, B, D]) +-- This matters for, e.g., forward analysis, because we want to analyze *both* +-- B and C before we analyze D. +revPostorderFrom + :: forall block. (NonLocal block) + => LabelMap (block C C) -> Label -> [block C C] +revPostorderFrom graph start = go start_worklist setEmpty [] + where + start_worklist = lookup_for_descend start Nil + + -- To compute the postorder we need to "visit" a block (mark as done) + -- *after* visiting all its successors. So we need to know whether we + -- already processed all successors of each block (and @NonLocal@ allows + -- arbitrary many successors). So we use an explicit stack with an extra bit + -- of information: + -- * @ConsTodo@ means to explore the block if it wasn't visited before + -- * @ConsMark@ means that all successors were already done and we can add + -- the block to the result. + -- + -- NOTE: We add blocks to the result list in postorder, but we *prepend* + -- them (i.e., we use @(:)@), which means that the final list is in reverse + -- postorder. + go :: DfsStack (block C C) -> LabelSet -> [block C C] -> [block C C] + go Nil !_ !result = result + go (ConsMark block rest) !wip_or_done !result = + go rest wip_or_done (block : result) + go (ConsTodo block rest) !wip_or_done !result + | entryLabel block `setMember` wip_or_done = go rest wip_or_done result + | otherwise = + let new_worklist = + foldr lookup_for_descend + (ConsMark block rest) + (successors block) + in go new_worklist (setInsert (entryLabel block) wip_or_done) result + + lookup_for_descend :: Label -> DfsStack (block C C) -> DfsStack (block C C) + lookup_for_descend label wl + | Just b <- mapLookup label graph = ConsTodo b wl + | otherwise = + error $ "Label that doesn't have a block?! " ++ show label + +data DfsStack a = ConsTodo a (DfsStack a) | ConsMark a (DfsStack a) | Nil diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs index 6a93ea818e..c9a6003aaf 100644 --- a/compiler/cmm/PprCmm.hs +++ b/compiler/cmm/PprCmm.hs @@ -141,8 +141,8 @@ pprCmmGraph g = text "{" <> text "offset" $$ nest 2 (vcat $ map ppr blocks) $$ text "}" - where blocks = postorderDfs g - -- postorderDfs has the side-effect of discarding unreachable code, + where blocks = revPostorder g + -- revPostorder has the side-effect of discarding unreachable code, -- so pretty-printed Cmm will omit any unreachable blocks. This can -- sometimes be confusing. diff --git a/testsuite/tests/cmm/Makefile b/testsuite/tests/cmm/Makefile new file mode 100644 index 0000000000..9a36a1c5fe --- /dev/null +++ b/testsuite/tests/cmm/Makefile @@ -0,0 +1,3 @@ +TOP=../.. +include $(TOP)/mk/boilerplate.mk +include $(TOP)/mk/test.mk diff --git a/testsuite/tests/cmm/should_run/HooplPostorder.hs b/testsuite/tests/cmm/should_run/HooplPostorder.hs new file mode 100644 index 0000000000..d7a8bbaef1 --- /dev/null +++ b/testsuite/tests/cmm/should_run/HooplPostorder.hs @@ -0,0 +1,69 @@ +module Main where + +import Hoopl.Block +import Hoopl.Collections +import Hoopl.Graph +import Hoopl.Label + +import Data.Maybe + +data TestBlock e x = TB { label_ :: Label, successors_ :: [Label] } + deriving (Eq, Show) + +instance NonLocal TestBlock where + entryLabel = label_ + successors = successors_ + +-- Test the classical diamond shape graph. +test_diamond :: LabelMap (TestBlock C C) +test_diamond = mapFromList $ map (\b -> (label_ b, b)) blocks + where + blocks = + [ TB (mkHooplLabel 1) [mkHooplLabel 2, mkHooplLabel 3] + , TB (mkHooplLabel 2) [mkHooplLabel 4] + , TB (mkHooplLabel 3) [mkHooplLabel 4] + , TB (mkHooplLabel 4) [] + ] + +-- Test that the backedge doesn't change anything. +test_diamond_backedge :: LabelMap (TestBlock C C) +test_diamond_backedge = mapFromList $ map (\b -> (label_ b, b)) blocks + where + blocks = + [ TB (mkHooplLabel 1) [mkHooplLabel 2, mkHooplLabel 3] + , TB (mkHooplLabel 2) [mkHooplLabel 4] + , TB (mkHooplLabel 3) [mkHooplLabel 4] + , TB (mkHooplLabel 4) [mkHooplLabel 1] + ] + +-- Test that the "bypass" edge from 1 to 4 doesn't change anything. +test_3 :: LabelMap (TestBlock C C) +test_3 = mapFromList $ map (\b -> (label_ b, b)) blocks + where + blocks = + [ TB (mkHooplLabel 1) [mkHooplLabel 2, mkHooplLabel 4] + , TB (mkHooplLabel 2) [mkHooplLabel 4] + , TB (mkHooplLabel 4) [] + ] + +-- Like test_3 but with different order of successors for the entry point. +test_4 :: LabelMap (TestBlock C C) +test_4 = mapFromList $ map (\b -> (label_ b, b)) blocks + where + blocks = + [ TB (mkHooplLabel 1) [mkHooplLabel 4, mkHooplLabel 2] + , TB (mkHooplLabel 2) [mkHooplLabel 4] + , TB (mkHooplLabel 4) [] + ] + + +main :: IO () +main = do + let result = revPostorderFrom test_diamond (mkHooplLabel 1) + putStrLn (show $ map label_ result) + let result = revPostorderFrom test_diamond_backedge (mkHooplLabel 1) + putStrLn (show $ map label_ result) + let result = revPostorderFrom test_3 (mkHooplLabel 1) + putStrLn (show $ map label_ result) + let result = revPostorderFrom test_4 (mkHooplLabel 1) + putStrLn (show $ map label_ result) diff --git a/testsuite/tests/cmm/should_run/HooplPostorder.stdout b/testsuite/tests/cmm/should_run/HooplPostorder.stdout new file mode 100644 index 0000000000..7e704c224b --- /dev/null +++ b/testsuite/tests/cmm/should_run/HooplPostorder.stdout @@ -0,0 +1,4 @@ +[L1,L3,L2,L4] +[L1,L3,L2,L4] +[L1,L2,L4] +[L1,L2,L4] diff --git a/testsuite/tests/cmm/should_run/Makefile b/testsuite/tests/cmm/should_run/Makefile new file mode 100644 index 0000000000..9101fbd40a --- /dev/null +++ b/testsuite/tests/cmm/should_run/Makefile @@ -0,0 +1,3 @@ +TOP=../../.. +include $(TOP)/mk/boilerplate.mk +include $(TOP)/mk/test.mk diff --git a/testsuite/tests/cmm/should_run/all.T b/testsuite/tests/cmm/should_run/all.T new file mode 100644 index 0000000000..00838075cf --- /dev/null +++ b/testsuite/tests/cmm/should_run/all.T @@ -0,0 +1,4 @@ +test('HooplPostorder', + extra_run_opts('"' + config.libdir + '"'), + compile_and_run, + ['-package ghc']) |