14 files changed, 162 insertions, 102 deletions
diff --git a/compiler/cmm/CmmCommonBlockElim.hs b/compiler/cmm/CmmCommonBlockElim.hs
index fce8f7dae8..c91d553c47 100644
--- a/compiler/cmm/CmmCommonBlockElim.hs
+++ b/compiler/cmm/CmmCommonBlockElim.hs
@@ -64,7 +64,9 @@ elimCommonBlocks :: CmmGraph -> CmmGraph
 elimCommonBlocks g = replaceLabels env $ copyTicks env g
   where
      env = iterate mapEmpty blocks_with_key
-     groups = groupByInt hash_block (postorderDfs g)
+     -- The order of blocks doesn't matter here, but revPostorder also drops any
+     -- unreachable blocks, which is useful.
+     groups = groupByInt hash_block (revPostorder g)
      blocks_with_key = [ [ (successors b, [b]) | b <- bs] | bs <- groups]
 
 -- Invariant: The blocks in the list are pairwise distinct
diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs
index da365cfe7f..9f091da8c2 100644
--- a/compiler/cmm/CmmContFlowOpt.hs
+++ b/compiler/cmm/CmmContFlowOpt.hs
@@ -174,10 +174,9 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
        | otherwise
        = (entry_id, shortcut_map)
 
-     -- blocks is a list of blocks in DFS postorder, while blockmap is
-     -- a map of blocks. We process each element from blocks and update
-     -- blockmap accordingly
-     blocks = postorderDfs g
+     -- blocks are sorted in reverse postorder, but we want to go from the exit
+     -- towards beginning, so we use foldr below.
+     blocks = revPostorder g
      blockmap = foldl' (flip addBlock) emptyBody blocks
 
      -- Accumulator contains three components:
@@ -435,7 +434,7 @@ removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
                   | otherwise               = env
 
      used_blocks :: [CmmBlock]
-     used_blocks = postorderDfs g
+     used_blocks = revPostorder g
 
      used_lbls :: LabelSet
      used_lbls = setFromList $ map entryLabel used_blocks
diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs
index 3f1633404c..d2525d1ffd 100644
--- a/compiler/cmm/CmmLayoutStack.hs
+++ b/compiler/cmm/CmmLayoutStack.hs
@@ -244,7 +244,7 @@ cmmLayoutStack dflags procpoints entry_args
     -- We need liveness info. Dead assignments are removed later
     -- by the sinking pass.
     let liveness = cmmLocalLiveness dflags graph
-        blocks = postorderDfs graph
+        blocks = revPostorder graph
 
     (final_stackmaps, _final_high_sp, new_blocks) <-
           mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) ->
diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs
index eeae96083a..e3eb1dc45d 100644
--- a/compiler/cmm/CmmProcPoint.hs
+++ b/compiler/cmm/CmmProcPoint.hs
@@ -190,7 +190,7 @@ minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph
 -- Given the set of successors of calls (which must be proc-points)
 -- figure out the minimal set of necessary proc-points
 minimalProcPointSet platform callProcPoints g
-  = extendPPSet platform g (postorderDfs g) callProcPoints
+  = extendPPSet platform g (revPostorder g) callProcPoints
 
 extendPPSet
     :: Platform -> CmmGraph -> [CmmBlock] -> ProcPointSet -> UniqSM ProcPointSet
@@ -374,8 +374,8 @@ splitAtProcPoints dflags entry_label callPPs procPoints procMap
      -- reversed later.
      let (_, block_order) =
              foldl' add_block_num (0::Int, mapEmpty :: LabelMap Int)
-                    (postorderDfs g)
-         add_block_num (!i, !map) block =
+                   (revPostorder g)
+         add_block_num (i, map) block =
            (i + 1, mapInsert (entryLabel block) i map)
          sort_fn (bid, _) (bid', _) =
            compare (expectJust "block_order" $ mapLookup bid  block_order)
diff --git a/compiler/cmm/CmmSink.hs b/compiler/cmm/CmmSink.hs
index c939736939..43444639e1 100644
--- a/compiler/cmm/CmmSink.hs
+++ b/compiler/cmm/CmmSink.hs
@@ -173,7 +173,7 @@ cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
   liveness = cmmLocalLiveness dflags graph
   getLive l = mapFindWithDefault Set.empty l liveness
 
-  blocks = postorderDfs graph
+  blocks = revPostorder graph
 
   join_pts = findJoinPoints blocks
 
diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs
index fcd0ec5d3f..aff16b3a19 100644
--- a/compiler/cmm/CmmUtils.hs
+++ b/compiler/cmm/CmmUtils.hs
@@ -59,7 +59,7 @@ module CmmUtils(
         ofBlockMap, toBlockMap, insertBlock,
         ofBlockList, toBlockList, bodyToBlockList,
         toBlockListEntryFirst, toBlockListEntryFirstFalseFallthrough,
-        foldlGraphBlocks, mapGraphNodes, postorderDfs, mapGraphNodes1,
+        foldlGraphBlocks, mapGraphNodes, revPostorder, mapGraphNodes1,
 
         -- * Ticks
         blockTicks
@@ -566,8 +566,9 @@ mapGraphNodes1 f = modifyGraph (mapGraph f)
 foldlGraphBlocks :: (a -> CmmBlock -> a) -> a -> CmmGraph -> a
 foldlGraphBlocks k z g = mapFoldl k z $ toBlockMap g
 
-postorderDfs :: CmmGraph -> [CmmBlock]
-postorderDfs g = {-# SCC "postorderDfs" #-} postorder_dfs_from (toBlockMap g) (g_entry g)
+revPostorder :: CmmGraph -> [CmmBlock]
+revPostorder g = {-# SCC "revPostorder" #-}
+    revPostorderFrom (toBlockMap g) (g_entry g)
 
 -------------------------------------------------
 -- Tick utilities
diff --git a/compiler/cmm/Hoopl/Dataflow.hs b/compiler/cmm/Hoopl/Dataflow.hs
index 0b0434bb36..2538b70ee3 100644
--- a/compiler/cmm/Hoopl/Dataflow.hs
+++ b/compiler/cmm/Hoopl/Dataflow.hs
@@ -111,8 +111,7 @@ analyzeCmm dir lattice transfer cmmGraph initFact =
         blockMap =
             case hooplGraph of
                 GMany NothingO bm NothingO -> bm
-        entries = if mapNull initFact then [entry] else mapKeys initFact
-    in fixpointAnalysis dir lattice transfer entries blockMap initFact
+    in fixpointAnalysis dir lattice transfer entry blockMap initFact
 
 -- Fixpoint algorithm.
 fixpointAnalysis
@@ -120,16 +119,16 @@ fixpointAnalysis
        Direction
     -> DataflowLattice f
     -> TransferFun f
-    -> [Label]
+    -> Label
     -> LabelMap CmmBlock
     -> FactBase f
     -> FactBase f
-fixpointAnalysis direction lattice do_block entries blockmap = loop start
+fixpointAnalysis direction lattice do_block entry blockmap = loop start
   where
     -- Sorting the blocks helps to minimize the number of times we need to
     -- process blocks. For instance, for forward analysis we want to look at
     -- blocks in reverse postorder. Also, see comments for sortBlocks.
-    blocks     = sortBlocks direction entries blockmap
+    blocks     = sortBlocks direction entry blockmap
     num_blocks = length blocks
     block_arr  = {-# SCC "block_arr" #-} listArray (0, num_blocks - 1) blocks
     start      = {-# SCC "start" #-} IntSet.fromDistinctAscList
@@ -174,9 +173,8 @@ rewriteCmm dir lattice rwFun cmmGraph initFact = do
         blockMap1 =
             case hooplGraph of
                 GMany NothingO bm NothingO -> bm
-        entries = if mapNull initFact then [entry] else mapKeys initFact
     (blockMap2, facts) <-
-        fixpointRewrite dir lattice rwFun entries blockMap1 initFact
+        fixpointRewrite dir lattice rwFun entry blockMap1 initFact
     return (cmmGraph {g_graph = GMany NothingO blockMap2 NothingO}, facts)
 
 fixpointRewrite
@@ -184,16 +182,16 @@ fixpointRewrite
        Direction
     -> DataflowLattice f
     -> RewriteFun f
-    -> [Label]
+    -> Label
     -> LabelMap CmmBlock
     -> FactBase f
     -> UniqSM (LabelMap CmmBlock, FactBase f)
-fixpointRewrite dir lattice do_block entries blockmap = loop start blockmap
+fixpointRewrite dir lattice do_block entry blockmap = loop start blockmap
   where
     -- Sorting the blocks helps to minimize the number of times we need to
     -- process blocks. For instance, for forward analysis we want to look at
     -- blocks in reverse postorder. Also, see comments for sortBlocks.
-    blocks     = sortBlocks dir entries blockmap
+    blocks     = sortBlocks dir entry blockmap
     num_blocks = length blocks
     block_arr  = {-# SCC "block_arr_rewrite" #-}
                  listArray (0, num_blocks - 1) blocks
@@ -268,20 +266,15 @@ we'll propagate (x=4) to L4, and nuke the otherwise-good rewriting of L4.
 -- | Sort the blocks into the right order for analysis. This means reverse
 -- postorder for a forward analysis. For the backward one, we simply reverse
 -- that (see Note [Backward vs forward analysis]).
---
--- Note: We're using Hoopl's confusingly named `postorder_dfs_from` but AFAICS
--- it returns the *reverse* postorder of the blocks (it visits blocks in the
--- postorder and uses (:) to collect them, which gives the reverse of the
--- visitation order).
 sortBlocks
     :: NonLocal n
-    => Direction -> [Label] -> LabelMap (Block n C C) -> [Block n C C]
-sortBlocks direction entries blockmap =
+    => Direction -> Label -> LabelMap (Block n C C) -> [Block n C C]
+sortBlocks direction entry blockmap =
     case direction of
         Fwd -> fwd
         Bwd -> reverse fwd
   where
-    fwd = postorder_dfs_from blockmap entries
+    fwd = revPostorderFrom blockmap entry
 
 -- Note [Backward vs forward analysis]
 --
diff --git a/compiler/cmm/Hoopl/Graph.hs b/compiler/cmm/Hoopl/Graph.hs
index ca482ab4a8..df1ebe3ec1 100644
--- a/compiler/cmm/Hoopl/Graph.hs
+++ b/compiler/cmm/Hoopl/Graph.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE BangPatterns #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE GADTs #-}
 {-# LANGUAGE RankNTypes #-}
@@ -14,7 +15,7 @@ module Hoopl.Graph
     , labelsDefined
     , mapGraph
     , mapGraphBlocks
-    , postorder_dfs_from
+    , revPostorderFrom
     ) where
 
 
@@ -119,22 +120,10 @@ labelsDefined (GMany _ body x) = mapFoldlWithKey addEntry (exitLabel x) body
 
 ----------------------------------------------------------------
 
-class LabelsPtr l where
-  targetLabels :: l -> [Label]
-
-instance NonLocal n => LabelsPtr (n e C) where
-  targetLabels n = successors n
-
-instance LabelsPtr Label where
-  targetLabels l = [l]
-
-instance LabelsPtr LabelSet where
-  targetLabels = setElems
-
-instance LabelsPtr l => LabelsPtr [l] where
-  targetLabels = concatMap targetLabels
-
--- | This is the most important traversal over this data structure.  It drops
+-- | Returns a list of blocks reachable from the provided Labels in the reverse
+-- postorder.
+--
+-- This is the most important traversal over this data structure.  It drops
 -- unreachable code and puts blocks in an order that is good for solving forward
 -- dataflow problems quickly.  The reverse order is good for solving backward
 -- dataflow problems quickly.  The forward order is also reasonably good for
@@ -143,59 +132,52 @@ instance LabelsPtr l => LabelsPtr [l] where
 -- that you would need a more serious analysis, probably based on dominators, to
 -- identify loop headers.
 --
--- The ubiquity of 'postorder_dfs' is one reason for the ubiquity of the 'LGraph'
--- representation, when for most purposes the plain 'Graph' representation is
--- more mathematically elegant (but results in more complicated code).
---
--- Here's an easy way to go wrong!  Consider
+-- For forward analyses we want reverse postorder visitation, consider:
 -- @
 --      A -> [B,C]
 --      B -> D
 --      C -> D
 -- @
--- Then ordinary dfs would give [A,B,D,C] which has a back ref from C to D.
--- Better to get [A,B,C,D]
-
-
--- | Traversal: 'postorder_dfs' returns a list of blocks reachable
--- from the entry of enterable graph. The entry and exit are *not* included.
--- The list has the following property:
---
---      Say a "back reference" exists if one of a block's
---      control-flow successors precedes it in the output list
---
---      Then there are as few back references as possible
---
--- The output is suitable for use in
--- a forward dataflow problem.  For a backward problem, simply reverse
--- the list.  ('postorder_dfs' is sufficiently tricky to implement that
--- one doesn't want to try and maintain both forward and backward
--- versions.)
-
-postorder_dfs_from_except :: forall block e . (NonLocal block, LabelsPtr e)
-                          => LabelMap (block C C) -> e -> LabelSet -> [block C C]
-postorder_dfs_from_except blocks b visited =
- vchildren (get_children b) (\acc _visited -> acc) [] visited
- where
-   vnode :: block C C -> ([block C C] -> LabelSet -> a) -> [block C C] -> LabelSet -> a
-   vnode block cont acc visited =
-        if setMember id visited then
-            cont acc visited
-        else
-            let cont' acc visited = cont (block:acc) visited in
-            vchildren (get_children block) cont' acc (setInsert id visited)
-      where id = entryLabel block
-   vchildren :: forall a. [block C C] -> ([block C C] -> LabelSet -> a) -> [block C C] -> LabelSet -> a
-   vchildren bs cont acc visited = next bs acc visited
-      where next children acc visited =
-                case children of []     -> cont acc visited
-                                 (b:bs) -> vnode b (next bs) acc visited
-   get_children :: forall l. LabelsPtr l => l -> [block C C]
-   get_children block = foldr add_id [] $ targetLabels block
-   add_id id rst = case lookupFact id blocks of
-                      Just b -> b : rst
-                      Nothing -> rst
-
-postorder_dfs_from
-    :: (NonLocal block, LabelsPtr b) => LabelMap (block C C) -> b -> [block C C]
-postorder_dfs_from blocks b = postorder_dfs_from_except blocks b setEmpty
+-- Postorder: [D, C, B, A] (or [D, B, C, A])
+-- Reverse postorder: [A, B, C, D] (or [A, C, B, D])
+-- This matters for, e.g., forward analysis, because we want to analyze *both*
+-- B and C before we analyze D.
+revPostorderFrom
+  :: forall block.  (NonLocal block)
+  => LabelMap (block C C) -> Label -> [block C C]
+revPostorderFrom graph start = go start_worklist setEmpty []
+  where
+    start_worklist = lookup_for_descend start Nil
+
+    -- To compute the postorder we need to "visit" a block (mark as done)
+    -- *after* visiting all its successors. So we need to know whether we
+    -- already processed all successors of each block (and @NonLocal@ allows
+    -- arbitrary many successors). So we use an explicit stack with an extra bit
+    -- of information:
+    -- * @ConsTodo@ means to explore the block if it wasn't visited before
+    -- * @ConsMark@ means that all successors were already done and we can add
+    --   the block to the result.
+    --
+    -- NOTE: We add blocks to the result list in postorder, but we *prepend*
+    -- them (i.e., we use @(:)@), which means that the final list is in reverse
+    -- postorder.
+    go :: DfsStack (block C C) -> LabelSet -> [block C C] -> [block C C]
+    go Nil                      !_           !result = result
+    go (ConsMark block rest)    !wip_or_done !result =
+        go rest wip_or_done (block : result)
+    go (ConsTodo block rest)    !wip_or_done !result
+        | entryLabel block `setMember` wip_or_done = go rest wip_or_done result
+        | otherwise =
+            let new_worklist =
+                    foldr lookup_for_descend
+                          (ConsMark block rest)
+                          (successors block)
+            in go new_worklist (setInsert (entryLabel block) wip_or_done) result
+
+    lookup_for_descend :: Label -> DfsStack (block C C) -> DfsStack (block C C)
+    lookup_for_descend label wl
+      | Just b <- mapLookup label graph = ConsTodo b wl
+      | otherwise =
+           error $ "Label that doesn't have a block?! " ++ show label
+
+data DfsStack a = ConsTodo a (DfsStack a) | ConsMark a (DfsStack a) | Nil
diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs
index 6a93ea818e..c9a6003aaf 100644
--- a/compiler/cmm/PprCmm.hs
+++ b/compiler/cmm/PprCmm.hs
@@ -141,8 +141,8 @@ pprCmmGraph g
    = text "{" <> text "offset"
   $$ nest 2 (vcat $ map ppr blocks)
   $$ text "}"
-  where blocks = postorderDfs g
-    -- postorderDfs has the side-effect of discarding unreachable code,
+  where blocks = revPostorder g
+    -- revPostorder has the side-effect of discarding unreachable code,
     -- so pretty-printed Cmm will omit any unreachable blocks.  This can
     -- sometimes be confusing.
 
diff --git a/testsuite/tests/cmm/Makefile b/testsuite/tests/cmm/Makefile
new file mode 100644
index 0000000000..9a36a1c5fe
--- /dev/null
+++ b/testsuite/tests/cmm/Makefile
@@ -0,0 +1,3 @@
+TOP=../..
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
diff --git a/testsuite/tests/cmm/should_run/HooplPostorder.hs b/testsuite/tests/cmm/should_run/HooplPostorder.hs
new file mode 100644
index 0000000000..d7a8bbaef1
--- /dev/null
+++ b/testsuite/tests/cmm/should_run/HooplPostorder.hs
@@ -0,0 +1,69 @@
+module Main where
+
+import Hoopl.Block
+import Hoopl.Collections
+import Hoopl.Graph
+import Hoopl.Label
+
+import Data.Maybe
+
+data TestBlock e x = TB { label_ :: Label, successors_ :: [Label] }
+    deriving (Eq, Show)
+
+instance NonLocal TestBlock where
+  entryLabel = label_
+  successors = successors_
+
+-- Test the classical diamond shape graph.
+test_diamond :: LabelMap (TestBlock C C)
+test_diamond = mapFromList $ map (\b -> (label_ b, b)) blocks
+  where
+    blocks =
+        [ TB (mkHooplLabel 1) [mkHooplLabel 2, mkHooplLabel 3]
+        , TB (mkHooplLabel 2) [mkHooplLabel 4]
+        , TB (mkHooplLabel 3) [mkHooplLabel 4]
+        , TB (mkHooplLabel 4) []
+        ]
+
+-- Test that the backedge doesn't change anything.
+test_diamond_backedge :: LabelMap (TestBlock C C)
+test_diamond_backedge = mapFromList $ map (\b -> (label_ b, b)) blocks
+  where
+    blocks =
+        [ TB (mkHooplLabel 1) [mkHooplLabel 2, mkHooplLabel 3]
+        , TB (mkHooplLabel 2) [mkHooplLabel 4]
+        , TB (mkHooplLabel 3) [mkHooplLabel 4]
+        , TB (mkHooplLabel 4) [mkHooplLabel 1]
+        ]
+
+-- Test that the "bypass" edge from 1 to 4 doesn't change anything.
+test_3 :: LabelMap (TestBlock C C)
+test_3 = mapFromList $ map (\b -> (label_ b, b)) blocks
+  where
+    blocks =
+        [ TB (mkHooplLabel 1) [mkHooplLabel 2, mkHooplLabel 4]
+        , TB (mkHooplLabel 2) [mkHooplLabel 4]
+        , TB (mkHooplLabel 4) []
+        ]
+
+-- Like test_3 but with different order of successors for the entry point.
+test_4 :: LabelMap (TestBlock C C)
+test_4 = mapFromList $ map (\b -> (label_ b, b)) blocks
+  where
+    blocks =
+        [ TB (mkHooplLabel 1) [mkHooplLabel 4, mkHooplLabel 2]
+        , TB (mkHooplLabel 2) [mkHooplLabel 4]
+        , TB (mkHooplLabel 4) []
+        ]
+
+
+main :: IO ()
+main = do
+    let result = revPostorderFrom test_diamond (mkHooplLabel 1)
+    putStrLn (show $ map label_ result)
+    let result = revPostorderFrom test_diamond_backedge (mkHooplLabel 1)
+    putStrLn (show $ map label_ result)
+    let result = revPostorderFrom test_3 (mkHooplLabel 1)
+    putStrLn (show $ map label_ result)
+    let result = revPostorderFrom test_4 (mkHooplLabel 1)
+    putStrLn (show $ map label_ result)
diff --git a/testsuite/tests/cmm/should_run/HooplPostorder.stdout b/testsuite/tests/cmm/should_run/HooplPostorder.stdout
new file mode 100644
index 0000000000..7e704c224b
--- /dev/null
+++ b/testsuite/tests/cmm/should_run/HooplPostorder.stdout
@@ -0,0 +1,4 @@
+[L1,L3,L2,L4]
+[L1,L3,L2,L4]
+[L1,L2,L4]
+[L1,L2,L4]
diff --git a/testsuite/tests/cmm/should_run/Makefile b/testsuite/tests/cmm/should_run/Makefile
new file mode 100644
index 0000000000..9101fbd40a
--- /dev/null
+++ b/testsuite/tests/cmm/should_run/Makefile
@@ -0,0 +1,3 @@
+TOP=../../..
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
diff --git a/testsuite/tests/cmm/should_run/all.T b/testsuite/tests/cmm/should_run/all.T
new file mode 100644
index 0000000000..00838075cf
--- /dev/null
+++ b/testsuite/tests/cmm/should_run/all.T
@@ -0,0 +1,4 @@
+test('HooplPostorder',
+     extra_run_opts('"' + config.libdir + '"'),
+     compile_and_run,
+     ['-package ghc'])