CmmPipeline: add a second pass of CmmCommonBlockElim

The sinking pass often gets rid of unnecessary registers registers/assignements exposing more opportunities for CBE, so this commit adds a second round of CBE after the sinking pass and should fix #12915 (and some examples in #14226). Nofib results: * Binary size: 0.9% reduction on average * Compile allocations: 0.7% increase on average * Runtime: noisy, two separate runs of nofib showed a tiny reduction on average, (~0.2-0.3%), but I think this is mostly noise * Compile time: very noisy, but generally within +/- 0.5% (one run faster, one slower) One interesting part of this change is that running CBE invalidates results of proc-point analysis. But instead of re-doing the whole analysis, we can use the map that CBE creates for replacing/comparing block labels (maps a redundant label to a useful one) to update the results of proc-point analysis. This lowers the overhead compared to the previous experiment in #12915. Signed-off-by: Michal Terepeta <michal.terepeta@gmail.com> Test Plan: ./validate Reviewers: bgamari, simonmar Reviewed By: simonmar Subscribers: rwbarton, thomie, carter GHC Trac Issues: #12915, #14226 Differential Revision: https://phabricator.haskell.org/D4417
author: Michal Terepeta <michal.terepeta@gmail.com> 2018-03-26 15:18:44 -0400
committer: Ben Gamari <ben@smart-cactus.org> 2018-03-27 09:53:10 -0400
commit: d5c4d46a62ce6a0cfa6440344f707136eff18119 (patch)
tree: 539f6eaf8fa90d791ab8315b6f40c8687f56e92b
parent: 8cfd2e4e5832bd188816e64daaa95c9003437723 (diff)
download: haskell-d5c4d46a62ce6a0cfa6440344f707136eff18119.tar.gz
2 files changed, 43 insertions, 7 deletions
diff --git a/compiler/cmm/CmmCommonBlockElim.hs b/compiler/cmm/CmmCommonBlockElim.hs
index c91d553c47..ad10511c1d 100644
--- a/compiler/cmm/CmmCommonBlockElim.hs
+++ b/compiler/cmm/CmmCommonBlockElim.hs
@@ -59,9 +59,10 @@ import Data.List (foldl')
 -- hashes, and at most once otherwise. Previously, we were slower, and people
 -- rightfully complained: #10397
 
--- TODO: Use optimization fuel
-elimCommonBlocks :: CmmGraph -> CmmGraph
-elimCommonBlocks g = replaceLabels env $ copyTicks env g
+type Subst = LabelMap BlockId
+
+elimCommonBlocks :: CmmGraph -> (CmmGraph, Subst)
+elimCommonBlocks g = (replaceLabels env $ copyTicks env g, env)
   where
      env = iterate mapEmpty blocks_with_key
      -- The order of blocks doesn't matter here, but revPostorder also drops any
@@ -73,7 +74,6 @@ elimCommonBlocks g = replaceLabels env $ copyTicks env g
 -- (so avoid comparing them again)
 type DistinctBlocks = [CmmBlock]
 type Key = [Label]
-type Subst = LabelMap BlockId
 
 -- The outer list groups by hash. We retain this grouping throughout.
 iterate :: Subst -> [[(Key, DistinctBlocks)]] -> Subst
diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs
index 4d109a4086..babdb0b300 100644
--- a/compiler/cmm/CmmPipeline.hs
+++ b/compiler/cmm/CmmPipeline.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE TypeFamilies #-}
 
 module CmmPipeline (
   -- | Converts C-- with an implicit stack and native C-- calls into
@@ -28,6 +29,8 @@ import Control.Monad
 import Outputable
 import Platform
 
+import Data.Maybe
+
 -----------------------------------------------------------------------------
 -- | Top level driver for C-- pipeline
 -----------------------------------------------------------------------------
@@ -67,9 +70,9 @@ cpsTop hsc_env proc =
                                           , do_layout = do_layout }} = h
 
        ----------- Eliminate common blocks -------------------------------------
-       g <- {-# SCC "elimCommonBlocks" #-}
-            condPass Opt_CmmElimCommonBlocks elimCommonBlocks g
-                          Opt_D_dump_cmm_cbe "Post common block elimination"
+       (g, _) <- {-# SCC "elimCommonBlocks" #-}
+                 condPass2 Opt_CmmElimCommonBlocks elimCommonBlocks g mapEmpty
+                           Opt_D_dump_cmm_cbe "Post common block elimination"
 
        -- Any work storing block Labels must be performed _after_
        -- elimCommonBlocks
@@ -104,6 +107,32 @@ cpsTop hsc_env proc =
             condPass Opt_CmmSink (cmmSink dflags) g
                      Opt_D_dump_cmm_sink "Sink assignments"
 
+       (g, call_pps, proc_points) <- do
+         -- Only do the second CBE if we did the sinking pass. Otherwise,
+         -- it's unlikely we'll have any new opportunities to find redundant
+         -- blocks.
+         if not (gopt Opt_CmmSink dflags)
+           then pure (g, call_pps, proc_points)
+           else do
+             (g, cbe_subst) <- {-# SCC "elimCommonBlocks2" #-}
+               condPass2
+                 Opt_CmmElimCommonBlocks elimCommonBlocks g mapEmpty
+                 Opt_D_dump_cmm_cbe "Post common block elimination 2"
+
+             -- CBE might invalidate the results of proc-point analysis (by
+             -- removing labels). So we need to fix it. Instead of re-doing
+             -- the whole analysis, we use the final substitution env from
+             -- CBE to update existing results.
+             let cbe_fix set bid =
+                   setInsert (fromMaybe bid (mapLookup bid cbe_subst)) set
+             let !new_call_pps = setFoldl cbe_fix setEmpty call_pps
+             let !new_proc_points
+                   | splitting_proc_points =
+                       setFoldl cbe_fix setEmpty proc_points
+                   | otherwise = new_call_pps
+
+             return (g, new_call_pps, new_proc_points)
+
        ------------- CAF analysis ----------------------------------------------
        let cafEnv = {-# SCC "cafAnal" #-} cafAnal g
        dumpWith dflags Opt_D_dump_cmm_caf "CAFEnv" (ppr cafEnv)
@@ -155,6 +184,13 @@ cpsTop hsc_env proc =
                     return g
                else return g
 
+        condPass2 flag pass g a dumpflag dumpname =
+            if gopt flag dflags
+               then do
+                    (g, a) <- return $ pass g
+                    dump dumpflag dumpname g
+                    return (g, a)
+               else return (g, a)
 
         -- we don't need to split proc points for the NCG, unless
         -- tablesNextToCode is off.  The latter is because we have no
author	Michal Terepeta <michal.terepeta@gmail.com>	2018-03-26 15:18:44 -0400
committer	Ben Gamari <ben@smart-cactus.org>	2018-03-27 09:53:10 -0400
commit	d5c4d46a62ce6a0cfa6440344f707136eff18119 (patch)
tree	539f6eaf8fa90d791ab8315b6f40c8687f56e92b
parent	8cfd2e4e5832bd188816e64daaa95c9003437723 (diff)
download	haskell-d5c4d46a62ce6a0cfa6440344f707136eff18119.tar.gz