summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichal Terepeta <michal.terepeta@gmail.com>2018-03-26 15:18:44 -0400
committerBen Gamari <ben@smart-cactus.org>2018-03-27 09:53:10 -0400
commitd5c4d46a62ce6a0cfa6440344f707136eff18119 (patch)
tree539f6eaf8fa90d791ab8315b6f40c8687f56e92b
parent8cfd2e4e5832bd188816e64daaa95c9003437723 (diff)
downloadhaskell-d5c4d46a62ce6a0cfa6440344f707136eff18119.tar.gz
CmmPipeline: add a second pass of CmmCommonBlockElim
The sinking pass often gets rid of unnecessary registers registers/assignements exposing more opportunities for CBE, so this commit adds a second round of CBE after the sinking pass and should fix #12915 (and some examples in #14226). Nofib results: * Binary size: 0.9% reduction on average * Compile allocations: 0.7% increase on average * Runtime: noisy, two separate runs of nofib showed a tiny reduction on average, (~0.2-0.3%), but I think this is mostly noise * Compile time: very noisy, but generally within +/- 0.5% (one run faster, one slower) One interesting part of this change is that running CBE invalidates results of proc-point analysis. But instead of re-doing the whole analysis, we can use the map that CBE creates for replacing/comparing block labels (maps a redundant label to a useful one) to update the results of proc-point analysis. This lowers the overhead compared to the previous experiment in #12915. Signed-off-by: Michal Terepeta <michal.terepeta@gmail.com> Test Plan: ./validate Reviewers: bgamari, simonmar Reviewed By: simonmar Subscribers: rwbarton, thomie, carter GHC Trac Issues: #12915, #14226 Differential Revision: https://phabricator.haskell.org/D4417
-rw-r--r--compiler/cmm/CmmCommonBlockElim.hs8
-rw-r--r--compiler/cmm/CmmPipeline.hs42
2 files changed, 43 insertions, 7 deletions
diff --git a/compiler/cmm/CmmCommonBlockElim.hs b/compiler/cmm/CmmCommonBlockElim.hs
index c91d553c47..ad10511c1d 100644
--- a/compiler/cmm/CmmCommonBlockElim.hs
+++ b/compiler/cmm/CmmCommonBlockElim.hs
@@ -59,9 +59,10 @@ import Data.List (foldl')
-- hashes, and at most once otherwise. Previously, we were slower, and people
-- rightfully complained: #10397
--- TODO: Use optimization fuel
-elimCommonBlocks :: CmmGraph -> CmmGraph
-elimCommonBlocks g = replaceLabels env $ copyTicks env g
+type Subst = LabelMap BlockId
+
+elimCommonBlocks :: CmmGraph -> (CmmGraph, Subst)
+elimCommonBlocks g = (replaceLabels env $ copyTicks env g, env)
where
env = iterate mapEmpty blocks_with_key
-- The order of blocks doesn't matter here, but revPostorder also drops any
@@ -73,7 +74,6 @@ elimCommonBlocks g = replaceLabels env $ copyTicks env g
-- (so avoid comparing them again)
type DistinctBlocks = [CmmBlock]
type Key = [Label]
-type Subst = LabelMap BlockId
-- The outer list groups by hash. We retain this grouping throughout.
iterate :: Subst -> [[(Key, DistinctBlocks)]] -> Subst
diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs
index 4d109a4086..babdb0b300 100644
--- a/compiler/cmm/CmmPipeline.hs
+++ b/compiler/cmm/CmmPipeline.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE TypeFamilies #-}
module CmmPipeline (
-- | Converts C-- with an implicit stack and native C-- calls into
@@ -28,6 +29,8 @@ import Control.Monad
import Outputable
import Platform
+import Data.Maybe
+
-----------------------------------------------------------------------------
-- | Top level driver for C-- pipeline
-----------------------------------------------------------------------------
@@ -67,9 +70,9 @@ cpsTop hsc_env proc =
, do_layout = do_layout }} = h
----------- Eliminate common blocks -------------------------------------
- g <- {-# SCC "elimCommonBlocks" #-}
- condPass Opt_CmmElimCommonBlocks elimCommonBlocks g
- Opt_D_dump_cmm_cbe "Post common block elimination"
+ (g, _) <- {-# SCC "elimCommonBlocks" #-}
+ condPass2 Opt_CmmElimCommonBlocks elimCommonBlocks g mapEmpty
+ Opt_D_dump_cmm_cbe "Post common block elimination"
-- Any work storing block Labels must be performed _after_
-- elimCommonBlocks
@@ -104,6 +107,32 @@ cpsTop hsc_env proc =
condPass Opt_CmmSink (cmmSink dflags) g
Opt_D_dump_cmm_sink "Sink assignments"
+ (g, call_pps, proc_points) <- do
+ -- Only do the second CBE if we did the sinking pass. Otherwise,
+ -- it's unlikely we'll have any new opportunities to find redundant
+ -- blocks.
+ if not (gopt Opt_CmmSink dflags)
+ then pure (g, call_pps, proc_points)
+ else do
+ (g, cbe_subst) <- {-# SCC "elimCommonBlocks2" #-}
+ condPass2
+ Opt_CmmElimCommonBlocks elimCommonBlocks g mapEmpty
+ Opt_D_dump_cmm_cbe "Post common block elimination 2"
+
+ -- CBE might invalidate the results of proc-point analysis (by
+ -- removing labels). So we need to fix it. Instead of re-doing
+ -- the whole analysis, we use the final substitution env from
+ -- CBE to update existing results.
+ let cbe_fix set bid =
+ setInsert (fromMaybe bid (mapLookup bid cbe_subst)) set
+ let !new_call_pps = setFoldl cbe_fix setEmpty call_pps
+ let !new_proc_points
+ | splitting_proc_points =
+ setFoldl cbe_fix setEmpty proc_points
+ | otherwise = new_call_pps
+
+ return (g, new_call_pps, new_proc_points)
+
------------- CAF analysis ----------------------------------------------
let cafEnv = {-# SCC "cafAnal" #-} cafAnal g
dumpWith dflags Opt_D_dump_cmm_caf "CAFEnv" (ppr cafEnv)
@@ -155,6 +184,13 @@ cpsTop hsc_env proc =
return g
else return g
+ condPass2 flag pass g a dumpflag dumpname =
+ if gopt flag dflags
+ then do
+ (g, a) <- return $ pass g
+ dump dumpflag dumpname g
+ return (g, a)
+ else return (g, a)
-- we don't need to split proc points for the NCG, unless
-- tablesNextToCode is off. The latter is because we have no