Speed up elimCommonBlocks by grouping blocks also by outgoing labels

This is an attempt to improve the situation described in #10397, where the linear scan of possible candidates for commoning up is far too expensive. There is (ever) more room for improvement, but this is a start. Differential Revision: https://phabricator.haskell.org/D892 (cherry picked from commit c256357242ee2dd282fd0516260edccbb7617244)
author: Joachim Breitner <mail@joachim-breitner.de> 2015-05-16 01:22:06 +0200
committer: Austin Seipp <austin@well-typed.com> 2015-05-18 05:58:34 -0500
commit: bac8717c68ef4907908f80b23dc9dd9e88dfa987 (patch)
tree: 7cb273910480cadd9664b1a7c0f18b2499a4fd36
parent: 70925f0a317a9db31d228ae51e0d0acd0cb66542 (diff)
download: haskell-bac8717c68ef4907908f80b23dc9dd9e88dfa987.tar.gz
1 files changed, 112 insertions, 31 deletions
diff --git a/compiler/cmm/CmmCommonBlockElim.hs b/compiler/cmm/CmmCommonBlockElim.hs
index 95910d16d5..83b2841b28 100644
--- a/compiler/cmm/CmmCommonBlockElim.hs
+++ b/compiler/cmm/CmmCommonBlockElim.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE GADTs #-}
+{-# LANGUAGE GADTs, BangPatterns #-}
 module CmmCommonBlockElim
   ( elimCommonBlocks
   )
@@ -19,9 +19,8 @@ import Data.Word
 import qualified Data.Map as M
 import Outputable
 import UniqFM
-
-my_trace :: String -> SDoc -> a -> a
-my_trace = if False then pprTrace else \_ _ a -> a
+import Unique
+import Control.Arrow (first, second)
 
 -- -----------------------------------------------------------------------------
 -- Eliminate common blocks
@@ -37,40 +36,72 @@ my_trace = if False then pprTrace else \_ _ a -> a
 -- is made redundant by the old block.
 -- Otherwise, it is added to the useful blocks.
 
+-- To avoid comparing every block with every other block repeatedly, we group
+-- them by
+--   * a hash of the block, ignoring labels (explained below)
+--   * the list of outgoing labels
+-- The hash is invariant under relabeling, so we only ever compare within
+-- the same group of blocks.
+--
+-- The list of outgoing labels is updated as we merge blocks, and only blocks
+-- that had different labels before are compared.
+--
+-- All in all, two blocks should never be compared if they have different
+-- hashes, and at most once otherwise. Previously, we were slower, and people
+-- rightfully complained: #10397
+
 -- TODO: Use optimization fuel
 elimCommonBlocks :: CmmGraph -> CmmGraph
 elimCommonBlocks g = replaceLabels env $ copyTicks env g
   where
-     env = iterate hashed_blocks mapEmpty
-     hashed_blocks = map (\b -> (hash_block b, b)) $ postorderDfs g
-
--- Iterate over the blocks until convergence
-iterate :: [(HashCode,CmmBlock)] -> BlockEnv BlockId -> BlockEnv BlockId
-iterate blocks subst =
-  case foldl common_block (False, emptyUFM, subst) blocks of
-    (changed,  _, subst)
-       | changed   -> iterate blocks subst
-       | otherwise -> subst
+     env = iterate mapEmpty blocks_with_key
+     groups = groupBy hash_block (postorderDfs g)
+     blocks_with_key = [ [ (successors b, [b]) | b <- bs] | bs <- groups]
+
+-- Invariant: The blocks in the list are pairwise distinct
+-- (so avoid comparing them again)
+type DistinctBlocks = [CmmBlock]
+type Key = [Label]
+type Subst = BlockEnv BlockId
+
+-- The outer list groups by hash. We retain this grouping throughout.
+iterate :: Subst -> [[(Key, DistinctBlocks)]] -> Subst
+iterate subst blocks
+    | mapNull new_substs = subst
+    | otherwise = iterate subst' updated_blocks
+  where
+    grouped_blocks :: [[(Key, [DistinctBlocks])]]
+    grouped_blocks = map groupByLabel blocks
 
-type State  = (ChangeFlag, UniqFM [CmmBlock], BlockEnv BlockId)
+    merged_blocks :: [[(Key, DistinctBlocks)]]
+    (new_substs, merged_blocks) = List.mapAccumL (List.mapAccumL go) mapEmpty grouped_blocks
+      where
+        go !new_subst1 (k,dbs) = (new_subst1 `mapUnion` new_subst2, (k,db))
+          where
+            (new_subst2, db) = mergeBlockList subst dbs
 
-type ChangeFlag = Bool
-type HashCode = Int
+    subst' = subst `mapUnion` new_substs
+    updated_blocks = map (map (first (map (lookupBid subst')))) merged_blocks
 
--- Try to find a block that is equal (or ``common'') to b.
-common_block :: State -> (HashCode, CmmBlock) -> State
-common_block (old_change, bmap, subst) (hash, b) =
-  case lookupUFM bmap hash of
-    Just bs -> case (List.find (eqBlockBodyWith (eqBid subst) b) bs,
-                     mapLookup bid subst) of
-                 (Just b', Nothing)                         -> addSubst b'
-                 (Just b', Just b'') | entryLabel b' /= b'' -> addSubst b'
-                                     | otherwise -> (old_change, bmap, subst)
-                 _ -> (old_change, addToUFM bmap hash (b : bs), subst)
-    Nothing -> (old_change, addToUFM bmap hash [b], subst)
-  where bid = entryLabel b
-        addSubst b' = my_trace "found new common block" (ppr bid <> char '=' <> ppr (entryLabel b')) $
-                      (True, bmap, mapInsert bid (entryLabel b') subst)
+mergeBlocks :: Subst -> DistinctBlocks -> DistinctBlocks -> (Subst, DistinctBlocks)
+mergeBlocks subst existing new = go new
+  where
+    go [] = (mapEmpty, existing)
+    go (b:bs) = case List.find (eqBlockBodyWith (eqBid subst) b) existing of
+        -- This block is a duplicate. Drop it, and add it to the substitution
+        Just b' -> first (mapInsert (entryLabel b) (entryLabel b')) $ go bs
+        -- This block is not a duplicate, keep it.
+        Nothing -> second (b:) $ go bs
+
+mergeBlockList :: Subst -> [DistinctBlocks] -> (Subst, DistinctBlocks)
+mergeBlockList _ [] = pprPanic "mergeBlockList" empty
+mergeBlockList subst (b:bs) = go mapEmpty b bs
+  where
+    go !new_subst1 b [] = (new_subst1, b)
+    go !new_subst1 b1 (b2:bs) = go new_subst b bs
+      where
+        (new_subst2, b) =  mergeBlocks subst b1 b2
+        new_subst = new_subst1 `mapUnion` new_subst2
 
 
 -- -----------------------------------------------------------------------------
@@ -82,6 +113,9 @@ common_block (old_change, bmap, subst) (hash, b) =
 -- To speed up comparisons, we hash each basic block modulo labels.
 -- The hashing is a bit arbitrary (the numbers are completely arbitrary),
 -- but it should be fast and good enough.
+
+type HashCode = Int
+
 hash_block :: CmmBlock -> HashCode
 hash_block block =
   fromIntegral (foldBlockNodesB3 (hash_fst, hash_mid, hash_lst) block (0 :: Word32) .&. (0x7fffffff :: Word32))
@@ -237,3 +271,50 @@ copyTicks env g
               (CmmEntry lbl scp1, code) = blockSplitHead to
           in CmmEntry lbl (combineTickScopes scp0 scp1) `blockJoinHead`
              foldr blockCons code (map CmmTick ticks)
+
+-- Group by [Label]
+groupByLabel :: [(Key, a)] -> [(Key, [a])]
+groupByLabel = go emptyILM
+  where
+    go !m [] = elemsILM m
+    go !m ((k,v) : entries) = go (alterILM adjust m k') entries
+      where k' = map getUnique k
+            adjust Nothing       = Just (k,[v])
+            adjust (Just (_,vs)) = Just (k,v:vs)
+
+groupBy :: (a -> Int) -> [a] -> [[a]]
+groupBy f xs = eltsUFM $ List.foldl' go emptyUFM xs
+  where go m x = alterUFM (Just . maybe [x] (x:)) m (f x)
+
+-- Efficient lookup into [([Unique], a)]
+data IntListMap a = ILM (Maybe a) (UniqFM (IntListMap a))
+
+emptyILM :: IntListMap a
+emptyILM = ILM Nothing emptyUFM
+
+unitILM :: [Unique] -> a -> IntListMap a
+unitILM [] a     = ILM (Just a) emptyUFM
+unitILM (l:ls) a = ILM Nothing  (unitUFM l (unitILM ls a))
+
+
+alterILM :: (Maybe a -> Maybe a) -> IntListMap a -> [Unique] ->  IntListMap a
+alterILM f (ILM ma m)  []    = ILM (f ma) m
+alterILM f (ILM ma m) (l:ls) = ILM ma (alterUFM go m l)
+  where go Nothing    = fmap (unitILM ls) (f Nothing)
+        go (Just ilm) = Just $ alterILM f ilm ls
+
+{- currently unused
+addToILM :: IntListMap a -> [Unique] -> a -> IntListMap a
+addToILM (ILM _ m)  []     a = ILM (Just a) m
+addToILM (ILM ma m) (l:ls) a = ILM ma $ alterUFM go m l
+  where go Nothing    = Just $ unitILM ls a
+        go (Just ilm) = Just $ addToILM ilm ls a
+
+lookupILM :: IntListMap a -> [Unique] -> Maybe a
+lookupILM (ILM ma _) [] = ma
+lookupILM (ILM _ m) (l:ls) = lookupUFM m l >>= (\m -> lookupILM m ls)
+-}
+
+elemsILM :: IntListMap a -> [a]
+elemsILM (ILM ma m) = maybe id (:) ma $ concatMap elemsILM $ eltsUFM m
+
author	Joachim Breitner <mail@joachim-breitner.de>	2015-05-16 01:22:06 +0200
committer	Austin Seipp <austin@well-typed.com>	2015-05-18 05:58:34 -0500
commit	bac8717c68ef4907908f80b23dc9dd9e88dfa987 (patch)
tree	7cb273910480cadd9664b1a7c0f18b2499a4fd36
parent	70925f0a317a9db31d228ae51e0d0acd0cb66542 (diff)
download	haskell-bac8717c68ef4907908f80b23dc9dd9e88dfa987.tar.gz