summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreas Klebinger <klebinger.andreas@gmx.at>2020-11-27 21:03:47 +0100
committerMarge Bot <ben+marge-bot@smart-cactus.org>2020-12-08 22:43:21 -0500
commit69ae10c39bfed1c4f90f34b42aa0630e0fda2b1b (patch)
tree96c07cbc7b483ea7a5054b8da9d714155acfb82e
parent51e3bb6db85c20cb6b287fa5ec7cfe679a7e5259 (diff)
downloadhaskell-69ae10c39bfed1c4f90f34b42aa0630e0fda2b1b.tar.gz
CmmSink: Force inlining of foldRegsDefd
Helps avoid allocating the folding function. Improves perf for T3294 by about 1%.
-rw-r--r--compiler/GHC/Cmm/Sink.hs51
1 files changed, 45 insertions, 6 deletions
diff --git a/compiler/GHC/Cmm/Sink.hs b/compiler/GHC/Cmm/Sink.hs
index 87bc2c33ee..cf20291fae 100644
--- a/compiler/GHC/Cmm/Sink.hs
+++ b/compiler/GHC/Cmm/Sink.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE GADTs #-}
+{-# LANGUAGE ScopedTypeVariables #-}
module GHC.Cmm.Sink (
cmmSink
) where
@@ -24,6 +25,8 @@ import Data.List (partition)
import qualified Data.Set as Set
import Data.Maybe
+import GHC.Exts (inline)
+
-- Compact sets for membership tests of local variables.
type LRegSet = IntSet.IntSet
@@ -403,7 +406,7 @@ dropAssignments platform should_drop state assigs
-- inlining opens up opportunities for doing so.
tryToInline
- :: Platform
+ :: forall x. Platform
-> LocalRegSet -- set of registers live after this
-- node. We cannot inline anything
-- that is live after the node, unless
@@ -437,13 +440,14 @@ tryToInline platform live node assigs = go usages node emptyLRegSet assigs
dont_inline = keep node -- don't inline the assignment, keep it
inline_and_keep = keep inl_node -- inline the assignment, keep it
+ keep :: CmmNode O x -> (CmmNode O x, Assignments)
keep node' = (final_node, a : rest')
where (final_node, rest') = go usages' node' (insertLRegSet l skipped) rest
+
+ -- Avoid discarding of assignments to vars on the rhs.
+ -- See Note [Keeping assignemnts mentioned in skipped RHSs]
usages' = foldLocalRegsUsed platform (\m r -> addToUFM m r 2)
usages rhs
- -- we must not inline anything that is mentioned in the RHS
- -- of a binding that we have already skipped, so we set the
- -- usages of the regs on the RHS to 2.
cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments]
|| l `elemLRegSet` skipped
@@ -467,6 +471,25 @@ tryToInline platform live node assigs = go usages node emptyLRegSet assigs
inl_exp (CmmMachOp op args) = cmmMachOpFold platform op args
inl_exp other = other
+{- Note [Keeping assignemnts mentioned in skipped RHSs]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ If we have to assignments: [z = y, y = e1] and we skip
+ z we *must* retain the assignment y = e1. This is because
+ we might inline "z = y" into another node later on so we
+ must ensure y is still defined at this point.
+
+ If we dropped the assignment of "y = e1" then we would end up
+ referencing a variable which hasn't been mentioned after
+ inlining.
+
+ We use a hack to do this, which is setting all regs used on the
+ RHS to two uses. Since we only discard assignments to variables
+ which are used once or never this prevents discarding of the
+ assignment. It still allows inlining should e1 be a trivial rhs
+ however.
+
+-}
{- Note [improveConditional]
@@ -610,18 +633,34 @@ conflicts platform (r, rhs, addr) node
-- (7) otherwise, no conflict
| otherwise = False
+{- Note [Inlining foldRegsDefd]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ foldRegsDefd is, after optimization, *not* a small function so
+ it's only marked INLINEABLE, but not INLINE.
+
+ However in some specific cases we call it *very* often making it
+ important to avoid the overhead of allocating the folding function.
+
+ So we simply force inlining via the magic inline function.
+ For T3294 this improves allocation with -O by ~1%.
+
+-}
+
-- Returns True if node defines any global registers that are used in the
-- Cmm expression
globalRegistersConflict :: Platform -> CmmExpr -> CmmNode e x -> Bool
globalRegistersConflict platform expr node =
- foldRegsDefd platform (\b r -> b || regUsedIn platform (CmmGlobal r) expr)
+ -- See Note [Inlining foldRegsDefd]
+ inline foldRegsDefd platform (\b r -> b || regUsedIn platform (CmmGlobal r) expr)
False node
-- Returns True if node defines any local registers that are used in the
-- Cmm expression
localRegistersConflict :: Platform -> CmmExpr -> CmmNode e x -> Bool
localRegistersConflict platform expr node =
- foldRegsDefd platform (\b r -> b || regUsedIn platform (CmmLocal r) expr)
+ -- See Note [Inlining foldRegsDefd]
+ inline foldRegsDefd platform (\b r -> b || regUsedIn platform (CmmLocal r) expr)
False node
-- Note [Sinking and calls]