diff options
author | Andreas Klebinger <klebinger.andreas@gmx.at> | 2020-11-27 21:03:47 +0100 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2020-12-08 22:43:21 -0500 |
commit | 69ae10c39bfed1c4f90f34b42aa0630e0fda2b1b (patch) | |
tree | 96c07cbc7b483ea7a5054b8da9d714155acfb82e /compiler/GHC/Cmm | |
parent | 51e3bb6db85c20cb6b287fa5ec7cfe679a7e5259 (diff) | |
download | haskell-69ae10c39bfed1c4f90f34b42aa0630e0fda2b1b.tar.gz |
CmmSink: Force inlining of foldRegsDefd
Helps avoid allocating the folding function. Improves
perf for T3294 by about 1%.
Diffstat (limited to 'compiler/GHC/Cmm')
-rw-r--r-- | compiler/GHC/Cmm/Sink.hs | 51 |
1 files changed, 45 insertions, 6 deletions
diff --git a/compiler/GHC/Cmm/Sink.hs b/compiler/GHC/Cmm/Sink.hs index 87bc2c33ee..cf20291fae 100644 --- a/compiler/GHC/Cmm/Sink.hs +++ b/compiler/GHC/Cmm/Sink.hs @@ -1,4 +1,5 @@ {-# LANGUAGE GADTs #-} +{-# LANGUAGE ScopedTypeVariables #-} module GHC.Cmm.Sink ( cmmSink ) where @@ -24,6 +25,8 @@ import Data.List (partition) import qualified Data.Set as Set import Data.Maybe +import GHC.Exts (inline) + -- Compact sets for membership tests of local variables. type LRegSet = IntSet.IntSet @@ -403,7 +406,7 @@ dropAssignments platform should_drop state assigs -- inlining opens up opportunities for doing so. tryToInline - :: Platform + :: forall x. Platform -> LocalRegSet -- set of registers live after this -- node. We cannot inline anything -- that is live after the node, unless @@ -437,13 +440,14 @@ tryToInline platform live node assigs = go usages node emptyLRegSet assigs dont_inline = keep node -- don't inline the assignment, keep it inline_and_keep = keep inl_node -- inline the assignment, keep it + keep :: CmmNode O x -> (CmmNode O x, Assignments) keep node' = (final_node, a : rest') where (final_node, rest') = go usages' node' (insertLRegSet l skipped) rest + + -- Avoid discarding of assignments to vars on the rhs. + -- See Note [Keeping assignemnts mentioned in skipped RHSs] usages' = foldLocalRegsUsed platform (\m r -> addToUFM m r 2) usages rhs - -- we must not inline anything that is mentioned in the RHS - -- of a binding that we have already skipped, so we set the - -- usages of the regs on the RHS to 2. cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments] || l `elemLRegSet` skipped @@ -467,6 +471,25 @@ tryToInline platform live node assigs = go usages node emptyLRegSet assigs inl_exp (CmmMachOp op args) = cmmMachOpFold platform op args inl_exp other = other +{- Note [Keeping assignemnts mentioned in skipped RHSs] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + If we have to assignments: [z = y, y = e1] and we skip + z we *must* retain the assignment y = e1. This is because + we might inline "z = y" into another node later on so we + must ensure y is still defined at this point. + + If we dropped the assignment of "y = e1" then we would end up + referencing a variable which hasn't been mentioned after + inlining. + + We use a hack to do this, which is setting all regs used on the + RHS to two uses. Since we only discard assignments to variables + which are used once or never this prevents discarding of the + assignment. It still allows inlining should e1 be a trivial rhs + however. + +-} {- Note [improveConditional] @@ -610,18 +633,34 @@ conflicts platform (r, rhs, addr) node -- (7) otherwise, no conflict | otherwise = False +{- Note [Inlining foldRegsDefd] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + foldRegsDefd is, after optimization, *not* a small function so + it's only marked INLINEABLE, but not INLINE. + + However in some specific cases we call it *very* often making it + important to avoid the overhead of allocating the folding function. + + So we simply force inlining via the magic inline function. + For T3294 this improves allocation with -O by ~1%. + +-} + -- Returns True if node defines any global registers that are used in the -- Cmm expression globalRegistersConflict :: Platform -> CmmExpr -> CmmNode e x -> Bool globalRegistersConflict platform expr node = - foldRegsDefd platform (\b r -> b || regUsedIn platform (CmmGlobal r) expr) + -- See Note [Inlining foldRegsDefd] + inline foldRegsDefd platform (\b r -> b || regUsedIn platform (CmmGlobal r) expr) False node -- Returns True if node defines any local registers that are used in the -- Cmm expression localRegistersConflict :: Platform -> CmmExpr -> CmmNode e x -> Bool localRegistersConflict platform expr node = - foldRegsDefd platform (\b r -> b || regUsedIn platform (CmmLocal r) expr) + -- See Note [Inlining foldRegsDefd] + inline foldRegsDefd platform (\b r -> b || regUsedIn platform (CmmLocal r) expr) False node -- Note [Sinking and calls] |