summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/cmm/CmmLayoutStack.hs99
-rw-r--r--compiler/cmm/CmmPipeline.hs10
-rw-r--r--compiler/cmm/CmmSink.hs255
-rw-r--r--compiler/ghc.cabal.in1
4 files changed, 278 insertions, 87 deletions
diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs
index 732fb2b849..d45c4d8546 100644
--- a/compiler/cmm/CmmLayoutStack.hs
+++ b/compiler/cmm/CmmLayoutStack.hs
@@ -3,7 +3,7 @@
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
#endif
module CmmLayoutStack (
- cmmLayoutStack, setInfoTableStackMap, cmmSink
+ cmmLayoutStack, setInfoTableStackMap
) where
import StgCmmUtils ( callerSaveVolatileRegs ) -- XXX
@@ -34,7 +34,7 @@ import qualified Data.Set as Set
import Control.Monad.Fix
import Data.Array as Array
import Data.Bits
-import Data.List (nub, partition)
+import Data.List (nub)
import Control.Monad (liftM)
#include "HsVersions.h"
@@ -111,20 +111,20 @@ cmmLayoutStack :: ProcPointSet -> ByteOff -> CmmGraph
cmmLayoutStack procpoints entry_args
graph0@(CmmGraph { g_entry = entry })
= do
- pprTrace "cmmLayoutStack" (ppr entry_args) $ return ()
+ -- pprTrace "cmmLayoutStack" (ppr entry_args) $ return ()
(graph, liveness) <- removeDeadAssignments graph0
- pprTrace "liveness" (ppr liveness) $ return ()
+ -- pprTrace "liveness" (ppr liveness) $ return ()
let blocks = postorderDfs graph
- (final_stackmaps, final_high_sp, new_blocks) <-
+ (final_stackmaps, _final_high_sp, new_blocks) <-
mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) ->
layout procpoints liveness entry entry_args
rec_stackmaps rec_high_sp blocks
new_blocks' <- mapM lowerSafeForeignCall new_blocks
- pprTrace ("Sp HWM") (ppr final_high_sp) $
- return (ofBlockList entry new_blocks', final_stackmaps)
+ -- pprTrace ("Sp HWM") (ppr _final_high_sp) $ return ()
+ return (ofBlockList entry new_blocks', final_stackmaps)
@@ -167,7 +167,7 @@ layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks
(pprPanic "no stack map for" (ppr entry_lbl))
entry_lbl acc_stackmaps
- pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return ()
+ -- pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return ()
-- (a) Update the stack map to include the effects of
-- assignments in this block
@@ -188,7 +188,7 @@ layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks
<- handleLastNode procpoints liveness cont_info
acc_stackmaps stack1 middle0 last0
- pprTrace "layout(out)" (ppr out) $ return ()
+ -- pprTrace "layout(out)" (ppr out) $ return ()
-- (d) Manifest Sp: run over the nodes in the block and replace
-- CmmStackSlot with CmmLoad from Sp with a concrete offset.
@@ -416,8 +416,8 @@ handleLastNode procpoints liveness cont_info stackmaps
case mapLookup l stackmaps of
Just pp_sm -> (pp_sm, fixupStack stack0 pp_sm)
Nothing ->
- pprTrace "first visit to proc point"
- (ppr l <+> ppr stack1) $
+ --pprTrace "first visit to proc point"
+ -- (ppr l <+> ppr stack1) $
(stack1, assigs)
where
cont_args = mapFindWithDefault 0 l cont_info
@@ -570,7 +570,7 @@ allocate :: ByteOff -> RegSet -> StackMap -> (StackMap, [CmmNode O O])
allocate ret_off live stackmap@StackMap{ sm_sp = sp0
, sm_regs = regs0 }
=
- pprTrace "allocate" (ppr live $$ ppr stackmap) $
+ -- pprTrace "allocate" (ppr live $$ ppr stackmap) $
-- we only have to save regs that are not already in a slot
let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live)
@@ -798,7 +798,8 @@ elimStackStores stackmap stackmaps area_off nodes
CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r))
| Just (_,off) <- lookupUFM (sm_regs stackmap) r
, area_off area + m == off
- -> pprTrace "eliminated a node!" (ppr r) $ go stackmap ns
+ -> -- pprTrace "eliminated a node!" (ppr r) $
+ go stackmap ns
_otherwise
-> n : go (procMiddle stackmaps n stackmap) ns
@@ -978,75 +979,3 @@ insertReloads stackmap =
stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)]
stackSlotRegs sm = eltsUFM (sm_regs sm)
--- -----------------------------------------------------------------------------
-
--- If we do this *before* stack layout, we might be able to avoid
--- saving some things across calls/procpoints.
---
--- *but*, that will invalidate the liveness analysis, and we'll have
--- to re-do it.
-
-cmmSink :: CmmGraph -> UniqSM CmmGraph
-cmmSink graph = do
- let liveness = cmmLiveness graph
- return $ cmmSink' liveness graph
-
-cmmSink' :: BlockEnv CmmLive -> CmmGraph -> CmmGraph
-cmmSink' liveness graph
- = ofBlockList (g_entry graph) $ sink mapEmpty $ postorderDfs graph
- where
-
- sink :: BlockEnv [(LocalReg, CmmExpr)] -> [CmmBlock] -> [CmmBlock]
- sink _ [] = []
- sink sunk (b:bs) =
- pprTrace "sink" (ppr l) $
- blockJoin first final_middle last : sink sunk' bs
- where
- l = entryLabel b
- (first, middle, last) = blockSplit b
- (middle', assigs) = walk (blockToList middle) emptyBlock
- (mapFindWithDefault [] l sunk)
-
- (dropped_last, assigs') = partition (`conflictsWithLast` last) assigs
-
- final_middle = foldl blockSnoc middle' (toNodes dropped_last)
-
- sunk' = mapUnion sunk $
- mapFromList [ (l, filt assigs' (getLive l))
- | l <- successors last ]
- where
- getLive l = mapFindWithDefault Set.empty l liveness
- filt as live = [ (r,e) | (r,e) <- as, r `Set.member` live ]
-
-
-walk :: [CmmNode O O] -> Block CmmNode O O -> [(LocalReg, CmmExpr)]
- -> (Block CmmNode O O, [(LocalReg, CmmExpr)])
-
-walk [] acc as = (acc, as)
-walk (n:ns) acc as
- | Just a <- collect_it = walk ns acc (a:as)
- | otherwise = walk ns (foldr (flip blockSnoc) acc (n:drop_nodes)) as'
- where
- collect_it = case n of
- CmmAssign (CmmLocal r) e@(CmmReg (CmmGlobal _)) -> Just (r,e)
--- CmmAssign (CmmLocal r) e@(CmmLoad addr _) |
--- foldRegsUsed (\b r -> False) True addr -> Just (r,e)
- _ -> Nothing
-
- drop_nodes = toNodes dropped
- (dropped, as') = partition should_drop as
- where should_drop a = a `conflicts` n
-
-toNodes :: [(LocalReg,CmmExpr)] -> [CmmNode O O]
-toNodes as = [ CmmAssign (CmmLocal r) rhs | (r,rhs) <- as ]
-
--- We only sink "r = G" assignments right now, so conflicts is very simple:
-conflicts :: (LocalReg,CmmExpr) -> CmmNode O O -> Bool
-(_, rhs) `conflicts` CmmAssign reg _ | reg `regUsedIn` rhs = True
---(r, CmmLoad _ _) `conflicts` CmmStore _ _ = True
-(r, _) `conflicts` node
- = foldRegsUsed (\b r' -> r == r' || b) False node
-
-conflictsWithLast :: (LocalReg,CmmExpr) -> CmmNode O C -> Bool
-(r, _) `conflictsWithLast` node
- = foldRegsUsed (\b r' -> r == r' || b) False node
diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs
index bb8d5b2f22..3b5a6ebbfc 100644
--- a/compiler/cmm/CmmPipeline.hs
+++ b/compiler/cmm/CmmPipeline.hs
@@ -17,6 +17,7 @@ import CmmCommonBlockElim
import CmmProcPoint
import CmmContFlowOpt
import CmmLayoutStack
+import CmmSink
import UniqSupply
import DynFlags
@@ -110,8 +111,13 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}})
runUniqSM $ cmmLayoutStack procPoints entry_off g
dump Opt_D_dump_cmmz_sp "Layout Stack" g
--- g <- {-# SCC "sink" #-} runUniqSM $ cmmSink g
--- dump Opt_D_dump_cmmz_rewrite "Sink assignments" g
+ g <- if optLevel dflags >= 99
+ then do g <- {-# SCC "sink" #-} return (cmmSink g)
+ dump Opt_D_dump_cmmz_rewrite "Sink assignments" g
+ g <- {-# SCC "inline" #-} return (cmmPeepholeInline g)
+ dump Opt_D_dump_cmmz_rewrite "Peephole inline" g
+ return g
+ else return g
-- ----------- Sink and inline assignments -------------------
-- g <- {-# SCC "rewriteAssignments" #-} runOptimization $
diff --git a/compiler/cmm/CmmSink.hs b/compiler/cmm/CmmSink.hs
new file mode 100644
index 0000000000..3dd5bf7fba
--- /dev/null
+++ b/compiler/cmm/CmmSink.hs
@@ -0,0 +1,255 @@
+{-# LANGUAGE GADTs #-}
+module CmmSink (
+ cmmSink,
+ cmmPeepholeInline
+ ) where
+
+import Cmm
+import BlockId
+import CmmLive
+import CmmUtils
+import Hoopl
+
+import UniqFM
+import Unique
+import Outputable
+
+import qualified Data.Set as Set
+
+-- -----------------------------------------------------------------------------
+-- Sinking
+
+-- This is an optimisation pass that
+-- (a) moves assignments closer to their uses, to reduce register pressure
+-- (b) pushes assignments into a single branch of a conditional if possible
+
+-- It is particularly helpful in the Cmm generated by the Stg->Cmm
+-- code generator, in which every function starts with a copyIn
+-- sequence like:
+--
+-- x1 = R1
+-- x2 = Sp[8]
+-- x3 = Sp[16]
+-- if (Sp - 32 < SpLim) then L1 else L2
+--
+-- we really want to push the x1..x3 assignments into the L2 branch.
+--
+-- Algorithm:
+--
+-- * Start by doing liveness analysis.
+-- * Keep a list of assignments; earlier ones may refer to later ones
+-- * Walk forwards through the graph;
+-- * At an assignment:
+-- * pick up the assignment and add it to the list
+-- * At a store:
+-- * drop any assignments that the store refers to
+-- * drop any assignments that refer to memory that may be written
+-- by the store
+-- * do this recursively, dropping dependent assignments
+-- * At a multi-way branch:
+-- * drop any assignments that are live on more than one branch
+-- * if any successor has more than one predecessor, drop everything
+-- live in that successor
+--
+-- As a side-effect we'll delete some dead assignments (transitively,
+-- even). Maybe we could do without removeDeadAssignments?
+
+-- If we do this *before* stack layout, we might be able to avoid
+-- saving some things across calls/procpoints.
+--
+-- *but*, that will invalidate the liveness analysis, and we'll have
+-- to re-do it.
+
+cmmSink :: CmmGraph -> CmmGraph
+cmmSink graph = cmmSink' (cmmLiveness graph) graph
+
+type Assignment = (LocalReg, CmmExpr, AbsAddr)
+
+cmmSink' :: BlockEnv CmmLive -> CmmGraph -> CmmGraph
+cmmSink' liveness graph
+ = ofBlockList (g_entry graph) $ sink mapEmpty $ postorderDfs graph
+ where
+
+ sink :: BlockEnv [Assignment] -> [CmmBlock] -> [CmmBlock]
+ sink _ [] = []
+ sink sunk (b:bs) =
+ pprTrace "sink" (ppr lbl) $
+ blockJoin first final_middle last : sink sunk' bs
+ where
+ lbl = entryLabel b
+ (first, middle, last) = blockSplit b
+ (middle', assigs) = walk (blockToList middle) emptyBlock
+ (mapFindWithDefault [] lbl sunk)
+
+ getLive l = mapFindWithDefault Set.empty l liveness
+ lives = map getLive (successors last)
+
+ -- multilive is a list of registers that are live in more than
+ -- one successor branch, and we should therefore drop them here.
+ multilive = [ r | (r,n) <- ufmToList livemap, n > 1 ]
+ where livemap = foldr (\r m -> addToUFM_C (+) m r (1::Int))
+ emptyUFM (concatMap Set.toList lives)
+
+ (dropped_last, assigs') = dropAssignments drop_if assigs
+
+ drop_if a@(r,_,_) = a `conflicts` last || getUnique r `elem` multilive
+
+ final_middle = foldl blockSnoc middle' dropped_last
+
+ sunk' = mapUnion sunk $
+ mapFromList [ (l, filterAssignments (getLive l) assigs')
+ | l <- successors last ]
+
+
+filterAssignments :: RegSet -> [Assignment] -> [Assignment]
+filterAssignments live assigs = reverse (go assigs [])
+ where go [] kept = kept
+ go (a@(r,_,_):as) kept | needed = go as (a:kept)
+ | otherwise = go as kept
+ where
+ needed = r `Set.member` live || any (a `conflicts`) (map toNode kept)
+
+
+walk :: [CmmNode O O] -> Block CmmNode O O -> [Assignment]
+ -> (Block CmmNode O O, [Assignment])
+
+walk [] block as = (block, as)
+walk (n:ns) block as
+ | Just a <- shouldSink n = walk ns block (a : as)
+ | otherwise = walk ns block' as'
+ where
+ (dropped, as') = dropAssignments (`conflicts` n) as
+ block' = foldl blockSnoc block dropped `blockSnoc` n
+
+shouldSink (CmmAssign (CmmLocal r) e) | no_local_regs = Just (r, e, exprAddr e)
+ where no_local_regs = foldRegsUsed (\_ _ -> False) True e
+shouldSink _other = Nothing
+
+toNode :: Assignment -> CmmNode O O
+toNode (r,rhs,_) = CmmAssign (CmmLocal r) rhs
+
+dropAssignments :: (Assignment -> Bool) -> [Assignment] -> ([CmmNode O O], [Assignment])
+dropAssignments should_drop assigs
+ = (dropped, reverse kept)
+ where
+ (dropped,kept) = go assigs [] []
+
+ go [] dropped kept = (dropped, kept)
+ go (assig : rest) dropped kept
+ | conflict = go rest (toNode assig : dropped) kept
+ | otherwise = go rest dropped (assig:kept)
+ where
+ conflict = should_drop assig || any (assig `conflicts`) dropped
+
+-- | @conflicts (r,e) stmt@ is @False@ if and only if the assignment
+-- @r = e@ can be safely commuted past @stmt@.
+--
+-- We only sink "r = G" assignments right now, so conflicts is very simple:
+--
+conflicts :: Assignment -> CmmNode O x -> Bool
+(_, rhs, _ ) `conflicts` CmmAssign reg _ | reg `regUsedIn` rhs = True
+(_, _, addr) `conflicts` CmmStore addr' _ | addrConflicts addr (loadAddr addr') = True
+(r, _, _) `conflicts` node
+ = foldRegsUsed (\b r' -> r == r' || b) False node
+
+-- An abstraction of the addresses read or written.
+data AbsAddr = NoAddr | HeapAddr | StackAddr | AnyAddr
+
+bothAddrs :: AbsAddr -> AbsAddr -> AbsAddr
+bothAddrs NoAddr x = x
+bothAddrs x NoAddr = x
+bothAddrs HeapAddr HeapAddr = HeapAddr
+bothAddrs StackAddr StackAddr = StackAddr
+bothAddrs _ _ = AnyAddr
+
+addrConflicts :: AbsAddr -> AbsAddr -> Bool
+addrConflicts NoAddr _ = False
+addrConflicts _ NoAddr = False
+addrConflicts HeapAddr StackAddr = False
+addrConflicts StackAddr HeapAddr = False
+addrConflicts _ _ = True
+
+exprAddr :: CmmExpr -> AbsAddr -- here NoAddr means "no reads"
+exprAddr (CmmLoad addr _) = loadAddr addr
+exprAddr (CmmMachOp _ es) = foldr bothAddrs NoAddr (map exprAddr es)
+exprAddr _ = NoAddr
+
+absAddr :: CmmExpr -> AbsAddr -- here NoAddr means "don't know"
+absAddr (CmmLoad addr _) = bothAddrs HeapAddr (loadAddr addr) -- (1)
+absAddr (CmmMachOp _ es) = foldr bothAddrs NoAddr (map absAddr es)
+absAddr (CmmReg r) = regAddr r
+absAddr (CmmRegOff r _) = regAddr r
+absAddr _ = NoAddr
+
+loadAddr :: CmmExpr -> AbsAddr
+loadAddr e = case absAddr e of
+ NoAddr -> HeapAddr -- (2)
+ a -> a
+
+-- (1) we assume that an address read from memory is a heap address.
+-- We never read a stack address from memory.
+--
+-- (2) loading from an unknown address is assumed to be a heap load.
+
+regAddr :: CmmReg -> AbsAddr
+regAddr (CmmGlobal Sp) = StackAddr
+regAddr (CmmGlobal Hp) = HeapAddr
+regAddr _ = NoAddr
+
+-- After sinking, if we have an assignment to a temporary that is used
+-- exactly once, then it will either be of the form
+--
+-- x = E
+-- .. stmt involving x ..
+--
+-- OR
+--
+-- x = E
+-- .. stmt conflicting with E ..
+
+-- So the idea in peepholeInline is to spot the first case
+-- (recursively) and inline x. We start with the set of live
+-- registers and move backwards through the block.
+--
+-- ToDo: doesn't inline into the last node
+--
+cmmPeepholeInline :: CmmGraph -> CmmGraph
+cmmPeepholeInline graph = ofBlockList (g_entry graph) $ map do_block (toBlockList graph)
+ where
+ liveness = cmmLiveness graph
+
+ do_block :: Block CmmNode C C -> Block CmmNode C C
+ do_block block = blockJoin first (go rmiddle live_middle) last
+ where
+ (first, middle, last) = blockSplit block
+ rmiddle = reverse (blockToList middle)
+
+ live = Set.unions [ mapFindWithDefault Set.empty l liveness | l <- successors last ]
+
+ live_middle = gen_kill last live
+
+ go :: [CmmNode O O] -> RegSet -> Block CmmNode O O
+ go [] _ = emptyBlock
+ go [stmt] _ = blockCons stmt emptyBlock
+ go (stmt : rest) live = tryInline stmt usages live rest
+ where
+ usages :: UniqFM Int
+ usages = foldRegsUsed addUsage emptyUFM stmt
+
+ addUsage :: UniqFM Int -> LocalReg -> UniqFM Int
+ addUsage m r = addToUFM_C (+) m r 1
+
+ tryInline stmt usages live stmts@(CmmAssign (CmmLocal l) rhs : rest)
+ | not (l `elemRegSet` live),
+ Just 1 <- lookupUFM usages l = tryInline stmt' usages' live' rest
+ where live' = foldRegsUsed extendRegSet live rhs
+ usages' = foldRegsUsed addUsage usages rhs
+
+ stmt' = mapExpDeep inline stmt
+ where inline (CmmReg (CmmLocal l')) | l == l' = rhs
+ inline (CmmRegOff (CmmLocal l') off) | l == l' = cmmOffset rhs off
+ inline other = other
+
+ tryInline stmt _usages live stmts
+ = go stmts (gen_kill stmt live) `blockSnoc` stmt
+
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index 3c13bb4704..9e772d23e8 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -186,6 +186,7 @@ Library
CmmParse
CmmProcPoint
CmmRewriteAssignments
+ CmmSink
CmmType
CmmUtils
CmmLayoutStack