diff options
-rw-r--r-- | compiler/cmm/CmmLayoutStack.hs | 99 | ||||
-rw-r--r-- | compiler/cmm/CmmPipeline.hs | 10 | ||||
-rw-r--r-- | compiler/cmm/CmmSink.hs | 255 | ||||
-rw-r--r-- | compiler/ghc.cabal.in | 1 |
4 files changed, 278 insertions, 87 deletions
diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs index 732fb2b849..d45c4d8546 100644 --- a/compiler/cmm/CmmLayoutStack.hs +++ b/compiler/cmm/CmmLayoutStack.hs @@ -3,7 +3,7 @@ {-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} #endif module CmmLayoutStack ( - cmmLayoutStack, setInfoTableStackMap, cmmSink + cmmLayoutStack, setInfoTableStackMap ) where import StgCmmUtils ( callerSaveVolatileRegs ) -- XXX @@ -34,7 +34,7 @@ import qualified Data.Set as Set import Control.Monad.Fix import Data.Array as Array import Data.Bits -import Data.List (nub, partition) +import Data.List (nub) import Control.Monad (liftM) #include "HsVersions.h" @@ -111,20 +111,20 @@ cmmLayoutStack :: ProcPointSet -> ByteOff -> CmmGraph cmmLayoutStack procpoints entry_args graph0@(CmmGraph { g_entry = entry }) = do - pprTrace "cmmLayoutStack" (ppr entry_args) $ return () + -- pprTrace "cmmLayoutStack" (ppr entry_args) $ return () (graph, liveness) <- removeDeadAssignments graph0 - pprTrace "liveness" (ppr liveness) $ return () + -- pprTrace "liveness" (ppr liveness) $ return () let blocks = postorderDfs graph - (final_stackmaps, final_high_sp, new_blocks) <- + (final_stackmaps, _final_high_sp, new_blocks) <- mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) -> layout procpoints liveness entry entry_args rec_stackmaps rec_high_sp blocks new_blocks' <- mapM lowerSafeForeignCall new_blocks - pprTrace ("Sp HWM") (ppr final_high_sp) $ - return (ofBlockList entry new_blocks', final_stackmaps) + -- pprTrace ("Sp HWM") (ppr _final_high_sp) $ return () + return (ofBlockList entry new_blocks', final_stackmaps) @@ -167,7 +167,7 @@ layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks (pprPanic "no stack map for" (ppr entry_lbl)) entry_lbl acc_stackmaps - pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return () + -- pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return () -- (a) Update the stack map to include the effects of -- assignments in this block @@ -188,7 +188,7 @@ layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks <- handleLastNode procpoints liveness cont_info acc_stackmaps stack1 middle0 last0 - pprTrace "layout(out)" (ppr out) $ return () + -- pprTrace "layout(out)" (ppr out) $ return () -- (d) Manifest Sp: run over the nodes in the block and replace -- CmmStackSlot with CmmLoad from Sp with a concrete offset. @@ -416,8 +416,8 @@ handleLastNode procpoints liveness cont_info stackmaps case mapLookup l stackmaps of Just pp_sm -> (pp_sm, fixupStack stack0 pp_sm) Nothing -> - pprTrace "first visit to proc point" - (ppr l <+> ppr stack1) $ + --pprTrace "first visit to proc point" + -- (ppr l <+> ppr stack1) $ (stack1, assigs) where cont_args = mapFindWithDefault 0 l cont_info @@ -570,7 +570,7 @@ allocate :: ByteOff -> RegSet -> StackMap -> (StackMap, [CmmNode O O]) allocate ret_off live stackmap@StackMap{ sm_sp = sp0 , sm_regs = regs0 } = - pprTrace "allocate" (ppr live $$ ppr stackmap) $ + -- pprTrace "allocate" (ppr live $$ ppr stackmap) $ -- we only have to save regs that are not already in a slot let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live) @@ -798,7 +798,8 @@ elimStackStores stackmap stackmaps area_off nodes CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r)) | Just (_,off) <- lookupUFM (sm_regs stackmap) r , area_off area + m == off - -> pprTrace "eliminated a node!" (ppr r) $ go stackmap ns + -> -- pprTrace "eliminated a node!" (ppr r) $ + go stackmap ns _otherwise -> n : go (procMiddle stackmaps n stackmap) ns @@ -978,75 +979,3 @@ insertReloads stackmap = stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)] stackSlotRegs sm = eltsUFM (sm_regs sm) --- ----------------------------------------------------------------------------- - --- If we do this *before* stack layout, we might be able to avoid --- saving some things across calls/procpoints. --- --- *but*, that will invalidate the liveness analysis, and we'll have --- to re-do it. - -cmmSink :: CmmGraph -> UniqSM CmmGraph -cmmSink graph = do - let liveness = cmmLiveness graph - return $ cmmSink' liveness graph - -cmmSink' :: BlockEnv CmmLive -> CmmGraph -> CmmGraph -cmmSink' liveness graph - = ofBlockList (g_entry graph) $ sink mapEmpty $ postorderDfs graph - where - - sink :: BlockEnv [(LocalReg, CmmExpr)] -> [CmmBlock] -> [CmmBlock] - sink _ [] = [] - sink sunk (b:bs) = - pprTrace "sink" (ppr l) $ - blockJoin first final_middle last : sink sunk' bs - where - l = entryLabel b - (first, middle, last) = blockSplit b - (middle', assigs) = walk (blockToList middle) emptyBlock - (mapFindWithDefault [] l sunk) - - (dropped_last, assigs') = partition (`conflictsWithLast` last) assigs - - final_middle = foldl blockSnoc middle' (toNodes dropped_last) - - sunk' = mapUnion sunk $ - mapFromList [ (l, filt assigs' (getLive l)) - | l <- successors last ] - where - getLive l = mapFindWithDefault Set.empty l liveness - filt as live = [ (r,e) | (r,e) <- as, r `Set.member` live ] - - -walk :: [CmmNode O O] -> Block CmmNode O O -> [(LocalReg, CmmExpr)] - -> (Block CmmNode O O, [(LocalReg, CmmExpr)]) - -walk [] acc as = (acc, as) -walk (n:ns) acc as - | Just a <- collect_it = walk ns acc (a:as) - | otherwise = walk ns (foldr (flip blockSnoc) acc (n:drop_nodes)) as' - where - collect_it = case n of - CmmAssign (CmmLocal r) e@(CmmReg (CmmGlobal _)) -> Just (r,e) --- CmmAssign (CmmLocal r) e@(CmmLoad addr _) | --- foldRegsUsed (\b r -> False) True addr -> Just (r,e) - _ -> Nothing - - drop_nodes = toNodes dropped - (dropped, as') = partition should_drop as - where should_drop a = a `conflicts` n - -toNodes :: [(LocalReg,CmmExpr)] -> [CmmNode O O] -toNodes as = [ CmmAssign (CmmLocal r) rhs | (r,rhs) <- as ] - --- We only sink "r = G" assignments right now, so conflicts is very simple: -conflicts :: (LocalReg,CmmExpr) -> CmmNode O O -> Bool -(_, rhs) `conflicts` CmmAssign reg _ | reg `regUsedIn` rhs = True ---(r, CmmLoad _ _) `conflicts` CmmStore _ _ = True -(r, _) `conflicts` node - = foldRegsUsed (\b r' -> r == r' || b) False node - -conflictsWithLast :: (LocalReg,CmmExpr) -> CmmNode O C -> Bool -(r, _) `conflictsWithLast` node - = foldRegsUsed (\b r' -> r == r' || b) False node diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs index bb8d5b2f22..3b5a6ebbfc 100644 --- a/compiler/cmm/CmmPipeline.hs +++ b/compiler/cmm/CmmPipeline.hs @@ -17,6 +17,7 @@ import CmmCommonBlockElim import CmmProcPoint import CmmContFlowOpt import CmmLayoutStack +import CmmSink import UniqSupply import DynFlags @@ -110,8 +111,13 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) runUniqSM $ cmmLayoutStack procPoints entry_off g dump Opt_D_dump_cmmz_sp "Layout Stack" g --- g <- {-# SCC "sink" #-} runUniqSM $ cmmSink g --- dump Opt_D_dump_cmmz_rewrite "Sink assignments" g + g <- if optLevel dflags >= 99 + then do g <- {-# SCC "sink" #-} return (cmmSink g) + dump Opt_D_dump_cmmz_rewrite "Sink assignments" g + g <- {-# SCC "inline" #-} return (cmmPeepholeInline g) + dump Opt_D_dump_cmmz_rewrite "Peephole inline" g + return g + else return g -- ----------- Sink and inline assignments ------------------- -- g <- {-# SCC "rewriteAssignments" #-} runOptimization $ diff --git a/compiler/cmm/CmmSink.hs b/compiler/cmm/CmmSink.hs new file mode 100644 index 0000000000..3dd5bf7fba --- /dev/null +++ b/compiler/cmm/CmmSink.hs @@ -0,0 +1,255 @@ +{-# LANGUAGE GADTs #-} +module CmmSink ( + cmmSink, + cmmPeepholeInline + ) where + +import Cmm +import BlockId +import CmmLive +import CmmUtils +import Hoopl + +import UniqFM +import Unique +import Outputable + +import qualified Data.Set as Set + +-- ----------------------------------------------------------------------------- +-- Sinking + +-- This is an optimisation pass that +-- (a) moves assignments closer to their uses, to reduce register pressure +-- (b) pushes assignments into a single branch of a conditional if possible + +-- It is particularly helpful in the Cmm generated by the Stg->Cmm +-- code generator, in which every function starts with a copyIn +-- sequence like: +-- +-- x1 = R1 +-- x2 = Sp[8] +-- x3 = Sp[16] +-- if (Sp - 32 < SpLim) then L1 else L2 +-- +-- we really want to push the x1..x3 assignments into the L2 branch. +-- +-- Algorithm: +-- +-- * Start by doing liveness analysis. +-- * Keep a list of assignments; earlier ones may refer to later ones +-- * Walk forwards through the graph; +-- * At an assignment: +-- * pick up the assignment and add it to the list +-- * At a store: +-- * drop any assignments that the store refers to +-- * drop any assignments that refer to memory that may be written +-- by the store +-- * do this recursively, dropping dependent assignments +-- * At a multi-way branch: +-- * drop any assignments that are live on more than one branch +-- * if any successor has more than one predecessor, drop everything +-- live in that successor +-- +-- As a side-effect we'll delete some dead assignments (transitively, +-- even). Maybe we could do without removeDeadAssignments? + +-- If we do this *before* stack layout, we might be able to avoid +-- saving some things across calls/procpoints. +-- +-- *but*, that will invalidate the liveness analysis, and we'll have +-- to re-do it. + +cmmSink :: CmmGraph -> CmmGraph +cmmSink graph = cmmSink' (cmmLiveness graph) graph + +type Assignment = (LocalReg, CmmExpr, AbsAddr) + +cmmSink' :: BlockEnv CmmLive -> CmmGraph -> CmmGraph +cmmSink' liveness graph + = ofBlockList (g_entry graph) $ sink mapEmpty $ postorderDfs graph + where + + sink :: BlockEnv [Assignment] -> [CmmBlock] -> [CmmBlock] + sink _ [] = [] + sink sunk (b:bs) = + pprTrace "sink" (ppr lbl) $ + blockJoin first final_middle last : sink sunk' bs + where + lbl = entryLabel b + (first, middle, last) = blockSplit b + (middle', assigs) = walk (blockToList middle) emptyBlock + (mapFindWithDefault [] lbl sunk) + + getLive l = mapFindWithDefault Set.empty l liveness + lives = map getLive (successors last) + + -- multilive is a list of registers that are live in more than + -- one successor branch, and we should therefore drop them here. + multilive = [ r | (r,n) <- ufmToList livemap, n > 1 ] + where livemap = foldr (\r m -> addToUFM_C (+) m r (1::Int)) + emptyUFM (concatMap Set.toList lives) + + (dropped_last, assigs') = dropAssignments drop_if assigs + + drop_if a@(r,_,_) = a `conflicts` last || getUnique r `elem` multilive + + final_middle = foldl blockSnoc middle' dropped_last + + sunk' = mapUnion sunk $ + mapFromList [ (l, filterAssignments (getLive l) assigs') + | l <- successors last ] + + +filterAssignments :: RegSet -> [Assignment] -> [Assignment] +filterAssignments live assigs = reverse (go assigs []) + where go [] kept = kept + go (a@(r,_,_):as) kept | needed = go as (a:kept) + | otherwise = go as kept + where + needed = r `Set.member` live || any (a `conflicts`) (map toNode kept) + + +walk :: [CmmNode O O] -> Block CmmNode O O -> [Assignment] + -> (Block CmmNode O O, [Assignment]) + +walk [] block as = (block, as) +walk (n:ns) block as + | Just a <- shouldSink n = walk ns block (a : as) + | otherwise = walk ns block' as' + where + (dropped, as') = dropAssignments (`conflicts` n) as + block' = foldl blockSnoc block dropped `blockSnoc` n + +shouldSink (CmmAssign (CmmLocal r) e) | no_local_regs = Just (r, e, exprAddr e) + where no_local_regs = foldRegsUsed (\_ _ -> False) True e +shouldSink _other = Nothing + +toNode :: Assignment -> CmmNode O O +toNode (r,rhs,_) = CmmAssign (CmmLocal r) rhs + +dropAssignments :: (Assignment -> Bool) -> [Assignment] -> ([CmmNode O O], [Assignment]) +dropAssignments should_drop assigs + = (dropped, reverse kept) + where + (dropped,kept) = go assigs [] [] + + go [] dropped kept = (dropped, kept) + go (assig : rest) dropped kept + | conflict = go rest (toNode assig : dropped) kept + | otherwise = go rest dropped (assig:kept) + where + conflict = should_drop assig || any (assig `conflicts`) dropped + +-- | @conflicts (r,e) stmt@ is @False@ if and only if the assignment +-- @r = e@ can be safely commuted past @stmt@. +-- +-- We only sink "r = G" assignments right now, so conflicts is very simple: +-- +conflicts :: Assignment -> CmmNode O x -> Bool +(_, rhs, _ ) `conflicts` CmmAssign reg _ | reg `regUsedIn` rhs = True +(_, _, addr) `conflicts` CmmStore addr' _ | addrConflicts addr (loadAddr addr') = True +(r, _, _) `conflicts` node + = foldRegsUsed (\b r' -> r == r' || b) False node + +-- An abstraction of the addresses read or written. +data AbsAddr = NoAddr | HeapAddr | StackAddr | AnyAddr + +bothAddrs :: AbsAddr -> AbsAddr -> AbsAddr +bothAddrs NoAddr x = x +bothAddrs x NoAddr = x +bothAddrs HeapAddr HeapAddr = HeapAddr +bothAddrs StackAddr StackAddr = StackAddr +bothAddrs _ _ = AnyAddr + +addrConflicts :: AbsAddr -> AbsAddr -> Bool +addrConflicts NoAddr _ = False +addrConflicts _ NoAddr = False +addrConflicts HeapAddr StackAddr = False +addrConflicts StackAddr HeapAddr = False +addrConflicts _ _ = True + +exprAddr :: CmmExpr -> AbsAddr -- here NoAddr means "no reads" +exprAddr (CmmLoad addr _) = loadAddr addr +exprAddr (CmmMachOp _ es) = foldr bothAddrs NoAddr (map exprAddr es) +exprAddr _ = NoAddr + +absAddr :: CmmExpr -> AbsAddr -- here NoAddr means "don't know" +absAddr (CmmLoad addr _) = bothAddrs HeapAddr (loadAddr addr) -- (1) +absAddr (CmmMachOp _ es) = foldr bothAddrs NoAddr (map absAddr es) +absAddr (CmmReg r) = regAddr r +absAddr (CmmRegOff r _) = regAddr r +absAddr _ = NoAddr + +loadAddr :: CmmExpr -> AbsAddr +loadAddr e = case absAddr e of + NoAddr -> HeapAddr -- (2) + a -> a + +-- (1) we assume that an address read from memory is a heap address. +-- We never read a stack address from memory. +-- +-- (2) loading from an unknown address is assumed to be a heap load. + +regAddr :: CmmReg -> AbsAddr +regAddr (CmmGlobal Sp) = StackAddr +regAddr (CmmGlobal Hp) = HeapAddr +regAddr _ = NoAddr + +-- After sinking, if we have an assignment to a temporary that is used +-- exactly once, then it will either be of the form +-- +-- x = E +-- .. stmt involving x .. +-- +-- OR +-- +-- x = E +-- .. stmt conflicting with E .. + +-- So the idea in peepholeInline is to spot the first case +-- (recursively) and inline x. We start with the set of live +-- registers and move backwards through the block. +-- +-- ToDo: doesn't inline into the last node +-- +cmmPeepholeInline :: CmmGraph -> CmmGraph +cmmPeepholeInline graph = ofBlockList (g_entry graph) $ map do_block (toBlockList graph) + where + liveness = cmmLiveness graph + + do_block :: Block CmmNode C C -> Block CmmNode C C + do_block block = blockJoin first (go rmiddle live_middle) last + where + (first, middle, last) = blockSplit block + rmiddle = reverse (blockToList middle) + + live = Set.unions [ mapFindWithDefault Set.empty l liveness | l <- successors last ] + + live_middle = gen_kill last live + + go :: [CmmNode O O] -> RegSet -> Block CmmNode O O + go [] _ = emptyBlock + go [stmt] _ = blockCons stmt emptyBlock + go (stmt : rest) live = tryInline stmt usages live rest + where + usages :: UniqFM Int + usages = foldRegsUsed addUsage emptyUFM stmt + + addUsage :: UniqFM Int -> LocalReg -> UniqFM Int + addUsage m r = addToUFM_C (+) m r 1 + + tryInline stmt usages live stmts@(CmmAssign (CmmLocal l) rhs : rest) + | not (l `elemRegSet` live), + Just 1 <- lookupUFM usages l = tryInline stmt' usages' live' rest + where live' = foldRegsUsed extendRegSet live rhs + usages' = foldRegsUsed addUsage usages rhs + + stmt' = mapExpDeep inline stmt + where inline (CmmReg (CmmLocal l')) | l == l' = rhs + inline (CmmRegOff (CmmLocal l') off) | l == l' = cmmOffset rhs off + inline other = other + + tryInline stmt _usages live stmts + = go stmts (gen_kill stmt live) `blockSnoc` stmt + diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index 3c13bb4704..9e772d23e8 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -186,6 +186,7 @@ Library CmmParse CmmProcPoint CmmRewriteAssignments + CmmSink CmmType CmmUtils CmmLayoutStack |