diff options
author | Jan Stolarek <jan.stolarek@p.lodz.pl> | 2014-02-01 19:15:06 +0100 |
---|---|---|
committer | Jan Stolarek <jan.stolarek@p.lodz.pl> | 2014-02-01 19:15:06 +0100 |
commit | 78afa2078e474c9e8fd3d0f347c5652f296d5248 (patch) | |
tree | 701d37ac7a4edfac628fcb676ef81489abfd781a | |
parent | 99c3ed81ac53629771b00a0abbe37c989ea45cd6 (diff) | |
download | haskell-78afa2078e474c9e8fd3d0f347c5652f296d5248.tar.gz |
Nuke dead code
* CmmRewriteAssignments module was replaced by CmmSink a long
time ago. That module is now available at
https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/Hoopl/Examples
wiki page.
* removeDeadAssignments function was not used and it was also
moved to the above page.
* I also nuked some commented out debugging code that was not
used for 1.5 years.
-rw-r--r-- | compiler/cmm/CmmLayoutStack.hs | 29 | ||||
-rw-r--r-- | compiler/cmm/CmmLive.hs | 28 | ||||
-rw-r--r-- | compiler/cmm/CmmRewriteAssignments.hs | 628 | ||||
-rw-r--r-- | compiler/ghc.cabal.in | 1 |
4 files changed, 5 insertions, 681 deletions
diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs index 95483a2f52..bdc947829d 100644 --- a/compiler/cmm/CmmLayoutStack.hs +++ b/compiler/cmm/CmmLayoutStack.hs @@ -189,16 +189,10 @@ cmmLayoutStack :: DynFlags -> ProcPointSet -> ByteOff -> CmmGraph cmmLayoutStack dflags procpoints entry_args graph0@(CmmGraph { g_entry = entry }) = do - -- pprTrace "cmmLayoutStack" (ppr entry_args) $ return () - - -- We need liveness info. We could do removeDeadAssignments at - -- the same time, but it buys nothing over doing cmmSink later, - -- and costs a lot more than just cmmLocalLiveness. - -- (graph, liveness) <- removeDeadAssignments graph0 + -- We need liveness info. Dead assignments are removed later + -- by the sinking pass. let (graph, liveness) = (graph0, cmmLocalLiveness dflags graph0) - - -- pprTrace "liveness" (ppr liveness) $ return () - let blocks = postorderDfs graph + blocks = postorderDfs graph (final_stackmaps, _final_high_sp, new_blocks) <- mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) -> @@ -206,12 +200,9 @@ cmmLayoutStack dflags procpoints entry_args rec_stackmaps rec_high_sp blocks new_blocks' <- mapM (lowerSafeForeignCall dflags) new_blocks - - -- pprTrace ("Sp HWM") (ppr _final_high_sp) $ return () return (ofBlockList entry new_blocks', final_stackmaps) - layout :: DynFlags -> BlockSet -- proc points -> BlockEnv CmmLocalLive -- liveness @@ -252,8 +243,6 @@ layout dflags procpoints liveness entry entry_args final_stackmaps final_sp_high (pprPanic "no stack map for" (ppr entry_lbl)) entry_lbl acc_stackmaps - -- pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return () - -- (a) Update the stack map to include the effects of -- assignments in this block let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0 @@ -273,8 +262,6 @@ layout dflags procpoints liveness entry entry_args final_stackmaps final_sp_high <- handleLastNode dflags procpoints liveness cont_info acc_stackmaps stack1 middle0 last0 - -- pprTrace 
"layout(out)" (ppr out) $ return () - -- (d) Manifest Sp: run over the nodes in the block and replace -- CmmStackSlot with CmmLoad from Sp with a concrete offset. -- @@ -514,11 +501,8 @@ handleLastNode dflags procpoints liveness cont_info stackmaps = do let cont_args = mapFindWithDefault 0 l cont_info (stack2, assigs) = - --pprTrace "first visit to proc point" - -- (ppr l <+> ppr stack1) $ setupStackFrame dflags l liveness (sm_ret_off stack0) - cont_args stack0 - -- + cont_args stack0 (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 assigs return (l, tmp_lbl, stack2, block) @@ -682,8 +666,6 @@ allocate :: DynFlags -> ByteOff -> LocalRegSet -> StackMap allocate dflags ret_off live stackmap@StackMap{ sm_sp = sp0 , sm_regs = regs0 } = - -- pprTrace "allocate" (ppr live $$ ppr stackmap) $ - -- we only have to save regs that are not already in a slot let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live) regs1 = filterUFM (\(r,_) -> elemRegSet r live) regs0 @@ -923,8 +905,7 @@ elimStackStores stackmap stackmaps area_off nodes CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r)) | Just (_,off) <- lookupUFM (sm_regs stackmap) r , area_off area + m == off - -> -- pprTrace "eliminated a node!" 
(ppr r) $ - go stackmap ns + -> go stackmap ns _otherwise -> n : go (procMiddle stackmaps n stackmap) ns diff --git a/compiler/cmm/CmmLive.hs b/compiler/cmm/CmmLive.hs index 7d674b76a2..e66ab73f8a 100644 --- a/compiler/cmm/CmmLive.hs +++ b/compiler/cmm/CmmLive.hs @@ -11,7 +11,6 @@ module CmmLive , cmmGlobalLiveness , liveLattice , noLiveOnEntry, xferLive, gen, kill, gen_kill - , removeDeadAssignments ) where @@ -98,30 +97,3 @@ xferLive dflags = mkBTransfer3 fst mid lst mid n f = gen_kill dflags n f lst :: CmmNode O C -> FactBase (CmmLive r) -> CmmLive r lst n f = gen_kill dflags n $ joinOutFacts liveLattice n f - ------------------------------------------------------------------------------ --- Removing assignments to dead variables ------------------------------------------------------------------------------ - -removeDeadAssignments :: DynFlags -> CmmGraph - -> UniqSM (CmmGraph, BlockEnv CmmLocalLive) -removeDeadAssignments dflags g = - dataflowPassBwd g [] $ analRewBwd liveLattice (xferLive dflags) rewrites - where rewrites = mkBRewrite3 nothing middle nothing - -- SDM: no need for deepBwdRw here, we only rewrite to empty - -- Beware: deepBwdRw with one polymorphic function seems more - -- reasonable here, but GHC panics while compiling, see bug - -- #4045. - middle :: CmmNode O O -> Fact O CmmLocalLive -> CmmReplGraph O O - middle (CmmAssign (CmmLocal reg') _) live - | not (reg' `elemRegSet` live) - = return $ Just emptyGraph - -- XXX maybe this should be somewhere else... 
- middle (CmmAssign lhs (CmmReg rhs)) _ | lhs == rhs - = return $ Just emptyGraph - middle (CmmStore lhs (CmmLoad rhs _)) _ | lhs == rhs - = return $ Just emptyGraph - middle _ _ = return Nothing - - nothing :: CmmNode e x -> Fact x CmmLocalLive -> CmmReplGraph e x - nothing _ _ = return Nothing diff --git a/compiler/cmm/CmmRewriteAssignments.hs b/compiler/cmm/CmmRewriteAssignments.hs deleted file mode 100644 index 3c0a05b949..0000000000 --- a/compiler/cmm/CmmRewriteAssignments.hs +++ /dev/null @@ -1,628 +0,0 @@ -{-# LANGUAGE ViewPatterns #-} -{-# LANGUAGE GADTs #-} -{-# LANGUAGE FlexibleContexts #-} - -{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-} - --- This module implements generalized code motion for assignments to --- local registers, inlining and sinking when possible. It also does --- some amount of rewriting for stores to register slots, which are --- effectively equivalent to local registers. -module CmmRewriteAssignments - ( rewriteAssignments - ) where - -import StgCmmUtils -- XXX layering violation - -import Cmm -import CmmUtils -import CmmOpt - -import DynFlags -import UniqSupply -import UniqFM -import Unique -import BlockId - -import Hoopl -import Compiler.Hoopl ((<*>), mkMiddle, mkLast) -import Data.Maybe -import Control.Monad -import Prelude hiding (succ, zip) - ----------------------------------------------------------------- ---- Main function - -rewriteAssignments :: DynFlags -> CmmGraph -> UniqSM CmmGraph -rewriteAssignments dflags g = do - -- Because we need to act on forwards and backwards information, we - -- first perform usage analysis and bake this information into the - -- graph (backwards transform), and then do a forwards transform - -- to actually perform inlining and sinking. 
- g' <- annotateUsage dflags g - g'' <- liftM fst $ dataflowPassFwd g' [(g_entry g, fact_bot assignmentLattice)] $ - analRewFwd assignmentLattice - (assignmentTransfer dflags) - (assignmentRewrite dflags `thenFwdRw` machOpFoldRewrite dflags) - return (modifyGraph eraseRegUsage g'') - ----------------------------------------------------------------- ---- Usage information - --- We decorate all register assignments with approximate usage --- information, that is, the maximum number of times the register is --- referenced while it is live along all outgoing control paths. --- This analysis provides a precise upper bound for usage, so if a --- register is never referenced, we can remove it, as that assignment is --- dead. --- --- This analysis is very similar to liveness analysis; we just keep a --- little extra info. (Maybe we should move it to CmmLive, and subsume --- the old liveness analysis.) --- --- There are a few subtleties here: --- --- - If a register goes dead, and then becomes live again, the usages --- of the disjoint live range don't count towards the original range. --- --- a = 1; // used once --- b = a; --- a = 2; // used once --- c = a; --- --- - A register may be used multiple times, but these all reside in --- different control paths, such that any given execution only uses --- it once. In that case, the usage count may still be 1. --- --- a = 1; // used once --- if (b) { --- c = a + 3; --- } else { --- c = a + 1; --- } --- --- This policy corresponds to an inlining strategy that does not --- duplicate computation but may increase binary size. --- --- - If we naively implement a usage count, we have a counting to --- infinity problem across joins. Furthermore, knowing that --- something is used 2 or more times in one runtime execution isn't --- particularly useful for optimizations (inlining may be beneficial, --- but there's no way of knowing that without register pressure --- information.) --- --- while (...) 
{ --- // first iteration, b used once --- // second iteration, b used twice --- // third iteration ... --- a = b; --- } --- // b used zero times --- --- There is an orthogonal question, which is that for every runtime --- execution, the register may be used only once, but if we inline it --- in every conditional path, the binary size might increase a lot. --- But tracking this information would be tricky, because it violates --- the finite lattice restriction Hoopl requires for termination; --- we'd thus need to supply an alternate proof, which is probably --- something we should defer until we actually have an optimization --- that would take advantage of this. (This might also interact --- strangely with liveness information.) --- --- a = ...; --- // a is used one time, but in X different paths --- case (b) of --- 1 -> ... a ... --- 2 -> ... a ... --- 3 -> ... a ... --- ... --- --- - Memory stores to local register slots (CmmStore (CmmStackSlot --- (LocalReg _) 0) _) have similar behavior to local registers, --- in that these locations are all disjoint from each other. Thus, --- we attempt to inline them too. Note that because these are only --- generated as part of the spilling process, most of the time this --- will refer to a local register and the assignment will immediately --- die on the subsequent call. However, if we manage to replace that --- local register with a memory location, it means that we've managed --- to preserve a value on the stack without having to move it to --- another memory location again! We collect usage information just --- to be safe in case extra computation is involved. - -data RegUsage = SingleUse | ManyUse - deriving (Ord, Eq, Show) --- Absence in map = ZeroUse - -{- --- minBound is bottom, maxBound is top, least-upper-bound is max --- ToDo: Put this in Hoopl. Note that this isn't as useful as I --- originally hoped, because you usually want to leave out the bottom --- element when you have things like this put in maps. 
Maybe f is --- useful on its own as a combining function. -boundedOrdLattice :: (Bounded a, Ord a) => String -> DataflowLattice a -boundedOrdLattice n = DataflowLattice n minBound f - where f _ (OldFact x) (NewFact y) - | x >= y = (NoChange, x) - | otherwise = (SomeChange, y) --} - --- Custom node type we'll rewrite to. CmmAssign nodes to local --- registers are replaced with AssignLocal nodes. -data WithRegUsage n e x where - -- Plain will not contain CmmAssign nodes immediately after - -- transformation, but as we rewrite assignments, we may have - -- assignments here: these are assignments that should not be - -- rewritten! - Plain :: n e x -> WithRegUsage n e x - AssignLocal :: LocalReg -> CmmExpr -> RegUsage -> WithRegUsage n O O - -instance UserOfRegs LocalReg (n e x) => UserOfRegs LocalReg (WithRegUsage n e x) where - foldRegsUsed dflags f z (Plain n) = foldRegsUsed dflags f z n - foldRegsUsed dflags f z (AssignLocal _ e _) = foldRegsUsed dflags f z e - -instance DefinerOfRegs LocalReg (n e x) => DefinerOfRegs LocalReg (WithRegUsage n e x) where - foldRegsDefd dflags f z (Plain n) = foldRegsDefd dflags f z n - foldRegsDefd dflags f z (AssignLocal r _ _) = foldRegsDefd dflags f z r - -instance NonLocal n => NonLocal (WithRegUsage n) where - entryLabel (Plain n) = entryLabel n - successors (Plain n) = successors n - -liftRegUsage :: Graph n e x -> Graph (WithRegUsage n) e x -liftRegUsage = mapGraph Plain - -eraseRegUsage :: Graph (WithRegUsage CmmNode) e x -> Graph CmmNode e x -eraseRegUsage = mapGraph f - where f :: WithRegUsage CmmNode e x -> CmmNode e x - f (AssignLocal l e _) = CmmAssign (CmmLocal l) e - f (Plain n) = n - -type UsageMap = UniqFM RegUsage - -usageLattice :: DataflowLattice UsageMap -usageLattice = DataflowLattice "usage counts for registers" emptyUFM (joinUFM f) - where f _ (OldFact x) (NewFact y) - | x >= y = (NoChange, x) - | otherwise = (SomeChange, y) - --- We reuse the names 'gen' and 'kill', although we're doing something --- slightly 
different from the Dragon Book -usageTransfer :: DynFlags -> BwdTransfer (WithRegUsage CmmNode) UsageMap -usageTransfer dflags = mkBTransfer3 first middle last - where first _ f = f - middle :: WithRegUsage CmmNode O O -> UsageMap -> UsageMap - middle n f = gen_kill n f - last :: WithRegUsage CmmNode O C -> FactBase UsageMap -> UsageMap - -- Checking for CmmCall/CmmForeignCall is unnecessary, because - -- spills/reloads have already occurred by the time we do this - -- analysis. - -- XXX Deprecated warning is puzzling: what label are we - -- supposed to use? - -- ToDo: With a bit more cleverness here, we can avoid - -- disappointment and heartbreak associated with the inability - -- to inline into CmmCall and CmmForeignCall by - -- over-estimating the usage to be ManyUse. - last n f = gen_kill n (joinOutFacts usageLattice n f) - gen_kill :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap - gen_kill a = gen a . kill a - gen :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap - gen a f = foldLocalRegsUsed dflags increaseUsage f a - kill :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap - kill a f = foldLocalRegsDefd dflags delFromUFM f a - increaseUsage f r = addToUFM_C combine f r SingleUse - where combine _ _ = ManyUse - -usageRewrite :: BwdRewrite UniqSM (WithRegUsage CmmNode) UsageMap -usageRewrite = mkBRewrite3 first middle last - where first _ _ = return Nothing - middle :: Monad m => WithRegUsage CmmNode O O -> UsageMap -> m (Maybe (Graph (WithRegUsage CmmNode) O O)) - middle (Plain (CmmAssign (CmmLocal l) e)) f - = return . 
Just - $ case lookupUFM f l of - Nothing -> emptyGraph - Just usage -> mkMiddle (AssignLocal l e usage) - middle _ _ = return Nothing - last _ _ = return Nothing - -type CmmGraphWithRegUsage = GenCmmGraph (WithRegUsage CmmNode) -annotateUsage :: DynFlags -> CmmGraph -> UniqSM (CmmGraphWithRegUsage) -annotateUsage dflags vanilla_g = - let g = modifyGraph liftRegUsage vanilla_g - in liftM fst $ dataflowPassBwd g [(g_entry g, fact_bot usageLattice)] $ - analRewBwd usageLattice (usageTransfer dflags) usageRewrite - ----------------------------------------------------------------- ---- Assignment tracking - --- The idea is to maintain a map of local registers do expressions, --- such that the value of that register is the same as the value of that --- expression at any given time. We can then do several things, --- as described by Assignment. - --- Assignment describes the various optimizations that are valid --- at a given point in the program. -data Assignment = --- This assignment can always be inlined. It is cheap or single-use. - AlwaysInline CmmExpr --- This assignment should be sunk down to its first use. (This will --- increase code size if the register is used in multiple control flow --- paths, but won't increase execution time, and the reduction of --- register pressure is worth it, I think.) - | AlwaysSink CmmExpr --- We cannot safely optimize occurrences of this local register. (This --- corresponds to top in the lattice structure.) 
- | NeverOptimize - --- Extract the expression that is being assigned to -xassign :: Assignment -> Maybe CmmExpr -xassign (AlwaysInline e) = Just e -xassign (AlwaysSink e) = Just e -xassign NeverOptimize = Nothing - --- Extracts the expression, but only if they're the same constructor -xassign2 :: (Assignment, Assignment) -> Maybe (CmmExpr, CmmExpr) -xassign2 (AlwaysInline e, AlwaysInline e') = Just (e, e') -xassign2 (AlwaysSink e, AlwaysSink e') = Just (e, e') -xassign2 _ = Nothing - --- Note: We'd like to make decisions about "not optimizing" as soon as --- possible, because this will make running the transfer function more --- efficient. -type AssignmentMap = UniqFM Assignment - -assignmentLattice :: DataflowLattice AssignmentMap -assignmentLattice = DataflowLattice "assignments for registers" emptyUFM (joinUFM add) - where add _ (OldFact old) (NewFact new) - = case (old, new) of - (NeverOptimize, _) -> (NoChange, NeverOptimize) - (_, NeverOptimize) -> (SomeChange, NeverOptimize) - (xassign2 -> Just (e, e')) - | e == e' -> (NoChange, old) - | otherwise -> (SomeChange, NeverOptimize) - _ -> (SomeChange, NeverOptimize) - --- Deletes sinks from assignment map, because /this/ is the place --- where it will be sunk to. -deleteSinks :: UserOfRegs LocalReg n => DynFlags -> n -> AssignmentMap -> AssignmentMap -deleteSinks dflags n m = foldLocalRegsUsed dflags (adjustUFM f) m n - where f (AlwaysSink _) = NeverOptimize - f old = old - --- Invalidates any expressions that use a register. -invalidateUsersOf :: CmmReg -> AssignmentMap -> AssignmentMap --- foldUFM_Directly :: (Unique -> elt -> a -> a) -> a -> UniqFM elt -> a -invalidateUsersOf reg m = foldUFM_Directly f m m -- [foldUFM performance] - where f u (xassign -> Just e) m | reg `regUsedIn` e = addToUFM_Directly m u NeverOptimize - f _ _ m = m -{- This requires the entire spine of the map to be continually rebuilt, - - which causes crazy memory usage! 
-invalidateUsersOf reg = mapUFM (invalidateUsers' reg) - where invalidateUsers' reg (xassign -> Just e) | reg `regUsedIn` e = NeverOptimize - invalidateUsers' _ old = old --} - --- Note [foldUFM performance] --- These calls to fold UFM no longer leak memory, but they do cause --- pretty killer amounts of allocation. So they'll be something to --- optimize; we need an algorithmic change to prevent us from having to --- traverse the /entire/ map continually. - -middleAssignment :: DynFlags -> WithRegUsage CmmNode O O -> AssignmentMap - -> AssignmentMap - --- Algorithm for annotated assignments: --- 1. Delete any sinking assignments that were used by this instruction --- 2. Add the assignment to our list of valid local assignments with --- the correct optimization policy. --- 3. Look for all assignments that reference that register and --- invalidate them. -middleAssignment dflags n@(AssignLocal r e usage) assign - = invalidateUsersOf (CmmLocal r) . add . deleteSinks dflags n $ assign - where add m = addToUFM m r - $ case usage of - SingleUse -> AlwaysInline e - ManyUse -> decide e - decide CmmLit{} = AlwaysInline e - decide CmmReg{} = AlwaysInline e - decide CmmLoad{} = AlwaysSink e - decide CmmStackSlot{} = AlwaysSink e - decide CmmMachOp{} = AlwaysSink e - -- We'll always inline simple operations on the global - -- registers, to reduce register pressure: Sp - 4 or Hp - 8 - -- EZY: Justify this optimization more carefully. - decide CmmRegOff{} = AlwaysInline e - --- Algorithm for unannotated assignments of global registers: --- 1. Delete any sinking assignments that were used by this instruction --- 2. Look for all assignments that reference this register and --- invalidate them. -middleAssignment dflags (Plain n@(CmmAssign reg@(CmmGlobal _) _)) assign - = invalidateUsersOf reg . 
deleteSinks dflags n $ assign - --- Algorithm for unannotated assignments of *local* registers: do --- nothing (it's a reload, so no state should have changed) -middleAssignment _ (Plain (CmmAssign (CmmLocal _) _)) assign = assign - --- Algorithm for stores: --- 1. Delete any sinking assignments that were used by this instruction --- 2. Look for all assignments that load from memory locations that --- were clobbered by this store and invalidate them. -middleAssignment dflags (Plain n@(CmmStore lhs rhs)) assign - = let m = deleteSinks dflags n assign - in foldUFM_Directly f m m -- [foldUFM performance] - where f u (xassign -> Just x) m | clobbers dflags (lhs, rhs) (u, x) = addToUFM_Directly m u NeverOptimize - f _ _ m = m -{- Also leaky - = mapUFM_Directly p . deleteSinks n $ assign - -- ToDo: There's a missed opportunity here: even if a memory - -- access we're attempting to sink gets clobbered at some - -- location, it's still /better/ to sink it to right before the - -- point where it gets clobbered. How might we do this? - -- Unfortunately, it's too late to change the assignment... - where p r (xassign -> Just x) | (lhs, rhs) `clobbers` (r, x) = NeverOptimize - p _ old = old --} - --- Assumption: Unsafe foreign calls don't clobber memory --- Since foreign calls clobber caller saved registers, we need --- invalidate any assignments that reference those global registers. --- This is kind of expensive. (One way to optimize this might be to --- store extra information about expressions that allow this and other --- checks to be done cheaply.) 
-middleAssignment dflags (Plain n@(CmmUnsafeForeignCall{})) assign - = deleteCallerSaves (foldLocalRegsDefd dflags (\m r -> addToUFM m r NeverOptimize) (deleteSinks dflags n assign) n) - where deleteCallerSaves m = foldUFM_Directly f m m - f u (xassign -> Just x) m | wrapRecExpf g x False = addToUFM_Directly m u NeverOptimize - f _ _ m = m - g (CmmReg (CmmGlobal r)) _ | callerSaves platform r = True - g (CmmRegOff (CmmGlobal r) _) _ | callerSaves platform r = True - g _ b = b - platform = targetPlatform dflags - -middleAssignment _ (Plain (CmmComment {})) assign - = assign - --- Assumptions: --- * Writes using Hp do not overlap with any other memory locations --- (An important invariant being relied on here is that we only ever --- use Hp to allocate values on the heap, which appears to be the --- case given hpReg usage, and that our heap writing code doesn't --- do anything stupid like overlapping writes.) --- * Stack slots do not overlap with any other memory locations --- * Stack slots for different areas do not overlap --- * Stack slots within the same area and different offsets may --- overlap; we need to do a size check (see 'overlaps'). --- * Register slots only overlap with themselves. (But this shouldn't --- happen in practice, because we'll fail to inline a reload across --- the next spill.) --- * Non stack-slot stores always conflict with each other. (This is --- not always the case; we could probably do something special for Hp) -clobbers :: DynFlags - -> (CmmExpr, CmmExpr) -- (lhs, rhs) of clobbering CmmStore - -> (Unique, CmmExpr) -- (register, expression) that may be clobbered - -> Bool -clobbers _ (CmmRegOff (CmmGlobal Hp) _, _) (_, _) = False -clobbers _ (CmmReg (CmmGlobal Hp), _) (_, _) = False --- ToDo: Also catch MachOp case -clobbers _ (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (u, CmmLoad (ss'@CmmStackSlot{}) _) - | getUnique r == u, ss == ss' = False -- No-op on the stack slot (XXX: Do we need this special case?) 
-clobbers dflags (CmmStackSlot a o, rhs) (_, expr) = f expr - where f (CmmLoad (CmmStackSlot a' o') t) - = (a, o, widthInBytes (cmmExprWidth dflags rhs)) `overlaps` (a', o', widthInBytes (typeWidth t)) - f (CmmLoad e _) = containsStackSlot e - f (CmmMachOp _ es) = or (map f es) - f _ = False - -- Maybe there's an invariant broken if this actually ever - -- returns True - containsStackSlot (CmmLoad{}) = True -- load of a load, all bets off - containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es) - containsStackSlot (CmmStackSlot{}) = True - containsStackSlot _ = False -clobbers _ _ (_, e) = f e - where f (CmmLoad (CmmStackSlot _ _) _) = False - f (CmmLoad{}) = True -- conservative - f (CmmMachOp _ es) = or (map f es) - f _ = False - --- Check for memory overlapping. --- Diagram: --- 4 8 12 --- s -w- o --- [ I32 ] --- [ F64 ] --- s' -w'- o' -type CallSubArea = (Area, Int, Int) -- area, offset, width -overlaps :: CallSubArea -> CallSubArea -> Bool -overlaps (a, _, _) (a', _, _) | a /= a' = False -overlaps (_, o, w) (_, o', w') = - let s = o - w - s' = o' - w' - in (s' < o) && (s < o) -- Not LTE, because [ I32 ][ I32 ] is OK - -lastAssignment :: DynFlags -> WithRegUsage CmmNode O C -> AssignmentMap -> [(Label, AssignmentMap)] -lastAssignment _ (Plain (CmmCall _ (Just k) _ _ _ _)) assign = [(k, invalidateVolatile k assign)] -lastAssignment _ (Plain (CmmForeignCall {succ=k})) assign = [(k, invalidateVolatile k assign)] -lastAssignment dflags l assign = map (\id -> (id, deleteSinks dflags l assign)) $ successors l - --- Invalidates any expressions that have volatile contents: essentially, --- all terminals volatile except for literals and loads of stack slots --- that do not correspond to the call area for 'k' (the current call --- area is volatile because overflow return parameters may be written --- there.) --- Note: mapUFM could be expensive, but hopefully block boundaries --- aren't too common. If it is a problem, replace with something more --- clever. 
-invalidateVolatile :: BlockId -> AssignmentMap -> AssignmentMap -invalidateVolatile k m = mapUFM p m - where p (AlwaysInline e) = if exp e then AlwaysInline e else NeverOptimize - where exp CmmLit{} = True - exp (CmmLoad (CmmStackSlot (Young k') _) _) - | k' == k = False - exp (CmmLoad (CmmStackSlot _ _) _) = True - exp (CmmMachOp _ es) = and (map exp es) - exp _ = False - p _ = NeverOptimize -- probably shouldn't happen with AlwaysSink - -assignmentTransfer :: DynFlags - -> FwdTransfer (WithRegUsage CmmNode) AssignmentMap -assignmentTransfer dflags - = mkFTransfer3 (flip const) - (middleAssignment dflags) - ((mkFactBase assignmentLattice .) . lastAssignment dflags) - --- Note [Soundness of inlining] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- In the Hoopl paper, the soundness condition on rewrite functions is --- described as follows: --- --- "If it replaces a node n by a replacement graph g, then g must --- be observationally equivalent to n under the assumptions --- expressed by the incoming dataflow fact f. Moreover, analysis of --- g must produce output fact(s) that are at least as informative --- as the fact(s) produced by applying the transfer function to n." --- --- We consider the second condition in more detail here. It says given --- the rewrite R(n, f) = g, then for any incoming fact f' consistent --- with f (f' >= f), then running the transfer function T(f', n) <= T(f', g). --- For inlining this is not necessarily the case: --- --- n = "x = a + 2" --- f = f' = {a = y} --- g = "x = y + 2" --- T(f', n) = {x = a + 2, a = y} --- T(f', g) = {x = y + 2, a = y} --- --- y + 2 and a + 2 are not obviously comparable, and a naive --- implementation of the lattice would say they are incomparable. --- At best, this means we may be over-conservative, at worst, it means --- we may not terminate. --- --- However, in the original Lerner-Grove-Chambers paper, soundness and --- termination are separated, and only equivalence of facts is required --- for soundness. 
Monotonicity of the transfer function is not required --- for termination (as the calculation of least-upper-bound prevents --- this from being a problem), but it means we won't necessarily find --- the least-fixed point. - --- Note [Coherency of annotations] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- Is it possible for our usage annotations to become invalid after we --- start performing transformations? As the usage info only provides --- an upper bound, we only need to consider cases where the usages of --- a register may increase due to transformations--e.g. any reference --- to a local register in an AlwaysInline or AlwaysSink instruction, whose --- originating assignment was single use (we don't care about the --- many use case, because it is the top of the lattice). But such a --- case is not possible, because we always inline any single use --- register. QED. --- --- TODO: A useful lint option would be to check this invariant that --- there is never a local register in the assignment map that is --- single-use. - --- Note [Soundness of store rewriting] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- Its soundness depends on the invariant that no assignment is made to --- the local register before its store is accessed. This is clearly --- true with unoptimized spill-reload code, and as the store will always --- be rewritten first (if possible), there is no chance of it being --- propagated down before getting written (possibly with incorrect --- values from the assignment map, due to reassignment of the local --- register.) This is probably not locally sound. 
- -assignmentRewrite :: DynFlags -> FwdRewrite UniqSM (WithRegUsage CmmNode) AssignmentMap -assignmentRewrite dflags = mkFRewrite3 first middle last - where - first _ _ = return Nothing - middle :: WithRegUsage CmmNode O O -> AssignmentMap -> GenCmmReplGraph (WithRegUsage CmmNode) O O - middle (Plain m) assign = return $ rewrite assign (precompute assign m) mkMiddle m - middle (AssignLocal l e u) assign = return $ rewriteLocal assign (precompute assign (CmmAssign (CmmLocal l) e)) l e u - last (Plain l) assign = return $ rewrite assign (precompute assign l) mkLast l - -- Tuple is (inline?, reloads for sinks) - precompute :: AssignmentMap -> CmmNode O x -> (Bool, [WithRegUsage CmmNode O O]) - precompute assign n = foldLocalRegsUsed dflags f (False, []) n -- duplicates are harmless - where f (i, l) r = case lookupUFM assign r of - Just (AlwaysSink e) -> (i, (Plain (CmmAssign (CmmLocal r) e)):l) - Just (AlwaysInline _) -> (True, l) - Just NeverOptimize -> (i, l) - -- This case can show up when we have - -- limited optimization fuel. - Nothing -> (i, l) - rewrite :: AssignmentMap - -> (Bool, [WithRegUsage CmmNode O O]) - -> (WithRegUsage CmmNode O x -> Graph (WithRegUsage CmmNode) O x) - -> CmmNode O x - -> Maybe (Graph (WithRegUsage CmmNode) O x) - rewrite _ (False, []) _ _ = Nothing - -- Note [CmmCall Inline Hack] - -- Conservative hack: don't do any inlining on what will - -- be translated into an OldCmm CmmCalls, since the code - -- produced here tends to be unproblematic and I need to write - -- lint passes to ensure that we don't put anything in the - -- arguments that could be construed as a global register by - -- some later translation pass. (For example, slots will turn - -- into dereferences of Sp). See [Register parameter passing]. - -- ToDo: Fix this up to only bug out if all inlines were for - -- CmmExprs with global registers (we can't use the - -- straightforward mapExpDeep call, in this case.) 
ToDo: We miss - -- an opportunity here, where all possible inlinings should - -- instead be sunk. - rewrite _ (True, []) _ n | not (inlinable n) = Nothing -- see [CmmCall Inline Hack] - rewrite assign (i, xs) mk n = Just $ mkMiddles xs <*> mk (Plain (inline i assign n)) - - rewriteLocal :: AssignmentMap - -> (Bool, [WithRegUsage CmmNode O O]) - -> LocalReg -> CmmExpr -> RegUsage - -> Maybe (Graph (WithRegUsage CmmNode) O O) - rewriteLocal _ (False, []) _ _ _ = Nothing - rewriteLocal assign (i, xs) l e u = Just $ mkMiddles xs <*> mkMiddle n' - where n' = AssignLocal l e' u - e' = if i then wrapRecExp (inlineExp assign) e else e - -- inlinable check omitted, since we can always inline into - -- assignments. - - inline :: Bool -> AssignmentMap -> CmmNode e x -> CmmNode e x - inline False _ n = n - inline True _ n | not (inlinable n) = n -- see [CmmCall Inline Hack] - inline True assign n = mapExpDeep (inlineExp assign) n - - inlineExp assign old@(CmmReg (CmmLocal r)) - = case lookupUFM assign r of - Just (AlwaysInline x) -> x - _ -> old - inlineExp assign old@(CmmRegOff (CmmLocal r) i) - = case lookupUFM assign r of - Just (AlwaysInline x) -> - case x of - (CmmRegOff r' i') -> CmmRegOff r' (i + i') - _ -> CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)] - where rep = typeWidth (localRegType r) - _ -> old - -- See Note [Soundness of store rewriting] - inlineExp _ old = old - - inlinable :: CmmNode e x -> Bool - inlinable (CmmCall{}) = False - inlinable (CmmForeignCall{}) = False - inlinable (CmmUnsafeForeignCall{}) = False - inlinable _ = True - --- Need to interleave this with inlining, because machop folding results --- in literals, which we can inline more aggressively, and inlining --- gives us opportunities for more folding. However, we don't need any --- facts to do MachOp folding. 
-machOpFoldRewrite :: DynFlags -> FwdRewrite UniqSM (WithRegUsage CmmNode) a -machOpFoldRewrite dflags = mkFRewrite3 first middle last - where first _ _ = return Nothing - middle :: WithRegUsage CmmNode O O -> a -> GenCmmReplGraph (WithRegUsage CmmNode) O O - middle (Plain m) _ = return (fmap (mkMiddle . Plain) (foldNode m)) - middle (AssignLocal l e r) _ = return (fmap f (wrapRecExpM foldExp e)) - where f e' = mkMiddle (AssignLocal l e' r) - last :: WithRegUsage CmmNode O C -> a -> GenCmmReplGraph (WithRegUsage CmmNode) O C - last (Plain l) _ = return (fmap (mkLast . Plain) (foldNode l)) - foldNode :: CmmNode e x -> Maybe (CmmNode e x) - foldNode n = mapExpDeepM foldExp n - foldExp (CmmMachOp op args) = cmmMachOpFoldM dflags op args - foldExp _ = Nothing - --- ToDo: Outputable instance for UsageMap and AssignmentMap diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index a5d9785a43..f70a8e4b30 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -183,7 +183,6 @@ Library CmmOpt CmmParse CmmProcPoint - CmmRewriteAssignments CmmSink CmmType CmmUtils |