summaryrefslogtreecommitdiff
path: root/compiler/cmm
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/cmm')
-rw-r--r--compiler/cmm/BlockId.hs6
-rw-r--r--compiler/cmm/Cmm.hs10
-rw-r--r--compiler/cmm/CmmBuildInfoTables.hs255
-rw-r--r--compiler/cmm/CmmCommonBlockElim.hs136
-rw-r--r--compiler/cmm/CmmContFlowOpt.hs289
-rw-r--r--compiler/cmm/CmmCvt.hs22
-rw-r--r--compiler/cmm/CmmExpr.hs125
-rw-r--r--compiler/cmm/CmmInfo.hs17
-rw-r--r--compiler/cmm/CmmLayoutStack.hs1045
-rw-r--r--compiler/cmm/CmmLint.hs227
-rw-r--r--compiler/cmm/CmmLive.hs54
-rw-r--r--compiler/cmm/CmmNode.hs79
-rw-r--r--compiler/cmm/CmmOpt.hs121
-rw-r--r--compiler/cmm/CmmParse.y42
-rw-r--r--compiler/cmm/CmmPipeline.hs165
-rw-r--r--compiler/cmm/CmmProcPoint.hs329
-rw-r--r--compiler/cmm/CmmRewriteAssignments.hs17
-rw-r--r--compiler/cmm/CmmSpillReload.hs166
-rw-r--r--compiler/cmm/CmmUtils.hs120
-rw-r--r--compiler/cmm/Hoopl.hs124
-rw-r--r--compiler/cmm/Hoopl/Dataflow.hs890
-rw-r--r--compiler/cmm/MkGraph.hs531
-rw-r--r--compiler/cmm/OldCmm.hs22
-rw-r--r--compiler/cmm/OldCmmLint.hs209
-rw-r--r--compiler/cmm/OldPprCmm.hs19
-rw-r--r--compiler/cmm/OptimizationFuel.hs77
-rw-r--r--compiler/cmm/PprCmm.hs2
-rw-r--r--compiler/cmm/PprCmmExpr.hs8
-rw-r--r--compiler/cmm/SMRep.lhs6
-rw-r--r--compiler/cmm/cmm-notes41
30 files changed, 3377 insertions, 1777 deletions
diff --git a/compiler/cmm/BlockId.hs b/compiler/cmm/BlockId.hs
index feeacb553d..d5a8e045bf 100644
--- a/compiler/cmm/BlockId.hs
+++ b/compiler/cmm/BlockId.hs
@@ -31,15 +31,9 @@ compilation unit in which it appears.
type BlockId = Hoopl.Label
-instance Uniquable BlockId where
- getUnique label = getUnique (uniqueToInt $ lblToUnique label)
-
mkBlockId :: Unique -> BlockId
mkBlockId unique = uniqueToLbl $ intToUnique $ getKey unique
-instance Outputable BlockId where
- ppr label = ppr (getUnique label)
-
retPtLbl :: BlockId -> CLabel
retPtLbl label = mkReturnPtLabel $ getUnique label
diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs
index f1318c1dc9..d70fd8c835 100644
--- a/compiler/cmm/Cmm.hs
+++ b/compiler/cmm/Cmm.hs
@@ -69,8 +69,6 @@ type CmmGroup = GenCmmGroup CmmStatics CmmTopInfo CmmGraph
-- (a) C--, i.e. populated with various C-- constructs
-- (Cmm and RawCmm in OldCmm.hs)
-- (b) Native code, populated with data/instructions
---
--- A second family of instances based on Hoopl is in Cmm.hs.
-- | A top-level chunk, abstracted over the type of the contents of
-- the basic blocks (Cmm or instructions are the likely instantiations).
@@ -103,11 +101,15 @@ type CmmBwdRewrite f = BwdRewrite FuelUniqSM CmmNode f
-- Info Tables
-----------------------------------------------------------------------------
-data CmmTopInfo = TopInfo {info_tbl :: CmmInfoTable, stack_info :: CmmStackInfo}
+data CmmTopInfo = TopInfo { info_tbl :: CmmInfoTable
+ , stack_info :: CmmStackInfo }
data CmmStackInfo
= StackInfo {
- arg_space :: ByteOff, -- XXX: comment?
+ arg_space :: ByteOff,
+ -- number of bytes of arguments on the stack on entry to the
+ -- the proc. This is filled in by StgCmm.codeGen, and used
+ -- by the stack allocator later.
updfr_space :: Maybe ByteOff -- XXX: comment?
}
diff --git a/compiler/cmm/CmmBuildInfoTables.hs b/compiler/cmm/CmmBuildInfoTables.hs
index 011947f55d..2378988b68 100644
--- a/compiler/cmm/CmmBuildInfoTables.hs
+++ b/compiler/cmm/CmmBuildInfoTables.hs
@@ -14,12 +14,10 @@
{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
module CmmBuildInfoTables
( CAFSet, CAFEnv, cafAnal, localCAFInfo, mkTopCAFInfo
- , setInfoTableSRT, setInfoTableStackMap
+ , setInfoTableSRT
, TopSRT, emptySRT, srtToData
, bundleCAFs
- , lowerSafeForeignCalls
- , cafTransfers, liveSlotTransfers
- , mkLiveness )
+ , cafTransfers )
where
#include "HsVersions.h"
@@ -39,7 +37,6 @@ import Bitmap
import CLabel
import Cmm
import CmmUtils
-import CmmStackLayout
import Module
import FastString
import ForeignCall
@@ -54,129 +51,25 @@ import Outputable
import SMRep
import UniqSupply
-import Compiler.Hoopl
+import Hoopl
import Data.Map (Map)
import qualified Data.Map as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
import qualified FiniteMap as Map
+#if __GLASGOW_HASKELL__ < 704
+foldSet = Set.fold
+#else
+foldSet = Set.foldr
+#endif
+
----------------------------------------------------------------
-- Building InfoTables
-----------------------------------------------------------------------
--- Stack Maps
-
--- Given a block ID, we return a representation of the layout of the stack,
--- as suspended before entering that block.
--- (For a return site to a function call, the layout does not include the
--- parameter passing area (or the "return address" on the stack)).
--- If the element is `Nothing`, then it represents a word of the stack that
--- does not contain a live pointer.
--- If the element is `Just` a register, then it represents a live spill slot
--- for a pointer; we assume that a pointer is the size of a word.
--- The head of the list represents the young end of the stack where the infotable
--- pointer for the block `Bid` is stored.
--- The infotable pointer itself is not included in the list.
--- Call areas are also excluded from the list: besides the stuff in the update
--- frame (and the return infotable), call areas should never be live across
--- function calls.
-
--- RTS Invariant: All pointers must be word-aligned because each bit in the bitmap
--- represents a word. Consequently, we have to be careful when we see a live slot
--- on the stack: if we have packed multiple sub-word values into a word,
--- we have to make sure that we only mark the entire word as a non-pointer.
-
--- Also, don't forget to stop at the old end of the stack (oldByte),
--- which may differ depending on whether there is an update frame.
-
-type RegSlotInfo
- = ( Int -- Offset from oldest byte of Old area
- , LocalReg -- The register
- , Int) -- Width of the register
-
-live_ptrs :: ByteOff -> BlockEnv SubAreaSet -> AreaMap -> BlockId -> StackLayout
-live_ptrs oldByte slotEnv areaMap bid =
- -- pprTrace "live_ptrs for" (ppr bid <+> text (show oldByte ++ "-" ++ show youngByte) <+>
- -- ppr liveSlots) $
- -- pprTrace ("stack layout for " ++ show bid ++ ": ") (ppr res) $ res
- res
- where
- res = mkLiveness (reverse $ slotsToList youngByte liveSlots [])
-
- slotsToList :: Int -> [RegSlotInfo] -> [Maybe LocalReg] -> [Maybe LocalReg]
- -- n starts at youngByte and is decremented down to oldByte
- -- Returns a list, one element per word, with
- -- (Just r) meaning 'pointer register r is saved here',
- -- Nothing meaning 'non-pointer or empty'
-
- slotsToList n [] results | n == oldByte = results -- at old end of stack frame
-
- slotsToList n (s : _) _ | n == oldByte =
- pprPanic "slot left off live_ptrs" (ppr s <+> ppr oldByte <+>
- ppr n <+> ppr liveSlots <+> ppr youngByte)
-
- slotsToList n _ _ | n < oldByte =
- panic "stack slots not allocated on word boundaries?"
-
- slotsToList n l@((n', r, w) : rst) results =
- if n == (n' + w) then -- slot's young byte is at n
- ASSERT (not (isPtr r) ||
- (n `mod` wORD_SIZE == 0 && w == wORD_SIZE)) -- ptrs must be aligned
- slotsToList next (dropWhile (non_ptr_younger_than next) rst)
- (stack_rep : results)
- else slotsToList next (dropWhile (non_ptr_younger_than next) l)
- (Nothing : results)
- where next = n - wORD_SIZE
- stack_rep = if isPtr r then Just r else Nothing
-
- slotsToList n [] results = slotsToList (n - wORD_SIZE) [] (Nothing : results)
-
- non_ptr_younger_than next (n', r, w) =
- n' + w > next &&
- ASSERT (not (isPtr r))
- True
- isPtr = isGcPtrType . localRegType
-
- liveSlots :: [RegSlotInfo]
- liveSlots = sortBy (\ (off,_,_) (off',_,_) -> compare off' off)
- (Map.foldRightWithKey (\_ -> flip $ foldl add_slot) [] slots)
-
- add_slot :: [RegSlotInfo] -> SubArea -> [RegSlotInfo]
- add_slot rst (a@(RegSlot r@(LocalReg _ ty)), off, w) =
- if off == w && widthInBytes (typeWidth ty) == w then
- (expectJust "add_slot" (Map.lookup a areaMap), r, w) : rst
- else panic "live_ptrs: only part of a variable live at a proc point"
- add_slot rst (CallArea Old, _, _) =
- rst -- the update frame (or return infotable) should be live
- -- would be nice to check that only that part of the callarea is live...
- add_slot rst ((CallArea _), _, _) =
- rst
- -- JD: THIS ISN'T CURRENTLY A CORRECTNESS PROBLEM, BUT WE SHOULD REALLY
- -- MAKE LIVENESS INFO AROUND CALLS MORE PRECISE -- FOR NOW, A 32-BIT
- -- FLOAT PADS OUT TO 64 BITS, BUT WE ASSUME THE WHOLE PARAMETER-PASSING
- -- AREA IS LIVE (WHICH IT ISN'T...). WE SHOULD JUST PUT THE LIVE AREAS
- -- IN THE CALL NODES, WHICH SHOULD EVENTUALLY HAVE LIVE REGISTER AS WELL,
- -- SO IT'S ALL GOING IN THE SAME DIRECTION.
- -- pprPanic "CallAreas must not be live across function calls" (ppr bid <+> ppr c)
-
- slots :: SubAreaSet -- The SubAreaSet for 'bid'
- slots = expectJust "live_ptrs slots" $ mapLookup bid slotEnv
- youngByte = expectJust "live_ptrs bid_pos" $ Map.lookup (CallArea (Young bid)) areaMap
-
--- Construct the stack maps for a procedure _if_ it needs an infotable.
--- When wouldn't a procedure need an infotable? If it is a procpoint that
--- is not the successor of a call.
-setInfoTableStackMap :: SlotEnv -> AreaMap -> CmmDecl -> CmmDecl
-setInfoTableStackMap slotEnv areaMap
- t@(CmmProc (TopInfo {stack_info=StackInfo {updfr_space = Just updfr_off}}) _
- (CmmGraph {g_entry = eid}))
- = updInfo (const (live_ptrs updfr_off slotEnv areaMap eid)) id t
-setInfoTableStackMap _ _ t = t
-
-
-
------------------------------------------------------------------------
-- SRTs
-- WE NEED AN EXAMPLE HERE.
@@ -191,14 +84,14 @@ setInfoTableStackMap _ _ t = t
-----------------------------------------------------------------------
-- Finding the CAFs used by a procedure
-type CAFSet = Map CLabel ()
+type CAFSet = Set CLabel
type CAFEnv = BlockEnv CAFSet
-- First, an analysis to find live CAFs.
cafLattice :: DataflowLattice CAFSet
-cafLattice = DataflowLattice "live cafs" Map.empty add
- where add _ (OldFact old) (NewFact new) = case old `Map.union` new of
- new' -> (changeIf $ Map.size new' > Map.size old, new')
+cafLattice = DataflowLattice "live cafs" Set.empty add
+ where add _ (OldFact old) (NewFact new) = case old `Set.union` new of
+ new' -> (changeIf $ Set.size new' > Set.size old, new')
cafTransfers :: BwdTransfer CmmNode CAFSet
cafTransfers = mkBTransfer3 first middle last
@@ -210,11 +103,11 @@ cafTransfers = mkBTransfer3 first middle last
CmmLit (CmmLabelOff c _) -> add c set
CmmLit (CmmLabelDiffOff c1 c2 _) -> add c1 $ add c2 set
_ -> set
- add l s = if hasCAF l then Map.insert (toClosureLbl l) () s
+ add l s = if hasCAF l then Set.insert (toClosureLbl l) s
else s
-cafAnal :: CmmGraph -> FuelUniqSM CAFEnv
-cafAnal g = liftM snd $ dataflowPassBwd g [] $ analBwd cafLattice cafTransfers
+cafAnal :: Platform -> CmmGraph -> CAFEnv
+cafAnal g = dataflowAnalBwd g [] $ analBwd cafLattice cafTransfers
-----------------------------------------------------------------------
-- Building the SRTs
@@ -266,13 +159,13 @@ srtToData srt = [CmmData RelocatableReadOnlyData (Statics (lbl srt) tbl)]
buildSRTs :: TopSRT -> Map CLabel CAFSet -> CAFSet ->
FuelUniqSM (TopSRT, Maybe CmmDecl, C_SRT)
buildSRTs topSRT topCAFMap cafs =
- do let liftCAF lbl () z = -- get CAFs for functions without static closures
- case Map.lookup lbl topCAFMap of Just cafs -> z `Map.union` cafs
- Nothing -> Map.insert lbl () z
+ do let liftCAF lbl z = -- get CAFs for functions without static closures
+ case Map.lookup lbl topCAFMap of Just cafs -> z `Set.union` cafs
+ Nothing -> Set.insert lbl z
-- For each label referring to a function f without a static closure,
-- replace it with the CAFs that are reachable from f.
sub_srt topSRT localCafs =
- let cafs = Map.keys (Map.foldRightWithKey liftCAF Map.empty localCafs)
+ let cafs = Set.elems (foldSet liftCAF Set.empty localCafs)
mkSRT topSRT =
do localSRTs <- procpointSRT (lbl topSRT) (elt_map topSRT) cafs
return (topSRT, localSRTs)
@@ -373,21 +266,21 @@ mkTopCAFInfo localCAFs = foldl addToTop Map.empty g
Map.insert l (flatten env cafset) env
addToTop env (CyclicSCC nodes) =
let (lbls, cafsets) = unzip nodes
- cafset = lbls `Map.deleteList` foldl Map.union Map.empty cafsets
+ cafset = foldr Set.delete (foldl Set.union Set.empty cafsets) lbls
in foldl (\env l -> Map.insert l (flatten env cafset) env) env lbls
- flatten env cafset = Map.foldRightWithKey (lookup env) Map.empty cafset
- lookup env caf () cafset' =
- case Map.lookup caf env of Just cafs -> Map.foldRightWithKey add cafset' cafs
- Nothing -> add caf () cafset'
- add caf () cafset' = Map.insert caf () cafset'
+ flatten env cafset = foldSet (lookup env) Set.empty cafset
+ lookup env caf cafset' =
+ case Map.lookup caf env of Just cafs -> foldSet add cafset' cafs
+ Nothing -> add caf cafset'
+ add caf cafset' = Set.insert caf cafset'
g = stronglyConnCompFromEdgedVertices
- (map (\n@(l, cafs) -> (n, l, Map.keys cafs)) localCAFs)
+ (map (\n@(l, cafs) -> (n, l, Set.elems cafs)) localCAFs)
-- Bundle the CAFs used at a procpoint.
bundleCAFs :: CAFEnv -> CmmDecl -> (CAFSet, CmmDecl)
bundleCAFs cafEnv t@(CmmProc _ _ (CmmGraph {g_entry=entry})) =
(expectJust "bundleCAFs" (mapLookup entry cafEnv), t)
-bundleCAFs _ t = (Map.empty, t)
+bundleCAFs _ t = (Set.empty, t)
-- Construct the SRTs for the given procedure.
setInfoTableSRT :: Map CLabel CAFSet -> TopSRT -> (CAFSet, CmmDecl) ->
@@ -418,91 +311,3 @@ updInfoTbl toVars toSrt info_tbl@(CmmInfoTable {})
StackRep ls -> StackRep (toVars ls)
other -> other }
updInfoTbl _ _ t@CmmNonInfoTable = t
-
-----------------------------------------------------------------
--- Safe foreign calls: We need to insert the code that suspends and resumes
--- the thread before and after a safe foreign call.
--- Why do we do this so late in the pipeline?
--- Because we need this code to appear without interrruption: you can't rely on the
--- value of the stack pointer between the call and resetting the thread state;
--- you need to have an infotable on the young end of the stack both when
--- suspending the thread and making the foreign call.
--- All of this is much easier if we insert the suspend and resume calls here.
-
--- At the same time, we prepare for the stages of the compiler that
--- build the proc points. We have to do this at the same time because
--- the safe foreign calls need special treatment with respect to infotables.
--- A safe foreign call needs an infotable even though it isn't
--- a procpoint. The following datatype captures the information
--- needed to generate the infotables along with the Cmm data and procedures.
-
--- JD: Why not do this while splitting procedures?
-lowerSafeForeignCalls :: AreaMap -> CmmDecl -> FuelUniqSM CmmDecl
-lowerSafeForeignCalls _ t@(CmmData _ _) = return t
-lowerSafeForeignCalls areaMap (CmmProc info l g@(CmmGraph {g_entry=entry})) = do
- let block b mblocks = mblocks >>= lowerSafeCallBlock entry areaMap b
- blocks <- foldGraphBlocks block (return mapEmpty) g
- return $ CmmProc info l (ofBlockMap entry blocks)
-
--- If the block ends with a safe call in the block, lower it to an unsafe
--- call (with appropriate saves and restores before and after).
-lowerSafeCallBlock :: BlockId -> AreaMap -> CmmBlock -> BlockEnv CmmBlock
- -> FuelUniqSM (BlockEnv CmmBlock)
-lowerSafeCallBlock entry areaMap b blocks =
- case blockToNodeList b of
- (JustC (CmmEntry id), m, JustC l@(CmmForeignCall {})) -> lowerSafeForeignCall entry areaMap blocks id m l
- _ -> return $ insertBlock b blocks
-
--- Late in the code generator, we want to insert the code necessary
--- to lower a safe foreign call to a sequence of unsafe calls.
-lowerSafeForeignCall :: BlockId -> AreaMap -> BlockEnv CmmBlock -> BlockId -> [CmmNode O O] -> CmmNode O C
- -> FuelUniqSM (BlockEnv CmmBlock)
-lowerSafeForeignCall entry areaMap blocks bid m
- (CmmForeignCall {tgt=tgt, res=rs, args=as, succ=succ, updfr = updfr_off, intrbl = intrbl}) =
- do let newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep)
- -- Both 'id' and 'new_base' are KindNonPtr because they're
- -- RTS-only objects and are not subject to garbage collection
- id <- newTemp bWord
- new_base <- newTemp (cmmRegType (CmmGlobal BaseReg))
- let (caller_save, caller_load) = callerSaveVolatileRegs
- load_tso <- newTemp gcWord -- TODO FIXME NOW
- load_stack <- newTemp gcWord -- TODO FIXME NOW
- let (<**>) = (M.<*>)
- let suspendThread = foreignLbl "suspendThread"
- resumeThread = foreignLbl "resumeThread"
- foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit name)))
- suspend = saveThreadState <**>
- caller_save <**>
- mkUnsafeCall (ForeignTarget suspendThread
- (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint]))
- [id] [CmmReg (CmmGlobal BaseReg), CmmLit (CmmInt (fromIntegral (fromEnum intrbl)) wordWidth)]
- midCall = mkUnsafeCall tgt rs as
- resume = mkUnsafeCall (ForeignTarget resumeThread
- (ForeignConvention CCallConv [AddrHint] [AddrHint]))
- [new_base] [CmmReg (CmmLocal id)] <**>
- -- Assign the result to BaseReg: we
- -- might now have a different Capability!
- mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <**>
- caller_load <**>
- loadThreadState load_tso load_stack
- -- We have to save the return value on the stack because its next use
- -- may appear in a different procedure due to procpoint splitting...
- saveRetVals = foldl (<**>) emptyAGraph $ map (M.mkMiddle . spill) rs
- spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r)
- regSlot r@(LocalReg _ _) = CmmRegOff (CmmGlobal Sp) (sp_off - offset)
- where offset = w + expectJust "lowerForeign" (Map.lookup (RegSlot r) areaMap)
- sp_off = wORD_SIZE + expectJust "lowerForeign" (Map.lookup (CallArea area) areaMap)
- area = if succ == entry then Old else Young succ
- w = widthInBytes $ typeWidth $ localRegType r
- -- Note: The successor must be a procpoint, and we have already split,
- -- so we use a jump, not a branch.
- succLbl = CmmLit (CmmLabel (infoTblLbl succ))
- jump = CmmCall { cml_target = succLbl, cml_cont = Nothing
- , cml_args = widthInBytes wordWidth ,cml_ret_args = 0
- , cml_ret_off = updfr_off}
- graph' <- liftUniq $ labelAGraph bid $ catAGraphs (map M.mkMiddle m) <**>
- suspend <**> midCall <**>
- resume <**> saveRetVals <**> M.mkLast jump
- return $ blocks `mapUnion` toBlockMap graph'
-lowerSafeForeignCall _ _ _ _ _ _ = panic "lowerSafeForeignCall was passed something else"
-
diff --git a/compiler/cmm/CmmCommonBlockElim.hs b/compiler/cmm/CmmCommonBlockElim.hs
index abbfd01156..4df7304acf 100644
--- a/compiler/cmm/CmmCommonBlockElim.hs
+++ b/compiler/cmm/CmmCommonBlockElim.hs
@@ -13,22 +13,23 @@ where
import BlockId
import Cmm
import CmmUtils
+import CmmContFlowOpt
import Prelude hiding (iterate, succ, unzip, zip)
-import Compiler.Hoopl
+import Hoopl hiding (ChangeFlag)
import Data.Bits
import qualified Data.List as List
import Data.Word
import FastString
-import Control.Monad
import Outputable
import UniqFM
-import Unique
my_trace :: String -> SDoc -> a -> a
my_trace = if False then pprTrace else \_ _ a -> a
--- Eliminate common blocks:
+-- -----------------------------------------------------------------------------
+-- Eliminate common blocks
+
-- If two blocks are identical except for the label on the first node,
-- then we can eliminate one of the blocks. To ensure that the semantics
-- of the program are preserved, we have to rewrite each predecessor of the
@@ -42,59 +43,50 @@ my_trace = if False then pprTrace else \_ _ a -> a
-- TODO: Use optimization fuel
elimCommonBlocks :: CmmGraph -> CmmGraph
-elimCommonBlocks g =
- upd_graph g . snd $ iterate common_block reset hashed_blocks
- (emptyUFM, mapEmpty)
- where hashed_blocks = map (\b -> (hash_block b, b)) (reverse (postorderDfs g))
- reset (_, subst) = (emptyUFM, subst)
+elimCommonBlocks g = replaceLabels env g
+ where
+ env = iterate hashed_blocks mapEmpty
+ hashed_blocks = map (\b -> (hash_block b, b)) $ postorderDfs g
-- Iterate over the blocks until convergence
-iterate :: (t -> a -> (Bool, t)) -> (t -> t) -> [a] -> t -> t
-iterate upd reset blocks state =
- case foldl upd' (False, state) blocks of
- (True, state') -> iterate upd reset blocks (reset state')
- (False, state') -> state'
- where upd' (b, s) a = let (b', s') = upd s a in (b || b', s') -- lift to track changes
+iterate :: [(HashCode,CmmBlock)] -> BlockEnv BlockId -> BlockEnv BlockId
+iterate blocks subst =
+ case foldl common_block (False, emptyUFM, subst) blocks of
+ (changed, _, subst)
+ | changed -> iterate blocks subst
+ | otherwise -> subst
+
+type State = (ChangeFlag, UniqFM [CmmBlock], BlockEnv BlockId)
+
+type ChangeFlag = Bool
+type HashCode = Int
-- Try to find a block that is equal (or ``common'') to b.
-type BidMap = BlockEnv BlockId
-type State = (UniqFM [CmmBlock], BidMap)
-common_block :: (Outputable h, Uniquable h) => State -> (h, CmmBlock) -> (Bool, State)
-common_block (bmap, subst) (hash, b) =
+common_block :: State -> (HashCode, CmmBlock) -> State
+common_block (old_change, bmap, subst) (hash, b) =
case lookupUFM bmap hash of
Just bs -> case (List.find (eqBlockBodyWith (eqBid subst) b) bs,
mapLookup bid subst) of
(Just b', Nothing) -> addSubst b'
(Just b', Just b'') | entryLabel b' /= b'' -> addSubst b'
- _ -> (False, (addToUFM bmap hash (b : bs), subst))
- Nothing -> (False, (addToUFM bmap hash [b], subst))
+ | otherwise -> (old_change, bmap, subst)
+ _ -> (old_change, addToUFM bmap hash (b : bs), subst)
+ Nothing -> (old_change, addToUFM bmap hash [b], subst)
where bid = entryLabel b
- addSubst b' = my_trace "found new common block" (ppr (entryLabel b')) $
- (True, (bmap, mapInsert bid (entryLabel b') subst))
-
--- Given the map ``subst'' from BlockId -> BlockId, we rewrite the graph.
-upd_graph :: CmmGraph -> BidMap -> CmmGraph
-upd_graph g subst = mapGraphNodes (id, middle, last) g
- where middle = mapExpDeep exp
- last l = last' (mapExpDeep exp l)
- last' :: CmmNode O C -> CmmNode O C
- last' (CmmBranch bid) = CmmBranch $ sub bid
- last' (CmmCondBranch p t f) = cond p (sub t) (sub f)
- last' (CmmCall t (Just bid) a r o) = CmmCall t (Just $ sub bid) a r o
- last' l@(CmmCall _ Nothing _ _ _) = l
- last' (CmmForeignCall t r a bid u i) = CmmForeignCall t r a (sub bid) u i
- last' (CmmSwitch e bs) = CmmSwitch e $ map (liftM sub) bs
- cond p t f = if t == f then CmmBranch t else CmmCondBranch p t f
- exp (CmmStackSlot (CallArea (Young id)) off) =
- CmmStackSlot (CallArea (Young (sub id))) off
- exp (CmmLit (CmmBlock id)) = CmmLit (CmmBlock (sub id))
- exp e = e
- sub = lookupBid subst
+ addSubst b' = my_trace "found new common block" (ppr bid <> char '=' <> ppr (entryLabel b')) $
+ (True, bmap, mapInsert bid (entryLabel b') subst)
+
+
+-- -----------------------------------------------------------------------------
+-- Hashing and equality on blocks
+
+-- Below here is mostly boilerplate: hashing blocks ignoring labels,
+-- and comparing blocks modulo a label mapping.
-- To speed up comparisons, we hash each basic block modulo labels.
-- The hashing is a bit arbitrary (the numbers are completely arbitrary),
-- but it should be fast and good enough.
-hash_block :: CmmBlock -> Int
+hash_block :: CmmBlock -> HashCode
hash_block block =
fromIntegral (foldBlockNodesB3 (hash_fst, hash_mid, hash_lst) block (0 :: Word32) .&. (0x7fffffff :: Word32))
-- UniqFM doesn't like negative Ints
@@ -103,11 +95,11 @@ hash_block block =
hash_lst m h = hash_node m + h `shiftL` 1
hash_node :: CmmNode O x -> Word32
- hash_node (CmmComment (FastString u _ _ _ _)) = cvt u
+ hash_node (CmmComment (FastString u _ _ _ _)) = 0 -- don't care
hash_node (CmmAssign r e) = hash_reg r + hash_e e
hash_node (CmmStore e e') = hash_e e + hash_e e'
hash_node (CmmUnsafeForeignCall t _ as) = hash_tgt t + hash_list hash_e as
- hash_node (CmmBranch _) = 23 -- would be great to hash these properly
+ hash_node (CmmBranch _) = 23 -- NB. ignore the label
hash_node (CmmCondBranch p _ _) = hash_e p
hash_node (CmmCall e _ _ _ _) = hash_e e
hash_node (CmmForeignCall t _ _ _ _ _) = hash_tgt t
@@ -143,18 +135,60 @@ hash_block block =
-- Utilities: equality and substitution on the graph.
-- Given a map ``subst'' from BlockID -> BlockID, we define equality.
-eqBid :: BidMap -> BlockId -> BlockId -> Bool
+eqBid :: BlockEnv BlockId -> BlockId -> BlockId -> Bool
eqBid subst bid bid' = lookupBid subst bid == lookupBid subst bid'
-lookupBid :: BidMap -> BlockId -> BlockId
+lookupBid :: BlockEnv BlockId -> BlockId -> BlockId
lookupBid subst bid = case mapLookup bid subst of
Just bid -> lookupBid subst bid
Nothing -> bid
--- Equality on the body of a block, modulo a function mapping block IDs to block IDs.
+-- Middle nodes and expressions can contain BlockIds, in particular in
+-- CmmStackSlot and CmmBlock, so we have to use a special equality for
+-- these.
+--
+eqMiddleWith :: (BlockId -> BlockId -> Bool)
+ -> CmmNode O O -> CmmNode O O -> Bool
+eqMiddleWith eqBid (CmmComment _) (CmmComment _) = True
+eqMiddleWith eqBid (CmmAssign r1 e1) (CmmAssign r2 e2)
+ = r1 == r2 && eqExprWith eqBid e1 e2
+eqMiddleWith eqBid (CmmStore l1 r1) (CmmStore l2 r2)
+ = eqExprWith eqBid l1 l2 && eqExprWith eqBid r1 r2
+eqMiddleWith eqBid (CmmUnsafeForeignCall t1 r1 a1)
+ (CmmUnsafeForeignCall t2 r2 a2)
+ = t1 == t2 && r1 == r2 && and (zipWith (eqExprWith eqBid) a1 a2)
+eqMiddleWith _ _ _ = False
+
+eqExprWith :: (BlockId -> BlockId -> Bool)
+ -> CmmExpr -> CmmExpr -> Bool
+eqExprWith eqBid = eq
+ where
+ CmmLit l1 `eq` CmmLit l2 = eqLit l1 l2
+ CmmLoad e1 _ `eq` CmmLoad e2 _ = e1 `eq` e2
+ CmmReg r1 `eq` CmmReg r2 = r1==r2
+ CmmRegOff r1 i1 `eq` CmmRegOff r2 i2 = r1==r2 && i1==i2
+ CmmMachOp op1 es1 `eq` CmmMachOp op2 es2 = op1==op2 && es1 `eqs` es2
+ CmmStackSlot a1 i1 `eq` CmmStackSlot a2 i2 = eqArea a1 a2 && i1==i2
+ _e1 `eq` _e2 = False
+
+ xs `eqs` ys = and (zipWith eq xs ys)
+
+ eqLit (CmmBlock id1) (CmmBlock id2) = eqBid id1 id2
+ eqLit l1 l2 = l1 == l2
+
+ eqArea Old Old = True
+ eqArea (Young id1) (Young id2) = eqBid id1 id2
+ eqArea _ _ = False
+
+-- Equality on the body of a block, modulo a function mapping block
+-- IDs to block IDs.
eqBlockBodyWith :: (BlockId -> BlockId -> Bool) -> CmmBlock -> CmmBlock -> Bool
-eqBlockBodyWith eqBid block block' = middles == middles' && eqLastWith eqBid last last'
- where (_, middles , JustC last :: MaybeC C (CmmNode O C)) = blockToNodeList block
- (_, middles', JustC last' :: MaybeC C (CmmNode O C)) = blockToNodeList block'
+eqBlockBodyWith eqBid block block'
+ = and (zipWith (eqMiddleWith eqBid) (blockToList m) (blockToList m')) &&
+ eqLastWith eqBid l l'
+ where (_,m,l) = blockSplit block
+ (_,m',l') = blockSplit block'
+
+
eqLastWith :: (BlockId -> BlockId -> Bool) -> CmmNode O C -> CmmNode O C -> Bool
eqLastWith eqBid (CmmBranch bid1) (CmmBranch bid2) = eqBid bid1 bid2
diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs
index 73ce57e93f..36e7b8ec62 100644
--- a/compiler/cmm/CmmContFlowOpt.hs
+++ b/compiler/cmm/CmmContFlowOpt.hs
@@ -2,19 +2,19 @@
{-# OPTIONS_GHC -fno-warn-warnings-deprecations -fno-warn-incomplete-patterns #-}
module CmmContFlowOpt
- ( runCmmContFlowOpts
- , removeUnreachableBlocks, replaceBranches
+ ( cmmCfgOpts
+ , cmmCfgOptsProc
+ , removeUnreachableBlocks
+ , replaceLabels
)
where
import BlockId
import Cmm
import CmmUtils
-import Digraph
import Maybes
-import Outputable
-import Compiler.Hoopl
+import Hoopl
import Control.Monad
import Prelude hiding (succ, unzip, zip)
@@ -24,104 +24,158 @@ import Prelude hiding (succ, unzip, zip)
--
-----------------------------------------------------------------------------
-runCmmContFlowOpts :: CmmGroup -> CmmGroup
-runCmmContFlowOpts = map (optProc cmmCfgOpts)
-
cmmCfgOpts :: CmmGraph -> CmmGraph
-cmmCfgOpts = removeUnreachableBlocks . blockConcat . branchChainElim
- -- Here branchChainElim can ultimately be replaced
- -- with a more exciting combination of optimisations
+cmmCfgOpts = removeUnreachableBlocks . blockConcat
+
+cmmCfgOptsProc :: CmmDecl -> CmmDecl
+cmmCfgOptsProc = optProc cmmCfgOpts
optProc :: (g -> g) -> GenCmmDecl d h g -> GenCmmDecl d h g
optProc opt (CmmProc info lbl g) = CmmProc info lbl (opt g)
optProc _ top = top
+
-----------------------------------------------------------------------------
--
--- Branch Chain Elimination
+-- Block concatenation
--
-----------------------------------------------------------------------------
--- | Remove any basic block of the form L: goto L', and replace L with
--- L' everywhere else, unless L is the successor of a call instruction
--- and L' is the entry block. You don't want to set the successor of a
--- function call to the entry block because there is no good way to
--- store both the infotables for the call and from the callee, while
--- putting the stack pointer in a consistent place.
+-- This optimisation does two things:
+-- - If a block finishes with an unconditional branch, then we may
+-- be able to concatenate the block it points to and remove the
+-- branch. We do this either if the destination block is small
+-- (e.g. just another branch), or if this is the only jump to
+-- this particular destination block.
+--
+-- - If a block finishes in a call whose continuation block is a
+-- goto, then we can shortcut the destination, making the
+-- continuation block the destination of the goto.
+--
+-- Both transformations are improved by working from the end of the
+-- graph towards the beginning, because we may be able to perform many
+-- shortcuts in one go.
+
+
+-- We need to walk over the blocks from the end back to the
+-- beginning. We are going to maintain the "current" graph
+-- (BlockEnv CmmBlock) as we go, and also a mapping from BlockId
+-- to BlockId, representing continuation labels that we have
+-- renamed. This latter mapping is important because we might
+-- shortcut a CmmCall continuation. For example:
+--
+-- Sp[0] = L
+-- call g returns to L
+--
+-- L: goto M
--
--- JD isn't quite sure when it's safe to share continuations for different
--- function calls -- have to think about where the SP will be,
--- so we'll table that problem for now by leaving all call successors alone.
-
-branchChainElim :: CmmGraph -> CmmGraph
-branchChainElim g
- | null lone_branch_blocks = g -- No blocks to remove
- | otherwise = {- pprTrace "branchChainElim" (ppr forest) $ -}
- replaceLabels (mapFromList edges) g
+-- M: ...
+--
+-- So when we shortcut the L block, we need to replace not only
+-- the continuation of the call, but also references to L in the
+-- code (e.g. the assignment Sp[0] = L). So we keep track of
+-- which labels we have renamed and apply the mapping at the end
+-- with replaceLabels.
+
+blockConcat :: CmmGraph -> CmmGraph
+blockConcat g@CmmGraph { g_entry = entry_id }
+ = replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks
where
- blocks = toBlockList g
-
- lone_branch_blocks :: [(BlockId, BlockId)]
- -- each (L,K) is a block of the form
- -- L : goto K
- lone_branch_blocks = mapCatMaybes isLoneBranch blocks
-
- call_succs = foldl add emptyBlockSet blocks
- where add :: BlockSet -> CmmBlock -> BlockSet
- add succs b =
- case lastNode b of
- (CmmCall _ (Just k) _ _ _) -> setInsert k succs
- (CmmForeignCall {succ=k}) -> setInsert k succs
- _ -> succs
-
- isLoneBranch :: CmmBlock -> Maybe (BlockId, BlockId)
- isLoneBranch block
- | (JustC (CmmEntry id), [], JustC (CmmBranch target)) <- blockToNodeList block
- , not (setMember id call_succs)
- = Just (id,target)
- | otherwise
- = Nothing
-
- -- We build a graph from lone_branch_blocks (every node has only
- -- one out edge). Then we
- -- - topologically sort the graph: if from A we can reach B,
- -- then A occurs before B in the result list.
- -- - depth-first search starting from the nodes in this list.
- -- This gives us a [[node]], in which each list is a dependency
- -- chain.
- -- - for each list [a1,a2,...an] replace branches to ai with an.
- --
- -- This approach nicely deals with cycles by ignoring them.
- -- Branches in a cycle will be redirected to somewhere in the
- -- cycle, but we don't really care where. A cycle should be dead code,
- -- and so will be eliminated by removeUnreachableBlocks.
- --
- fromNode (b,_) = b
- toNode a = (a,a)
-
- all_block_ids :: LabelSet
- all_block_ids = setFromList (map fst lone_branch_blocks)
- `setUnion`
- setFromList (map snd lone_branch_blocks)
-
- forest = dfsTopSortG $ graphFromVerticesAndAdjacency nodes lone_branch_blocks
- where nodes = map toNode $ setElems $ all_block_ids
-
- edges = [ (fromNode y, fromNode x)
- | (x:xs) <- map reverse forest, y <- xs ]
+ -- we might be able to shortcut the entry BlockId itself
+ new_entry
+ | Just entry_blk <- mapLookup entry_id new_blocks
+ , Just dest <- canShortcut entry_blk
+ = dest
+ | otherwise
+ = entry_id
-----------------------------------------------------------------
+ blocks = postorderDfs g
+
+ (new_blocks, shortcut_map) =
+ foldr maybe_concat (toBlockMap g, mapEmpty) blocks
+
+ maybe_concat :: CmmBlock
+ -> (BlockEnv CmmBlock, BlockEnv BlockId)
+ -> (BlockEnv CmmBlock, BlockEnv BlockId)
+ maybe_concat block unchanged@(blocks, shortcut_map)
+ | CmmBranch b' <- last
+ , Just blk' <- mapLookup b' blocks
+ , shouldConcatWith b' blk'
+ = (mapInsert bid (splice head blk') blocks, shortcut_map)
+
+ -- calls: if we can shortcut the continuation label, then
+ -- we must *also* remember to substitute for the label in the
+ -- code, because we will push it somewhere.
+ | Just b' <- callContinuation_maybe last
+ , Just blk' <- mapLookup b' blocks
+ , Just dest <- canShortcut blk'
+ = (blocks, mapInsert b' dest shortcut_map)
+ -- replaceLabels will substitute dest for b' everywhere, later
+
+ -- non-calls: see if we can shortcut any of the successors.
+ | Nothing <- callContinuation_maybe last
+ = ( mapInsert bid (blockJoinTail head shortcut_last) blocks
+ , shortcut_map )
+
+ | otherwise
+ = (blocks, shortcut_map)
+ where
+ (head, last) = blockSplitTail block
+ bid = entryLabel block
+ shortcut_last = mapSuccessors shortcut last
+ shortcut l =
+ case mapLookup l blocks of
+ Just b | Just dest <- canShortcut b -> dest
+ _otherwise -> l
+
+ shouldConcatWith b block
+ | num_preds b == 1 = True -- only one predecessor: go for it
+ | okToDuplicate block = True -- short enough to duplicate
+ | otherwise = False
+ where num_preds bid = mapLookup bid backEdges `orElse` 0
+
+ canShortcut :: CmmBlock -> Maybe BlockId
+ canShortcut block
+ | (_, middle, CmmBranch dest) <- blockSplit block
+ , isEmptyBlock middle
+ = Just dest
+ | otherwise
+ = Nothing
+
+ backEdges :: BlockEnv Int -- number of predecessors for each block
+ backEdges = mapInsertWith (+) entry_id 1 $ -- add 1 for the entry id
+ mapMap setSize $ predMap blocks
+
+ splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
+ splice head rest = head `blockAppend` snd (blockSplitHead rest)
+
+
+callContinuation_maybe :: CmmNode O C -> Maybe BlockId
+callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
+callContinuation_maybe (CmmForeignCall { succ = b }) = Just b
+callContinuation_maybe _ = Nothing
+
+okToDuplicate :: CmmBlock -> Bool
+okToDuplicate block
+ = case blockSplit block of (_, m, _) -> isEmptyBlock m
+ -- cheap and cheerful; we might expand this in the future to
+ -- e.g. spot blocks that represent a single instruction or two
+
+------------------------------------------------------------------------
+-- Map over the CmmGraph, replacing each label with its mapping in the
+-- supplied BlockEnv.
replaceLabels :: BlockEnv BlockId -> CmmGraph -> CmmGraph
-replaceLabels env =
- replace_eid . mapGraphNodes1 txnode
+replaceLabels env g
+ | mapNull env = g
+ | otherwise = replace_eid $ mapGraphNodes1 txnode g
where
replace_eid g = g {g_entry = lookup (g_entry g)}
lookup id = mapLookup id env `orElse` id
txnode :: CmmNode e x -> CmmNode e x
txnode (CmmBranch bid) = CmmBranch (lookup bid)
- txnode (CmmCondBranch p t f) = CmmCondBranch (exp p) (lookup t) (lookup f)
+ txnode (CmmCondBranch p t f) = mkCmmCondBranch (exp p) (lookup t) (lookup f)
txnode (CmmSwitch e arms) = CmmSwitch (exp e) (map (liftM lookup) arms)
txnode (CmmCall t k a res r) = CmmCall (exp t) (liftM lookup k) a res r
txnode fc@CmmForeignCall{} = fc{ args = map exp (args fc)
@@ -130,90 +184,25 @@ replaceLabels env =
exp :: CmmExpr -> CmmExpr
exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid))
- exp (CmmStackSlot (CallArea (Young id)) i) = CmmStackSlot (CallArea (Young (lookup id))) i
+ exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
exp e = e
-
-replaceBranches :: BlockEnv BlockId -> CmmGraph -> CmmGraph
-replaceBranches env g = mapGraphNodes (id, id, last) g
- where
- last :: CmmNode O C -> CmmNode O C
- last (CmmBranch id) = CmmBranch (lookup id)
- last (CmmCondBranch e ti fi) = CmmCondBranch e (lookup ti) (lookup fi)
- last (CmmSwitch e tbl) = CmmSwitch e (map (fmap lookup) tbl)
- last l@(CmmCall {}) = l
- last l@(CmmForeignCall {}) = l
- lookup id = fmap lookup (mapLookup id env) `orElse` id
- -- XXX: this is a recursive lookup, it follows chains until the lookup
- -- returns Nothing, at which point we return the last BlockId
+mkCmmCondBranch :: CmmExpr -> Label -> Label -> CmmNode O C
+mkCmmCondBranch p t f = if t == f then CmmBranch t else CmmCondBranch p t f
----------------------------------------------------------------
-- Build a map from a block to its set of predecessors. Very useful.
+
predMap :: [CmmBlock] -> BlockEnv BlockSet
predMap blocks = foldr add_preds mapEmpty blocks -- find the back edges
where add_preds block env = foldl (add (entryLabel block)) env (successors block)
add bid env b' =
mapInsert b' (setInsert bid (mapLookup b' env `orElse` setEmpty)) env
------------------------------------------------------------------------------
---
--- Block concatenation
---
------------------------------------------------------------------------------
-
--- If a block B branches to a label L, L is not the entry block,
--- and L has no other predecessors,
--- then we can splice the block starting with L onto the end of B.
--- Order matters, so we work bottom up (reverse postorder DFS).
--- This optimization can be inhibited by unreachable blocks, but
--- the reverse postorder DFS returns only reachable blocks.
---
--- To ensure correctness, we have to make sure that the BlockId of the block
--- we are about to eliminate is not named in another instruction.
---
--- Note: This optimization does _not_ subsume branch chain elimination.
-
-blockConcat :: CmmGraph -> CmmGraph
-blockConcat g@(CmmGraph {g_entry=eid}) =
- replaceLabels concatMap $ ofBlockMap (g_entry g) blocks'
- where
- blocks = postorderDfs g
-
- (blocks', concatMap) =
- foldr maybe_concat (toBlockMap g, mapEmpty) $ blocks
-
- maybe_concat :: CmmBlock -> (LabelMap CmmBlock, LabelMap Label) -> (LabelMap CmmBlock, LabelMap Label)
- maybe_concat b unchanged@(blocks', concatMap) =
- let bid = entryLabel b
- in case blockToNodeList b of
- (JustC h, m, JustC (CmmBranch b')) ->
- if canConcatWith b' then
- (mapInsert bid (splice blocks' h m b') blocks',
- mapInsert b' bid concatMap)
- else unchanged
- _ -> unchanged
-
- num_preds bid = liftM setSize (mapLookup bid backEdges) `orElse` 0
-
- canConcatWith b' = b' /= eid && num_preds b' == 1
-
- backEdges = predMap blocks
-
- splice :: forall map n e x.
- IsMap map =>
- map (Block n e x) -> n C O -> [n O O] -> KeyOf map -> Block n C x
- splice blocks' h m bid' =
- case mapLookup bid' blocks' of
- Nothing -> panic "unknown successor block"
- Just block | (_, m', l') <- blockToNodeList block
- -> blockOfNodeList (JustC h, (m ++ m'), l')
-
-----------------------------------------------------------------------------
--
-- Removing unreachable blocks
---
------------------------------------------------------------------------------
removeUnreachableBlocks :: CmmGraph -> CmmGraph
removeUnreachableBlocks g
diff --git a/compiler/cmm/CmmCvt.hs b/compiler/cmm/CmmCvt.hs
index 80c6079aac..e72eee041c 100644
--- a/compiler/cmm/CmmCvt.hs
+++ b/compiler/cmm/CmmCvt.hs
@@ -12,29 +12,25 @@ import CmmUtils
import qualified OldCmm as Old
import OldPprCmm ()
-import Compiler.Hoopl hiding ((<*>), mkLabel, mkBranch)
+import Hoopl hiding ((<*>), mkLabel, mkBranch)
import Data.Maybe
import Maybes
import Outputable
cmmOfZgraph :: CmmGroup -> Old.CmmGroup
cmmOfZgraph tops = map mapTop tops
- where mapTop (CmmProc h l g) = CmmProc (Old.CmmInfo Nothing Nothing (info_tbl h)) l (ofZgraph g)
+ where mapTop (CmmProc h l g) = CmmProc (info_tbl h) l (ofZgraph g)
mapTop (CmmData s ds) = CmmData s ds
data ValueDirection = Arguments | Results
-add_hints :: Convention -> ValueDirection -> [a] -> [Old.CmmHinted a]
+add_hints :: ForeignTarget -> ValueDirection -> [a] -> [Old.CmmHinted a]
add_hints conv vd args = zipWith Old.CmmHinted args (get_hints conv vd)
-get_hints :: Convention -> ValueDirection -> [ForeignHint]
-get_hints (Foreign (ForeignConvention _ hints _)) Arguments = hints
-get_hints (Foreign (ForeignConvention _ _ hints)) Results = hints
-get_hints _other_conv _vd = repeat NoHint
-
-get_conv :: ForeignTarget -> Convention
-get_conv (PrimTarget _) = NativeNodeCall -- JD: SUSPICIOUS
-get_conv (ForeignTarget _ fc) = Foreign fc
+get_hints :: ForeignTarget -> ValueDirection -> [ForeignHint]
+get_hints (ForeignTarget _ (ForeignConvention _ hints _)) Arguments = hints
+get_hints (ForeignTarget _ (ForeignConvention _ _ hints)) Results = hints
+get_hints (PrimTarget _) _vd = repeat NoHint
cmm_target :: ForeignTarget -> Old.CmmCallTarget
cmm_target (PrimTarget op) = Old.CmmPrim op Nothing
@@ -89,8 +85,8 @@ ofZgraph g = Old.ListGraph $ mapMaybe convert_block $ postorderDfs g
CmmUnsafeForeignCall (PrimTarget MO_Touch) _ _ -> Old.CmmNop
CmmUnsafeForeignCall target ress args ->
Old.CmmCall (cmm_target target)
- (add_hints (get_conv target) Results ress)
- (add_hints (get_conv target) Arguments args)
+ (add_hints target Results ress)
+ (add_hints target Arguments args)
Old.CmmMayReturn
last :: CmmNode O C -> () -> [Old.CmmStmt]
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index 6eb91e89ba..939d4b7ca9 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -14,11 +14,11 @@ module CmmExpr
, GlobalReg(..), globalRegType, spReg, hpReg, spLimReg, nodeReg, node, baseReg
, VGcPtr(..), vgcFlag -- Temporary!
, DefinerOfLocalRegs, UserOfLocalRegs, foldRegsDefd, foldRegsUsed, filterRegsUsed
- , DefinerOfSlots, UserOfSlots, foldSlotsDefd, foldSlotsUsed
, RegSet, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet
- , plusRegSet, minusRegSet, timesRegSet
- , regUsedIn, regSlot
- , Area(..), AreaId(..), SubArea, SubAreaSet, AreaMap, isStackSlotOf
+ , plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet
+ , regSetToList
+ , regUsedIn
+ , Area(..)
, module CmmMachOp
, module CmmType
)
@@ -31,9 +31,10 @@ import CmmMachOp
import BlockId
import CLabel
import Unique
-import UniqSet
import Data.Map (Map)
+import Data.Set (Set)
+import qualified Data.Set as Set
-----------------------------------------------------------------------------
-- CmmExpr
@@ -42,11 +43,12 @@ import Data.Map (Map)
data CmmExpr
= CmmLit CmmLit -- Literal
- | CmmLoad CmmExpr CmmType -- Read memory location
- | CmmReg CmmReg -- Contents of register
+ | CmmLoad !CmmExpr !CmmType -- Read memory location
+ | CmmReg !CmmReg -- Contents of register
| CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.)
- | CmmStackSlot Area Int -- addressing expression of a stack slot
- | CmmRegOff CmmReg Int
+ | CmmStackSlot Area {-# UNPACK #-} !Int
+ -- addressing expression of a stack slot
+ | CmmRegOff !CmmReg Int
-- CmmRegOff reg i
-- ** is shorthand only, meaning **
-- CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)]
@@ -62,20 +64,16 @@ instance Eq CmmExpr where -- Equality ignores the types
_e1 == _e2 = False
data CmmReg
- = CmmLocal LocalReg
+ = CmmLocal {-# UNPACK #-} !LocalReg
| CmmGlobal GlobalReg
deriving( Eq, Ord )
-- | A stack area is either the stack slot where a variable is spilled
-- or the stack space where function arguments and results are passed.
data Area
- = RegSlot LocalReg
- | CallArea AreaId
- deriving (Eq, Ord)
-
-data AreaId
= Old -- See Note [Old Area]
- | Young BlockId
+ | Young {-# UNPACK #-} !BlockId -- Invariant: must be a continuation BlockId
+ -- See Note [Continuation BlockId] in CmmNode.
deriving (Eq, Ord)
{- Note [Old Area]
@@ -94,15 +92,8 @@ necessarily at the young end of the Old area.
End of note -}
-type SubArea = (Area, Int, Int) -- area, offset, width
-type SubAreaSet = Map Area [SubArea]
-
-type AreaMap = Map Area Int
- -- Byte offset of the oldest byte of the Area,
- -- relative to the oldest byte of the Old Area
-
data CmmLit
- = CmmInt Integer Width
+ = CmmInt !Integer Width
-- Interpretation: the 2's complement representation of the value
-- is truncated to the specified size. This is easier than trying
-- to keep the value within range, because we don't know whether
@@ -120,7 +111,11 @@ data CmmLit
-- It is also used inside the NCG during when generating
-- position-independent code.
| CmmLabelDiffOff CLabel CLabel Int -- label1 - label2 + offset
- | CmmBlock BlockId -- Code label
+
+ | CmmBlock {-# UNPACK #-} !BlockId -- Code label
+ -- Invariant: must be a continuation BlockId
+ -- See Note [Continuation BlockId] in CmmNode.
+
| CmmHighStackMark -- stands for the max stack space used during a procedure
deriving Eq
@@ -163,7 +158,7 @@ maybeInvertCmmExpr _ = Nothing
-----------------------------------------------------------------------------
data LocalReg
- = LocalReg !Unique CmmType
+ = LocalReg {-# UNPACK #-} !Unique CmmType
-- ^ Parameters:
-- 1. Identifier
-- 2. Type
@@ -189,22 +184,35 @@ localRegType (LocalReg _ rep) = rep
-----------------------------------------------------------------------------
-- | Sets of local registers
-type RegSet = UniqSet LocalReg
+
+-- These are used for dataflow facts, and a common operation is taking
+-- the union of two RegSets and then asking whether the union is the
+-- same as one of the inputs. UniqSet isn't good here, because
+-- sizeUniqSet is O(n) whereas Set.size is O(1), so we use ordinary
+-- Sets.
+
+type RegSet = Set LocalReg
emptyRegSet :: RegSet
+nullRegSet :: RegSet -> Bool
elemRegSet :: LocalReg -> RegSet -> Bool
extendRegSet :: RegSet -> LocalReg -> RegSet
deleteFromRegSet :: RegSet -> LocalReg -> RegSet
mkRegSet :: [LocalReg] -> RegSet
minusRegSet, plusRegSet, timesRegSet :: RegSet -> RegSet -> RegSet
-
-emptyRegSet = emptyUniqSet
-elemRegSet = elementOfUniqSet
-extendRegSet = addOneToUniqSet
-deleteFromRegSet = delOneFromUniqSet
-mkRegSet = mkUniqSet
-minusRegSet = minusUniqSet
-plusRegSet = unionUniqSets
-timesRegSet = intersectUniqSets
+sizeRegSet :: RegSet -> Int
+regSetToList :: RegSet -> [LocalReg]
+
+emptyRegSet = Set.empty
+nullRegSet = Set.null
+elemRegSet = Set.member
+extendRegSet = flip Set.insert
+deleteFromRegSet = flip Set.delete
+mkRegSet = Set.fromList
+minusRegSet = Set.difference
+plusRegSet = Set.union
+timesRegSet = Set.intersection
+sizeRegSet = Set.size
+regSetToList = Set.toList
class UserOfLocalRegs a where
foldRegsUsed :: (b -> LocalReg -> b) -> b -> a -> b
@@ -236,7 +244,7 @@ instance DefinerOfLocalRegs LocalReg where
foldRegsDefd f z r = f z r
instance UserOfLocalRegs RegSet where
- foldRegsUsed f = foldUniqSet (flip f)
+ foldRegsUsed f = Set.fold (flip f)
instance UserOfLocalRegs CmmExpr where
foldRegsUsed f z e = expr z e
@@ -271,49 +279,6 @@ reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es
_ `regUsedIn` CmmStackSlot _ _ = False
-----------------------------------------------------------------------------
--- Stack slots
------------------------------------------------------------------------------
-
-isStackSlotOf :: CmmExpr -> LocalReg -> Bool
-isStackSlotOf (CmmStackSlot (RegSlot r) _) r' = r == r'
-isStackSlotOf _ _ = False
-
-regSlot :: LocalReg -> CmmExpr
-regSlot r = CmmStackSlot (RegSlot r) (widthInBytes $ typeWidth $ localRegType r)
-
------------------------------------------------------------------------------
--- Stack slot use information for expressions and other types [_$_]
------------------------------------------------------------------------------
-
--- Fold over the area, the offset into the area, and the width of the subarea.
-class UserOfSlots a where
- foldSlotsUsed :: (b -> SubArea -> b) -> b -> a -> b
-
-class DefinerOfSlots a where
- foldSlotsDefd :: (b -> SubArea -> b) -> b -> a -> b
-
-instance UserOfSlots CmmExpr where
- foldSlotsUsed f z e = expr z e
- where expr z (CmmLit _) = z
- expr z (CmmLoad (CmmStackSlot a i) ty) = f z (a, i, widthInBytes $ typeWidth ty)
- expr z (CmmLoad addr _) = foldSlotsUsed f z addr
- expr z (CmmReg _) = z
- expr z (CmmMachOp _ exprs) = foldSlotsUsed f z exprs
- expr z (CmmRegOff _ _) = z
- expr z (CmmStackSlot _ _) = z
-
-instance UserOfSlots a => UserOfSlots [a] where
- foldSlotsUsed _ set [] = set
- foldSlotsUsed f set (x:xs) = foldSlotsUsed f (foldSlotsUsed f set x) xs
-
-instance DefinerOfSlots a => DefinerOfSlots [a] where
- foldSlotsDefd _ set [] = set
- foldSlotsDefd f set (x:xs) = foldSlotsDefd f (foldSlotsDefd f set x) xs
-
-instance DefinerOfSlots SubArea where
- foldSlotsDefd f z a = f z a
-
------------------------------------------------------------------------------
-- Global STG registers
-----------------------------------------------------------------------------
diff --git a/compiler/cmm/CmmInfo.hs b/compiler/cmm/CmmInfo.hs
index fe0c104d1c..a171faa057 100644
--- a/compiler/cmm/CmmInfo.hs
+++ b/compiler/cmm/CmmInfo.hs
@@ -19,6 +19,8 @@ import CmmUtils
import CLabel
import SMRep
import Bitmap
+import Stream (Stream)
+import qualified Stream
import Maybes
import Constants
@@ -40,10 +42,16 @@ mkEmptyContInfoTable info_lbl
, cit_prof = NoProfilingInfo
, cit_srt = NoC_SRT }
-cmmToRawCmm :: Platform -> [Old.CmmGroup] -> IO [Old.RawCmmGroup]
+cmmToRawCmm :: Platform -> Stream IO Old.CmmGroup ()
+ -> IO (Stream IO Old.RawCmmGroup ())
cmmToRawCmm platform cmms
= do { uniqs <- mkSplitUniqSupply 'i'
- ; return (initUs_ uniqs (mapM (concatMapM (mkInfoTable platform)) cmms)) }
+ ; let do_one uniqs cmm = do
+ case initUs uniqs $ concatMapM (mkInfoTable platform) cmm of
+ (b,uniqs') -> return (uniqs',b)
+ -- NB. strictness fixes a space leak. DO NOT REMOVE.
+ ; return (Stream.mapAccumL do_one uniqs cmms >> return ())
+ }
-- Make a concrete info table, represented as a list of CmmStatic
-- (it can't be simply a list of Word, because the SRT field is
@@ -82,7 +90,7 @@ mkInfoTable :: Platform -> CmmDecl -> UniqSM [RawCmmDecl]
mkInfoTable _ (CmmData sec dat)
= return [CmmData sec dat]
-mkInfoTable platform (CmmProc (CmmInfo _ _ info) entry_label blocks)
+mkInfoTable platform (CmmProc info entry_label blocks)
| CmmNonInfoTable <- info -- Code without an info table. Easy.
= return [CmmProc Nothing entry_label blocks]
@@ -91,7 +99,8 @@ mkInfoTable platform (CmmProc (CmmInfo _ _ info) entry_label blocks)
; return (top_decls ++
mkInfoTableAndCode info_lbl info_cts
entry_label blocks) }
- | otherwise = panic "mkInfoTable" -- Patern match overlap check not clever enough
+ | otherwise = panic "mkInfoTable"
+ -- Patern match overlap check not clever enough
-----------------------------------------------------
type InfoTableContents = ( [CmmLit] -- The standard part
diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs
new file mode 100644
index 0000000000..660506e7dc
--- /dev/null
+++ b/compiler/cmm/CmmLayoutStack.hs
@@ -0,0 +1,1045 @@
+{-# LANGUAGE RecordWildCards, GADTs #-}
+module CmmLayoutStack (
+ cmmLayoutStack, setInfoTableStackMap, cmmSink
+ ) where
+
+import StgCmmUtils ( callerSaveVolatileRegs ) -- XXX
+import StgCmmForeign ( saveThreadState, loadThreadState ) -- XXX
+
+import Cmm
+import BlockId
+import CLabel
+import CmmUtils
+import MkGraph
+import Module
+import ForeignCall
+import CmmLive
+import CmmProcPoint
+import SMRep
+import Hoopl hiding ((<*>), mkLast, mkMiddle)
+import OptimizationFuel
+import Constants
+import UniqSupply
+import Maybes
+import UniqFM
+import Util
+
+import FastString
+import Outputable
+import Data.Map (Map)
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import Control.Monad.Fix
+import Data.Array as Array
+import Data.Bits
+import Data.List (nub, partition)
+import Control.Monad (liftM)
+
+#include "HsVersions.h"
+
+
+data StackSlot = Occupied | Empty
+ -- Occupied: a return address or part of an update frame
+
+instance Outputable StackSlot where
+ ppr Occupied = ptext (sLit "XXX")
+ ppr Empty = ptext (sLit "---")
+
+-- All stack locations are expressed as positive byte offsets from the
+-- "base", which is defined to be the address above the return address
+-- on the stack on entry to this CmmProc.
+--
+-- Lower addresses have higher StackLocs.
+--
+type StackLoc = ByteOff
+
+{-
+ A StackMap describes the stack at any given point. At a continuation
+ it has a particular layout, like this:
+
+ | | <- base
+ |-------------|
+ | ret0 | <- base + 8
+ |-------------|
+ . upd frame . <- base + sm_ret_off
+ |-------------|
+ | |
+ . vars .
+ . (live/dead) .
+ | | <- base + sm_sp - sm_args
+ |-------------|
+ | ret1 |
+ . ret vals . <- base + sm_sp (<--- Sp points here)
+ |-------------|
+
+Why do we include the final return address (ret0) in our stack map? I
+have absolutely no idea, but it seems to be done that way consistently
+in the rest of the code generator, so I played along here. --SDM
+
+Note that we will be constructing an info table for the continuation
+(ret1), which needs to describe the stack down to, but not including,
+the update frame (or ret0, if there is no update frame).
+-}
+
+data StackMap = StackMap
+ { sm_sp :: StackLoc
+ -- ^ the offset of Sp relative to the base on entry
+ -- to this block.
+ , sm_args :: ByteOff
+ -- ^ the number of bytes of arguments in the area for this block
+ -- Defn: the offset of young(L) relative to the base is given by
+ -- (sm_sp - sm_args) of the StackMap for block L.
+ , sm_ret_off :: ByteOff
+ -- ^ Number of words of stack that we do not describe with an info
+ -- table, because it contains an update frame.
+ , sm_regs :: UniqFM (LocalReg,StackLoc)
+ -- ^ regs on the stack
+ }
+
+instance Outputable StackMap where
+ ppr StackMap{..} =
+ text "Sp = " <> int sm_sp $$
+ text "sm_args = " <> int sm_args $$
+ text "sm_ret_off = " <> int sm_ret_off $$
+ text "sm_regs = " <> ppr (eltsUFM sm_regs)
+
+
+cmmLayoutStack :: ProcPointSet -> ByteOff -> CmmGraph
+ -> FuelUniqSM (CmmGraph, BlockEnv StackMap)
+cmmLayoutStack procpoints entry_args
+ graph0@(CmmGraph { g_entry = entry })
+ = do
+ pprTrace "cmmLayoutStack" (ppr entry_args) $ return ()
+ (graph, liveness) <- removeDeadAssignments graph0
+ pprTrace "liveness" (ppr liveness) $ return ()
+ let blocks = postorderDfs graph
+
+ (final_stackmaps, final_high_sp, new_blocks) <- liftUniq $
+ mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) ->
+ layout procpoints liveness entry entry_args
+ rec_stackmaps rec_high_sp blocks
+
+ new_blocks' <- liftUniq $ mapM lowerSafeForeignCall new_blocks
+
+ pprTrace ("Sp HWM") (ppr final_high_sp) $
+ return (ofBlockList entry new_blocks', final_stackmaps)
+
+
+
+layout :: BlockSet -- proc points
+ -> BlockEnv CmmLive -- liveness
+ -> BlockId -- entry
+ -> ByteOff -- stack args on entry
+
+ -> BlockEnv StackMap -- [final] stack maps
+ -> ByteOff -- [final] Sp high water mark
+
+ -> [CmmBlock] -- [in] blocks
+
+ -> UniqSM
+ ( BlockEnv StackMap -- [out] stack maps
+ , ByteOff -- [out] Sp high water mark
+ , [CmmBlock] -- [out] new blocks
+ )
+
+layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks
+ = go blocks init_stackmap entry_args []
+ where
+ (updfr, cont_info) = collectContInfo blocks
+
+ init_stackmap = mapSingleton entry StackMap{ sm_sp = entry_args
+ , sm_args = entry_args
+ , sm_ret_off = updfr
+ , sm_regs = emptyUFM
+ }
+
+ go [] acc_stackmaps acc_hwm acc_blocks
+ = return (acc_stackmaps, acc_hwm, acc_blocks)
+
+ go (b0 : bs) acc_stackmaps acc_hwm acc_blocks
+ = do
+ let (entry0@(CmmEntry entry_lbl), middle0, last0) = blockSplit b0
+
+ let stack0@StackMap { sm_sp = sp0 }
+ = mapFindWithDefault
+ (pprPanic "no stack map for" (ppr entry_lbl))
+ entry_lbl acc_stackmaps
+
+ pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return ()
+
+ -- (a) Update the stack map to include the effects of
+ -- assignments in this block
+ let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0
+
+ -- (b) Insert assignments to reload all the live variables if this
+ -- block is a proc point
+ let middle1 = if entry_lbl `setMember` procpoints
+ then foldr blockCons middle0 (insertReloads stack0)
+ else middle0
+
+ -- (c) Look at the last node and if we are making a call or
+ -- jumping to a proc point, we must save the live
+ -- variables, adjust Sp, and construct the StackMaps for
+ -- each of the successor blocks. See handleLastNode for
+ -- details.
+ (middle2, sp_off, last1, fixup_blocks, out)
+ <- handleLastNode procpoints liveness cont_info
+ acc_stackmaps stack1 middle0 last0
+
+ pprTrace "layout(out)" (ppr out) $ return ()
+
+ -- (d) Manifest Sp: run over the nodes in the block and replace
+ -- CmmStackSlot with CmmLoad from Sp with a concrete offset.
+ --
+ -- our block:
+ -- middle1 -- the original middle nodes
+ -- middle2 -- live variable saves from handleLastNode
+ -- Sp = Sp + sp_off -- Sp adjustment goes here
+ -- last1 -- the last node
+ --
+ let middle_pre = blockToList $ foldl blockSnoc middle1 middle2
+
+ sp_high = final_hwm - entry_args
+ -- The stack check value is adjusted by the Sp offset on
+ -- entry to the proc, which is entry_args. We are
+ -- assuming that we only do a stack check at the
+ -- beginning of a proc, and we don't modify Sp before the
+ -- check.
+
+ final_blocks = manifestSp final_stackmaps stack0 sp0 sp_high entry0
+ middle_pre sp_off last1 fixup_blocks
+
+ acc_stackmaps' = mapUnion acc_stackmaps out
+
+ hwm' = maximum (acc_hwm : (sp0 - sp_off) : map sm_sp (mapElems out))
+
+ go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks)
+
+
+-- -----------------------------------------------------------------------------
+
+-- This doesn't seem right somehow. We need to find out whether this
+-- proc will push some update frame material at some point, so that we
+-- can avoid using that area of the stack for spilling. The
+-- updfr_space field of the CmmProc *should* tell us, but it doesn't
+-- (I think maybe it gets filled in later when we do proc-point
+-- splitting).
+--
+-- So we'll just take the max of all the cml_ret_offs. This could be
+-- unnecessarily pessimistic, but probably not in the code we
+-- generate.
+
+collectContInfo :: [CmmBlock] -> (ByteOff, BlockEnv ByteOff)
+collectContInfo blocks
+ = (maximum ret_offs, mapFromList (catMaybes mb_argss))
+ where
+ (mb_argss, ret_offs) = mapAndUnzip get_cont blocks
+
+ get_cont b =
+ case lastNode b of
+ CmmCall { cml_cont = Just l, .. }
+ -> (Just (l, cml_ret_args), cml_ret_off)
+ CmmForeignCall { .. }
+ -> (Just (succ, 0), updfr) -- ??
+ _other -> (Nothing, 0)
+
+
+-- -----------------------------------------------------------------------------
+-- Updating the StackMap from middle nodes
+
+-- Look for loads from stack slots, and update the StackMap. This is
+-- purelyu for optimisation reasons, so that we can avoid saving a
+-- variable back to a different stack slot if it is already on the
+-- stack.
+--
+-- This happens a lot: for example when function arguments are passed
+-- on the stack and need to be immediately saved across a call, we
+-- want to just leave them where they are on the stack.
+--
+procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap
+procMiddle stackmaps node sm
+ = case node of
+ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _)
+ -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) }
+ where loc = getStackLoc area off stackmaps
+ CmmAssign (CmmLocal r) _other
+ -> sm { sm_regs = delFromUFM (sm_regs sm) r }
+ _other
+ -> sm
+
+getStackLoc :: Area -> ByteOff -> BlockEnv StackMap -> StackLoc
+getStackLoc Old n _ = n
+getStackLoc (Young l) n stackmaps =
+ case mapLookup l stackmaps of
+ Nothing -> pprPanic "getStackLoc" (ppr l)
+ Just sm -> sm_sp sm - sm_args sm + n
+
+
+-- -----------------------------------------------------------------------------
+-- Handling stack allocation for a last node
+
+-- We take a single last node and turn it into:
+--
+-- C1 (some statements)
+-- Sp = Sp + N
+-- C2 (some more statements)
+-- call f() -- the actual last node
+--
+-- plus possibly some more blocks (we may have to add some fixup code
+-- between the last node and the continuation).
+--
+-- C1: is the code for saving the variables across this last node onto
+-- the stack, if the continuation is a call or jumps to a proc point.
+--
+-- C2: if the last node is a safe foreign call, we have to inject some
+-- extra code that goes *after* the Sp adjustment.
+
+handleLastNode
+ :: ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff
+ -> BlockEnv StackMap -> StackMap
+ -> Block CmmNode O O
+ -> CmmNode O C
+ -> UniqSM
+ ( [CmmNode O O] -- nodes to go *before* the Sp adjustment
+ , ByteOff -- amount to adjust Sp
+ , CmmNode O C -- new last node
+ , [CmmBlock] -- new blocks
+ , BlockEnv StackMap -- stackmaps for the continuations
+ )
+
+handleLastNode procpoints liveness cont_info stackmaps
+ stack0@StackMap { sm_sp = sp0 } middle last
+ = case last of
+ -- At each return / tail call,
+ -- adjust Sp to point to the last argument pushed, which
+ -- is cml_args, after popping any other junk from the stack.
+ CmmCall{ cml_cont = Nothing, .. } -> do
+ let sp_off = sp0 - cml_args
+ return ([], sp_off, last, [], mapEmpty)
+
+ -- At each CmmCall with a continuation:
+ CmmCall{ cml_cont = Just cont_lbl, .. } ->
+ return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off
+
+ CmmForeignCall{ succ = cont_lbl, .. } -> do
+ return $ lastCall cont_lbl wORD_SIZE wORD_SIZE (sm_ret_off stack0)
+ -- one word each for args and results: the return address
+
+ CmmBranch{..} -> handleProcPoints
+ CmmCondBranch{..} -> handleProcPoints
+ CmmSwitch{..} -> handleProcPoints
+
+ where
+ -- Calls and ForeignCalls are handled the same way:
+ lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff
+ -> ( [CmmNode O O]
+ , ByteOff
+ , CmmNode O C
+ , [CmmBlock]
+ , BlockEnv StackMap
+ )
+ lastCall lbl cml_args cml_ret_args cml_ret_off
+ = ( assignments
+ , spOffsetForCall sp0 cont_stack cml_args
+ , last
+ , [] -- no new blocks
+ , mapSingleton lbl cont_stack )
+ where
+ (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off
+
+
+ prepareStack lbl cml_ret_args cml_ret_off
+ | Just cont_stack <- mapLookup lbl stackmaps
+ -- If we have already seen this continuation before, then
+ -- we just have to make the stack look the same:
+ = (fixupStack stack0 cont_stack, cont_stack)
+ -- Otherwise, we have to allocate the stack frame
+ | otherwise
+ = (save_assignments, new_cont_stack)
+ where
+ (new_cont_stack, save_assignments)
+ = setupStackFrame lbl liveness cml_ret_off cml_ret_args stack0
+
+
+ -- proc point, we have to set up the stack to match what the proc
+ -- point is expecting.
+ --
+ handleProcPoints :: UniqSM ( [CmmNode O O]
+ , ByteOff
+ , CmmNode O C
+ , [CmmBlock]
+ , BlockEnv StackMap )
+
+ handleProcPoints
+ -- Note [diamond proc point]
+ | Just l <- futureContinuation middle
+ , (nub $ filter (`setMember` procpoints) $ successors last) == [l]
+ = do
+ let cont_args = mapFindWithDefault 0 l cont_info
+ (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0)
+ out = mapFromList [ (l', cont_stack)
+ | l' <- successors last ]
+ return ( assigs
+ , spOffsetForCall sp0 cont_stack wORD_SIZE
+ , last
+ , []
+ , out)
+
+ | otherwise = do
+ pps <- mapM handleProcPoint (successors last)
+ let lbl_map :: LabelMap Label
+ lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ]
+ fix_lbl l = mapLookup l lbl_map `orElse` l
+ return ( []
+ , 0
+ , mapSuccessors fix_lbl last
+ , concat [ blk | (_,_,_,blk) <- pps ]
+ , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] )
+
+ -- For each proc point that is a successor of this block
+ -- (a) if the proc point already has a stackmap, we need to
+ -- shuffle the current stack to make it look the same.
+ -- We have to insert a new block to make this happen.
+ -- (b) otherwise, call "allocate live stack0" to make the
+ -- stack map for the proc point
+ handleProcPoint :: BlockId
+ -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock])
+ handleProcPoint l
+ | not (l `setMember` procpoints) = return (l, l, stack0, [])
+ | otherwise = do
+ tmp_lbl <- liftM mkBlockId $ getUniqueM
+ let
+ (stack2, assigs) =
+ case mapLookup l stackmaps of
+ Just pp_sm -> (pp_sm, fixupStack stack0 pp_sm)
+ Nothing ->
+ pprTrace "first visit to proc point"
+ (ppr l <+> ppr stack1) $
+ (stack1, assigs)
+ where
+ cont_args = mapFindWithDefault 0 l cont_info
+ (stack1, assigs) =
+ setupStackFrame l liveness (sm_ret_off stack0)
+ cont_args stack0
+
+ sp_off = sp0 - sm_sp stack2
+
+ block = blockJoin (CmmEntry tmp_lbl)
+ (maybeAddSpAdj sp_off (blockFromList assigs))
+ (CmmBranch l)
+ --
+ return (l, tmp_lbl, stack2, [block])
+
+
+
+-- Sp is currently pointing to current_sp,
+-- we want it to point to
+-- (sm_sp cont_stack - sm_args cont_stack + args)
+-- so the difference is
+-- sp0 - (sm_sp cont_stack - sm_args cont_stack + args)
+spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff
+spOffsetForCall current_sp cont_stack args
+ = current_sp - (sm_sp cont_stack - sm_args cont_stack + args)
+
+
+-- | create a sequence of assignments to establish the new StackMap,
+-- given the old StackMap.
+fixupStack :: StackMap -> StackMap -> [CmmNode O O]
+fixupStack old_stack new_stack = concatMap move new_locs
+ where
+ old_map :: Map LocalReg ByteOff
+ old_map = Map.fromList (stackSlotRegs old_stack)
+ new_locs = stackSlotRegs new_stack
+
+ move (r,n)
+ | Just m <- Map.lookup r old_map, n == m = []
+ | otherwise = [CmmStore (CmmStackSlot Old n)
+ (CmmReg (CmmLocal r))]
+
+
+
+setupStackFrame
+ :: BlockId -- label of continuation
+ -> BlockEnv CmmLive -- liveness
+ -> ByteOff -- updfr
+ -> ByteOff -- bytes of return values on stack
+ -> StackMap -- current StackMap
+ -> (StackMap, [CmmNode O O])
+
+setupStackFrame lbl liveness updfr_off ret_args stack0
+ = (cont_stack, assignments)
+ where
+ -- get the set of LocalRegs live in the continuation
+ live = mapFindWithDefault Set.empty lbl liveness
+
+ -- the stack from the base to updfr_off is off-limits.
+ -- our new stack frame contains:
+ -- * saved live variables
+ -- * the return address [young(C) + 8]
+ -- * the args for the call,
+ -- which are replaced by the return values at the return
+ -- point.
+
+ -- everything up to updfr_off is off-limits
+ -- stack1 contains updfr_off, plus everything we need to save
+ (stack1, assignments) = allocate updfr_off live stack0
+
+ -- And the Sp at the continuation is:
+ -- sm_sp stack1 + ret_args
+ cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args
+ , sm_args = ret_args
+ , sm_ret_off = updfr_off
+ }
+
+
+-- -----------------------------------------------------------------------------
+-- Note [diamond proc point]
+--
+-- This special case looks for the pattern we get from a typical
+-- tagged case expression:
+--
+-- Sp[young(L1)] = L1
+-- if (R1 & 7) != 0 goto L1 else goto L2
+-- L2:
+-- call [R1] returns to L1
+-- L1: live: {y}
+-- x = R1
+--
+-- If we let the generic case handle this, we get
+--
+-- Sp[-16] = L1
+-- if (R1 & 7) != 0 goto L1a else goto L2
+-- L2:
+-- Sp[-8] = y
+-- Sp = Sp - 16
+-- call [R1] returns to L1
+-- L1a:
+-- Sp[-8] = y
+-- Sp = Sp - 16
+-- goto L1
+-- L1:
+-- x = R1
+--
+-- The code for saving the live vars is duplicated in each branch, and
+-- furthermore there is an extra jump in the fast path (assuming L1 is
+-- a proc point, which it probably is if there is a heap check).
+--
+-- So to fix this we want to set up the stack frame before the
+-- conditional jump. How do we know when to do this, and when it is
+-- safe? The basic idea is, when we see the assignment
+--
+-- Sp[young(L)] = L
+--
+-- we know that
+-- * we are definitely heading for L
+-- * there can be no more reads from another stack area, because young(L)
+-- overlaps with it.
+--
+-- We don't necessarily know that everything live at L is live now
+-- (some might be assigned between here and the jump to L). So we
+-- simplify and only do the optimisation when we see
+--
+-- (1) a block containing an assignment of a return address L
+-- (2) ending in a branch where one (and only) continuation goes to L,
+-- and no other continuations go to proc points.
+--
+-- then we allocate the stack frame for L at the end of the block,
+-- before the branch.
+--
+-- We could generalise (2), but that would make it a bit more
+-- complicated to handle, and this currently catches the common case.
+
+futureContinuation :: Block CmmNode O O -> Maybe BlockId
+futureContinuation middle = foldBlockNodesB f middle Nothing
+ where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId
+ f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _
+ = Just l
+ f _ r = r
+
+-- -----------------------------------------------------------------------------
+-- Saving live registers
+
+-- | Given a set of live registers and a StackMap, save all the registers
+-- on the stack and return the new StackMap and the assignments to do
+-- the saving.
+--
+allocate :: ByteOff -> RegSet -> StackMap -> (StackMap, [CmmNode O O])
+allocate ret_off live stackmap@StackMap{ sm_sp = sp0
+ , sm_regs = regs0 }
+ =
+ pprTrace "allocate" (ppr live $$ ppr stackmap) $
+
+ -- we only have to save regs that are not already in a slot
+ let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live)
+ regs1 = filterUFM (\(r,_) -> elemRegSet r live) regs0
+ in
+
+ -- make a map of the stack
+ let stack = reverse $ Array.elems $
+ accumArray (\_ x -> x) Empty (1, toWords (max sp0 ret_off)) $
+ ret_words ++ live_words
+ where ret_words =
+ [ (x, Occupied)
+ | x <- [ 1 .. toWords ret_off] ]
+ live_words =
+ [ (toWords x, Occupied)
+ | (r,off) <- eltsUFM regs1,
+ let w = localRegBytes r,
+ x <- [ off, off-wORD_SIZE .. off - w + 1] ]
+ in
+
+ -- Pass over the stack: find slots to save all the new live variables,
+ -- choosing the oldest slots first (hence a foldr).
+ let
+ save slot ([], stack, n, assigs, regs) -- no more regs to save
+ = ([], slot:stack, n `plusW` 1, assigs, regs)
+ save slot (to_save, stack, n, assigs, regs)
+ = case slot of
+ Occupied -> (to_save, Occupied:stack, n `plusW` 1, assigs, regs)
+ Empty
+ | Just (stack', r, to_save') <-
+ select_save to_save (slot:stack)
+ -> let assig = CmmStore (CmmStackSlot Old n')
+ (CmmReg (CmmLocal r))
+ n' = n `plusW` 1
+ in
+ (to_save', stack', n', assig : assigs, (r,(r,n')):regs)
+
+ | otherwise
+ -> (to_save, slot:stack, n `plusW` 1, assigs, regs)
+
+ -- we should do better here: right now we'll fit the smallest first,
+ -- but it would make more sense to fit the biggest first.
+ select_save :: [LocalReg] -> [StackSlot]
+ -> Maybe ([StackSlot], LocalReg, [LocalReg])
+ select_save regs stack = go regs []
+ where go [] _no_fit = Nothing
+ go (r:rs) no_fit
+ | Just rest <- dropEmpty words stack
+ = Just (replicate words Occupied ++ rest, r, rs++no_fit)
+ | otherwise
+ = go rs (r:no_fit)
+ where words = localRegWords r
+
+ -- fill in empty slots as much as possible
+ (still_to_save, save_stack, n, save_assigs, save_regs)
+ = foldr save (to_save, [], 0, [], []) stack
+
+ -- push any remaining live vars on the stack
+ (push_sp, push_assigs, push_regs)
+ = foldr push (n, [], []) still_to_save
+ where
+ push r (n, assigs, regs)
+ = (n', assig : assigs, (r,(r,n')) : regs)
+ where
+ n' = n + localRegBytes r
+ assig = CmmStore (CmmStackSlot Old n')
+ (CmmReg (CmmLocal r))
+
+ trim_sp
+ | not (null push_regs) = push_sp
+ | otherwise
+ = n `plusW` (- length (takeWhile isEmpty save_stack))
+
+ final_regs = regs1 `addListToUFM` push_regs
+ `addListToUFM` save_regs
+
+ in
+ -- XXX should be an assert
+ if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else
+
+ if (trim_sp .&. (wORD_SIZE - 1)) /= 0 then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else
+
+ ( stackmap { sm_regs = final_regs , sm_sp = trim_sp }
+ , push_assigs ++ save_assigs )
+
+
+-- -----------------------------------------------------------------------------
+-- Manifesting Sp
+
+-- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp. The
+-- block looks like this:
+--
+-- middle_pre -- the middle nodes
+-- Sp = Sp + sp_off -- Sp adjustment goes here
+-- last -- the last node
+--
+-- And we have some extra blocks too (that don't contain Sp adjustments)
+--
+-- The adjustment for middle_pre will be different from that for
+-- middle_post, because the Sp adjustment intervenes.
+--
+manifestSp
+ :: BlockEnv StackMap -- StackMaps for other blocks
+ -> StackMap -- StackMap for this block
+ -> ByteOff -- Sp on entry to the block
+ -> ByteOff -- SpHigh
+ -> CmmNode C O -- first node
+ -> [CmmNode O O] -- middle
+ -> ByteOff -- sp_off
+ -> CmmNode O C -- last node
+ -> [CmmBlock] -- new blocks
+ -> [CmmBlock] -- final blocks with Sp manifest
+
+manifestSp stackmaps stack0 sp0 sp_high
+ first middle_pre sp_off last fixup_blocks
+ = final_block : fixup_blocks'
+ where
+ area_off = getAreaOff stackmaps
+
+ adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x
+ adj_pre_sp = mapExpDeep (areaToSp sp0 sp_high area_off)
+ adj_post_sp = mapExpDeep (areaToSp (sp0 - sp_off) sp_high area_off)
+
+ final_middle = maybeAddSpAdj sp_off $
+ blockFromList $
+ map adj_pre_sp $
+ elimStackStores stack0 stackmaps area_off $
+ middle_pre
+
+ final_last = optStackCheck (adj_post_sp last)
+
+ final_block = blockJoin first final_middle final_last
+
+ fixup_blocks' = map (blockMapNodes3 (id, adj_post_sp, id)) fixup_blocks
+
+
+getAreaOff :: BlockEnv StackMap -> (Area -> StackLoc)
+getAreaOff _ Old = 0
+getAreaOff stackmaps (Young l) =
+ case mapLookup l stackmaps of
+ Just sm -> sm_sp sm - sm_args sm
+ Nothing -> pprPanic "getAreaOff" (ppr l)
+
+
+maybeAddSpAdj :: ByteOff -> Block CmmNode O O -> Block CmmNode O O
+maybeAddSpAdj 0 block = block
+maybeAddSpAdj sp_off block
+ = block `blockSnoc` CmmAssign spReg (cmmOffset (CmmReg spReg) sp_off)
+
+
+{-
+Sp(L) is the Sp offset on entry to block L relative to the base of the
+OLD area.
+
+SpArgs(L) is the size of the young area for L, i.e. the number of
+arguments.
+
+ - in block L, each reference to [old + N] turns into
+ [Sp + Sp(L) - N]
+
+ - in block L, each reference to [young(L') + N] turns into
+ [Sp + Sp(L) - Sp(L') + SpArgs(L') - N]
+
+ - be careful with the last node of each block: Sp has already been adjusted
+ to be Sp + Sp(L) - Sp(L')
+-}
+
+areaToSp :: ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr
+areaToSp sp_old _sp_hwm area_off (CmmStackSlot area n) =
+ cmmOffset (CmmReg spReg) (sp_old - area_off area - n)
+areaToSp _ sp_hwm _ (CmmLit CmmHighStackMark) = CmmLit (mkIntCLit sp_hwm)
+areaToSp _ _ _ (CmmMachOp (MO_U_Lt _) -- Note [null stack check]
+ [CmmMachOp (MO_Sub _)
+ [ CmmReg (CmmGlobal Sp)
+ , CmmLit (CmmInt 0 _)],
+ CmmReg (CmmGlobal SpLim)]) = CmmLit (CmmInt 0 wordWidth)
+areaToSp _ _ _ other = other
+
+-- -----------------------------------------------------------------------------
+-- Note [null stack check]
+--
+-- If the high-water Sp is zero, then we end up with
+--
+-- if (Sp - 0 < SpLim) then .. else ..
+--
+-- and possibly some dead code for the failure case. Optimising this
+-- away depends on knowing that SpLim <= Sp, so it is really the job
+-- of the stack layout algorithm, hence we do it now. This is also
+-- convenient because control-flow optimisation later will drop the
+-- dead code.
+
+optStackCheck :: CmmNode O C -> CmmNode O C
+optStackCheck n = -- Note [null stack check]
+ case n of
+ CmmCondBranch (CmmLit (CmmInt 0 _)) _true false -> CmmBranch false
+ other -> other
+
+
+-- -----------------------------------------------------------------------------
+
+-- | Eliminate stores of the form
+--
+-- Sp[area+n] = r
+--
+-- when we know that r is already in the same slot as Sp[area+n]. We
+-- could do this in a later optimisation pass, but that would involve
+-- a separate analysis and we already have the information to hand
+-- here. It helps clean up some extra stack stores in common cases.
+--
+-- Note that we may have to modify the StackMap as we walk through the
+-- code using procMiddle, since an assignment to a variable in the
+-- StackMap will invalidate its mapping there.
+--
+elimStackStores :: StackMap
+ -> BlockEnv StackMap
+ -> (Area -> ByteOff)
+ -> [CmmNode O O]
+ -> [CmmNode O O]
+elimStackStores stackmap stackmaps area_off nodes
+ = go stackmap nodes
+ where
+ go _stackmap [] = []
+ go stackmap (n:ns)
+ = case n of
+ CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r))
+ | Just (_,off) <- lookupUFM (sm_regs stackmap) r
+ , area_off area + m == off
+ -> pprTrace "eliminated a node!" (ppr r) $ go stackmap ns
+ _otherwise
+ -> n : go (procMiddle stackmaps n stackmap) ns
+
+
+-- -----------------------------------------------------------------------------
+-- Update info tables to include stack liveness
+
+
+setInfoTableStackMap :: BlockEnv StackMap -> CmmDecl -> CmmDecl
+setInfoTableStackMap stackmaps
+ (CmmProc top_info@TopInfo{..} l g@CmmGraph{g_entry = eid})
+ = CmmProc top_info{ info_tbl = fix_info info_tbl } l g
+ where
+ fix_info info_tbl@CmmInfoTable{ cit_rep = StackRep _ } =
+ info_tbl { cit_rep = StackRep (get_liveness eid) }
+ fix_info other = other
+
+ get_liveness :: BlockId -> Liveness
+ get_liveness lbl
+ = case mapLookup lbl stackmaps of
+ Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl)
+ Just sm -> stackMapToLiveness sm
+
+setInfoTableStackMap _ d = d
+
+
+stackMapToLiveness :: StackMap -> Liveness
+stackMapToLiveness StackMap{..} =
+ reverse $ Array.elems $
+ accumArray (\_ x -> x) True (toWords sm_ret_off + 1,
+ toWords (sm_sp - sm_args)) live_words
+ where
+ live_words = [ (toWords off, False)
+ | (r,off) <- eltsUFM sm_regs, isGcPtrType (localRegType r) ]
+
+
+-- -----------------------------------------------------------------------------
+-- Lowering safe foreign calls
+
+{-
+Note [lower safe foreign calls]
+
+We start with
+
+ Sp[young(L1)] = L1
+ ,-----------------------
+ | r1 = foo(x,y,z) returns to L1
+ '-----------------------
+ L1:
+ R1 = r1 -- copyIn, inserted by mkSafeCall
+ ...
+
+the stack layout algorithm will arrange to save and reload everything
+live across the call. Our job now is to expand the call so we get
+
+ Sp[young(L1)] = L1
+ ,-----------------------
+ | SAVE_THREAD_STATE()
+ | token = suspendThread(BaseReg, interruptible)
+ | r = foo(x,y,z)
+ | BaseReg = resumeThread(token)
+ | LOAD_THREAD_STATE()
+ | R1 = r -- copyOut
+ | jump L1
+ '-----------------------
+ L1:
+ r = R1 -- copyIn, inserted by mkSafeCall
+ ...
+
+Note the copyOut, which saves the results in the places that L1 is
+expecting them (see Note {safe foreign call convention]).
+-}
+
+lowerSafeForeignCall :: CmmBlock -> UniqSM CmmBlock
+lowerSafeForeignCall block
+ | (entry, middle, CmmForeignCall { .. }) <- blockSplit block
+ = do
+ -- Both 'id' and 'new_base' are KindNonPtr because they're
+ -- RTS-only objects and are not subject to garbage collection
+ id <- newTemp bWord
+ new_base <- newTemp (cmmRegType (CmmGlobal BaseReg))
+ let (caller_save, caller_load) = callerSaveVolatileRegs
+ load_tso <- newTemp gcWord
+ load_stack <- newTemp gcWord
+ let suspend = saveThreadState <*>
+ caller_save <*>
+ mkMiddle (callSuspendThread id intrbl)
+ midCall = mkUnsafeCall tgt res args
+ resume = mkMiddle (callResumeThread new_base id) <*>
+ -- Assign the result to BaseReg: we
+ -- might now have a different Capability!
+ mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*>
+ caller_load <*>
+ loadThreadState load_tso load_stack
+ -- Note: The successor must be a procpoint, and we have already split,
+ -- so we use a jump, not a branch.
+ succLbl = CmmLit (CmmLabel (infoTblLbl succ))
+
+ (ret_args, copyout) = copyOutOflow NativeReturn Jump (Young succ)
+ (map (CmmReg . CmmLocal) res)
+ updfr (0, [])
+
+ jump = CmmCall { cml_target = succLbl
+ , cml_cont = Just succ
+ , cml_args = widthInBytes wordWidth
+ , cml_ret_args = ret_args
+ , cml_ret_off = updfr }
+
+ graph' <- lgraphOfAGraph $ suspend <*>
+ midCall <*>
+ resume <*>
+ copyout <*>
+ mkLast jump
+
+ case toBlockList graph' of
+ [one] -> let (_, middle', last) = blockSplit one
+ in return (blockJoin entry (middle `blockAppend` middle') last)
+ _ -> panic "lowerSafeForeignCall0"
+
+ -- Block doesn't end in a safe foreign call:
+ | otherwise = return block
+
+
+foreignLbl :: FastString -> CmmExpr
+foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId name))
+
+newTemp :: CmmType -> UniqSM LocalReg
+newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep)
+
+callSuspendThread :: LocalReg -> Bool -> CmmNode O O
+callSuspendThread id intrbl =
+ CmmUnsafeForeignCall
+ (ForeignTarget (foreignLbl (fsLit "suspendThread"))
+ (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint]))
+ [id] [CmmReg (CmmGlobal BaseReg), CmmLit (mkIntCLit (fromEnum intrbl))]
+
+callResumeThread :: LocalReg -> LocalReg -> CmmNode O O
+callResumeThread new_base id =
+ CmmUnsafeForeignCall
+ (ForeignTarget (foreignLbl (fsLit "resumeThread"))
+ (ForeignConvention CCallConv [AddrHint] [AddrHint]))
+ [new_base] [CmmReg (CmmLocal id)]
+
+-- -----------------------------------------------------------------------------
+
+plusW :: ByteOff -> WordOff -> ByteOff
+plusW b w = b + w * wORD_SIZE
+
+dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot]
+dropEmpty 0 ss = Just ss
+dropEmpty n (Empty : ss) = dropEmpty (n-1) ss
+dropEmpty _ _ = Nothing
+
+isEmpty :: StackSlot -> Bool
+isEmpty Empty = True
+isEmpty _ = False
+
+localRegBytes :: LocalReg -> ByteOff
+localRegBytes r = roundUpToWords (widthInBytes (typeWidth (localRegType r)))
+
+localRegWords :: LocalReg -> WordOff
+localRegWords = toWords . localRegBytes
+
+toWords :: ByteOff -> WordOff
+toWords x = x `quot` wORD_SIZE
+
+
+insertReloads :: StackMap -> [CmmNode O O]
+insertReloads stackmap =
+ [ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot Old sp)
+ (localRegType r))
+ | (r,sp) <- stackSlotRegs stackmap
+ ]
+
+
+stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)]
+stackSlotRegs sm = eltsUFM (sm_regs sm)
+
+-- -----------------------------------------------------------------------------
+
+-- If we do this *before* stack layout, we might be able to avoid
+-- saving some things across calls/procpoints.
+--
+-- *but*, that will invalidate the liveness analysis, and we'll have
+-- to re-do it.
+
+cmmSink :: CmmGraph -> FuelUniqSM CmmGraph
+cmmSink graph = do
+ let liveness = cmmLiveness graph
+ return $ cmmSink' liveness graph
+
+cmmSink' :: BlockEnv CmmLive -> CmmGraph -> CmmGraph
+cmmSink' liveness graph
+ = ofBlockList (g_entry graph) $ sink mapEmpty $ postorderDfs graph
+ where
+
+ sink :: BlockEnv [(LocalReg, CmmExpr)] -> [CmmBlock] -> [CmmBlock]
+ sink _ [] = []
+ sink sunk (b:bs) =
+ pprTrace "sink" (ppr l) $
+ blockJoin first final_middle last : sink sunk' bs
+ where
+ l = entryLabel b
+ (first, middle, last) = blockSplit b
+ (middle', assigs) = walk (blockToList middle) emptyBlock
+ (mapFindWithDefault [] l sunk)
+
+ (dropped_last, assigs') = partition (`conflictsWithLast` last) assigs
+
+ final_middle = foldl blockSnoc middle' (toNodes dropped_last)
+
+ sunk' = mapUnion sunk $
+ mapFromList [ (l, filt assigs' (getLive l))
+ | l <- successors last ]
+ where
+ getLive l = mapFindWithDefault Set.empty l liveness
+ filt as live = [ (r,e) | (r,e) <- as, r `Set.member` live ]
+
+
+walk :: [CmmNode O O] -> Block CmmNode O O -> [(LocalReg, CmmExpr)]
+ -> (Block CmmNode O O, [(LocalReg, CmmExpr)])
+
+walk [] acc as = (acc, as)
+walk (n:ns) acc as
+ | Just a <- collect_it = walk ns acc (a:as)
+ | otherwise = walk ns (foldr (flip blockSnoc) acc (n:drop_nodes)) as'
+ where
+ collect_it = case n of
+ CmmAssign (CmmLocal r) e@(CmmReg (CmmGlobal _)) -> Just (r,e)
+-- CmmAssign (CmmLocal r) e@(CmmLoad addr _) |
+-- foldRegsUsed (\b r -> False) True addr -> Just (r,e)
+ _ -> Nothing
+
+ drop_nodes = toNodes dropped
+ (dropped, as') = partition should_drop as
+ where should_drop a = a `conflicts` n
+
+toNodes as = [ CmmAssign (CmmLocal r) rhs | (r,rhs) <- as ]
+
+-- We only sink "r = G" assignments right now, so conflicts is very simple:
+(r, rhs) `conflicts` CmmAssign reg _ | reg `regUsedIn` rhs = True
+--(r, CmmLoad _ _) `conflicts` CmmStore _ _ = True
+(r, _) `conflicts` node
+ = foldRegsUsed (\b r' -> r == r' || b) False node
+
+(r, _) `conflictsWithLast` node
+ = foldRegsUsed (\b r' -> r == r' || b) False node
diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs
index 01ebac6254..cd0558616e 100644
--- a/compiler/cmm/CmmLint.hs
+++ b/compiler/cmm/CmmLint.hs
@@ -1,67 +1,71 @@
-----------------------------------------------------------------------------
--
--- (c) The University of Glasgow 2004-2006
+-- (c) The University of Glasgow 2011
--
-- CmmLint: checking the correctness of Cmm statements and expressions
--
-----------------------------------------------------------------------------
-
-{-# OPTIONS -fno-warn-tabs #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and
--- detab the module (please do the detabbing in a separate patch). See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
--- for details
-
+{-# LANGUAGE GADTs #-}
module CmmLint (
- cmmLint, cmmLintTop
+ cmmLint, cmmLintDecl, cmmLintGraph
) where
+import Hoopl
+import Cmm
+import CmmUtils
+import PprCmm ()
import BlockId
-import OldCmm
+import FastString
import CLabel
import Outputable
-import OldPprCmm()
import Constants
-import FastString
-import Platform
import Data.Maybe
+-- Things to check:
+-- - invariant on CmmBlock in CmmExpr (see comment there)
+-- - check for branches to blocks that don't exist
+-- - check types
+
-- -----------------------------------------------------------------------------
-- Exported entry points:
cmmLint :: (Outputable d, Outputable h)
- => Platform -> GenCmmGroup d h (ListGraph CmmStmt) -> Maybe SDoc
-cmmLint platform tops = runCmmLint platform (mapM_ (lintCmmDecl platform)) tops
+ => GenCmmGroup d h (ListGraph CmmStmt) -> Maybe SDoc
+cmmLint tops = runCmmLint (mapM_ lintCmmDecl) tops
-cmmLintTop :: (Outputable d, Outputable h)
- => Platform -> GenCmmDecl d h (ListGraph CmmStmt) -> Maybe SDoc
-cmmLintTop platform top = runCmmLint platform (lintCmmDecl platform) top
+cmmLintGraph :: CmmGraph -> Maybe SDoc
+cmmLintGraph g = runCmmLint lintCmmGraph g
-runCmmLint :: Outputable a
- => Platform -> (a -> CmmLint b) -> a -> Maybe SDoc
-runCmmLint _ l p =
+runCmmLint :: Outputable a => (a -> CmmLint b) -> a -> Maybe SDoc
+runCmmLint l p =
case unCL (l p) of
- Left err -> Just (vcat [ptext $ sLit ("Cmm lint error:"),
- nest 2 err,
- ptext $ sLit ("Program was:"),
- nest 2 (ppr p)])
- Right _ -> Nothing
-
-lintCmmDecl :: Platform -> (GenCmmDecl h i (ListGraph CmmStmt)) -> CmmLint ()
-lintCmmDecl platform (CmmProc _ lbl (ListGraph blocks))
- = addLintInfo (text "in proc " <> pprCLabel platform lbl) $
- let labels = foldl (\s b -> setInsert (blockId b) s) setEmpty blocks
- in mapM_ (lintCmmBlock platform labels) blocks
-
-lintCmmDecl _ (CmmData {})
+ Left err -> Just (vcat [ptext $ sLit ("Cmm lint error:"),
+ nest 2 err,
+ ptext $ sLit ("Program was:"),
+ nest 2 (ppr p)])
+ Right _ -> Nothing
+
+lintCmmDecl :: GenCmmDecl h i CmmGraph -> CmmLint ()
+lintCmmDecl (CmmProc _ lbl g)
+ = addLintInfo (text "in proc " <> ppr lbl) $ lintCmmGraph g
+lintCmmDecl (CmmData {})
= return ()
-lintCmmBlock :: Platform -> BlockSet -> GenBasicBlock CmmStmt -> CmmLint ()
-lintCmmBlock platform labels (BasicBlock id stmts)
- = addLintInfo (text "in basic block " <> ppr id) $
- mapM_ (lintCmmStmt platform labels) stmts
+
+lintCmmGraph :: CmmGraph -> CmmLint ()
+lintCmmGraph g = mapM_ (lintCmmBlock labels) blocks
+ where
+ blocks = toBlockList g
+ labels = setFromList (map entryLabel blocks)
+
+
+lintCmmBlock :: BlockSet -> CmmBlock -> CmmLint ()
+lintCmmBlock labels block
+ = addLintInfo (\_ -> text "in basic block " <> ppr (entryLabel block)) $ do
+ let (_, middle, last) = blockSplit block
+ mapM_ lintCmmMiddle (blockToList middle)
+ lintCmmLast labels last
-- -----------------------------------------------------------------------------
-- lintCmmExpr
@@ -69,24 +73,24 @@ lintCmmBlock platform labels (BasicBlock id stmts)
-- Checks whether a CmmExpr is "type-correct", and check for obvious-looking
-- byte/word mismatches.
-lintCmmExpr :: Platform -> CmmExpr -> CmmLint CmmType
-lintCmmExpr platform (CmmLoad expr rep) = do
- _ <- lintCmmExpr platform expr
+lintCmmExpr :: CmmExpr -> CmmLint CmmType
+lintCmmExpr (CmmLoad expr rep) = do
+ _ <- lintCmmExpr expr
-- Disabled, if we have the inlining phase before the lint phase,
-- we can have funny offsets due to pointer tagging. -- EZY
-- when (widthInBytes (typeWidth rep) >= wORD_SIZE) $
-- cmmCheckWordAddress expr
return rep
-lintCmmExpr platform expr@(CmmMachOp op args) = do
- tys <- mapM (lintCmmExpr platform) args
+lintCmmExpr expr@(CmmMachOp op args) = do
+ tys <- mapM lintCmmExpr args
if map (typeWidth . cmmExprType) args == machOpArgReps op
- then cmmCheckMachOp op args tys
- else cmmLintMachOpErr expr (map cmmExprType args) (machOpArgReps op)
-lintCmmExpr platform (CmmRegOff reg offset)
- = lintCmmExpr platform (CmmMachOp (MO_Add rep)
- [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)])
+ then cmmCheckMachOp op args tys
+ else cmmLintMachOpErr expr (map cmmExprType args) (machOpArgReps op)
+lintCmmExpr (CmmRegOff reg offset)
+ = lintCmmExpr (CmmMachOp (MO_Add rep)
+ [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)])
where rep = typeWidth (cmmRegType reg)
-lintCmmExpr _ expr =
+lintCmmExpr expr =
return (cmmExprType expr)
-- Check for some common byte/word mismatches (eg. Sp + 1)
@@ -119,43 +123,61 @@ notNodeReg :: CmmExpr -> Bool
notNodeReg (CmmReg reg) | reg == nodeReg = False
notNodeReg _ = True
-lintCmmStmt :: Platform -> BlockSet -> CmmStmt -> CmmLint ()
-lintCmmStmt platform labels = lint
- where lint (CmmNop) = return ()
- lint (CmmComment {}) = return ()
- lint stmt@(CmmAssign reg expr) = do
- erep <- lintCmmExpr platform expr
- let reg_ty = cmmRegType reg
+lintCmmMiddle :: CmmNode O O -> CmmLint ()
+lintCmmMiddle node = case node of
+ CmmComment _ -> return ()
+
+ CmmAssign reg expr -> do
+ erep <- lintCmmExpr expr
+ let reg_ty = cmmRegType reg
if (erep `cmmEqType_ignoring_ptrhood` reg_ty)
then return ()
- else cmmLintAssignErr stmt erep reg_ty
- lint (CmmStore l r) = do
- _ <- lintCmmExpr platform l
- _ <- lintCmmExpr platform r
+ else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty
+
+ CmmStore l r -> do
+ _ <- lintCmmExpr l
+ _ <- lintCmmExpr r
return ()
- lint (CmmCall target _res args _) =
- do lintTarget platform labels target
- mapM_ (lintCmmExpr platform . hintlessCmm) args
- lint (CmmCondBranch e id) = checkTarget id >> lintCmmExpr platform e >> checkCond e
- lint (CmmSwitch e branches) = do
+
+ CmmUnsafeForeignCall target _formals actuals -> do
+ lintTarget target
+ mapM_ lintCmmExpr actuals
+
+
+lintCmmLast :: BlockSet -> CmmNode O C -> CmmLint ()
+lintCmmLast labels node = case node of
+ CmmBranch id -> checkTarget id
+
+ CmmCondBranch e t f -> do
+ mapM_ checkTarget [t,f]
+ _ <- lintCmmExpr e
+ checkCond e
+
+ CmmSwitch e branches -> do
mapM_ checkTarget $ catMaybes branches
- erep <- lintCmmExpr platform e
+ erep <- lintCmmExpr e
if (erep `cmmEqType_ignoring_ptrhood` bWord)
then return ()
- else cmmLintErr (text "switch scrutinee is not a word: " <> ppr e <>
- text " :: " <> ppr erep)
- lint (CmmJump e _) = lintCmmExpr platform e >> return ()
- lint (CmmReturn) = return ()
- lint (CmmBranch id) = checkTarget id
- checkTarget id = if setMember id labels then return ()
- else cmmLintErr (text "Branch to nonexistent id" <+> ppr id)
-
-lintTarget :: Platform -> BlockSet -> CmmCallTarget -> CmmLint ()
-lintTarget platform _ (CmmCallee e _) = do _ <- lintCmmExpr platform e
- return ()
-lintTarget _ _ (CmmPrim _ Nothing) = return ()
-lintTarget platform labels (CmmPrim _ (Just stmts))
- = mapM_ (lintCmmStmt platform labels) stmts
+ else cmmLintErr (text "switch scrutinee is not a word: " <>
+ ppr e <> text " :: " <> ppr erep)
+
+ CmmCall { cml_target = target, cml_cont = cont } -> do
+ _ <- lintCmmExpr target
+ maybe (return ()) checkTarget cont
+
+ CmmForeignCall tgt _ args succ _ _ -> do
+ lintTarget tgt
+ mapM_ lintCmmExpr args
+ checkTarget succ
+ where
+ checkTarget id
+ | setMember id labels = return ()
+ | otherwise = cmmLintErr (\_ -> text "Branch to nonexistent id" <+> ppr id)
+
+
+lintTarget :: ForeignTarget -> CmmLint ()
+lintTarget (ForeignTarget e _) = lintCmmExpr e >> return ()
+lintTarget (PrimTarget {}) = return ()
checkCond :: CmmExpr -> CmmLint ()
@@ -163,7 +185,7 @@ checkCond (CmmMachOp mop _) | isComparisonMachOp mop = return ()
checkCond (CmmLit (CmmInt x t)) | x == 0 || x == 1, t == wordWidth = return () -- constant values
checkCond expr
= cmmLintErr (hang (text "expression is not a conditional:") 2
- (ppr expr))
+ (ppr expr))
-- -----------------------------------------------------------------------------
-- CmmLint monad
@@ -173,37 +195,36 @@ checkCond expr
newtype CmmLint a = CmmLint { unCL :: Either SDoc a }
instance Monad CmmLint where
- CmmLint m >>= k = CmmLint $ case m of
- Left e -> Left e
- Right a -> unCL (k a)
- return a = CmmLint (Right a)
+ CmmLint m >>= k = CmmLint $ \p -> case m p of
+ Left e -> Left e
+ Right a -> unCL (k a) p
+ return a = CmmLint (\_ -> Right a)
cmmLintErr :: SDoc -> CmmLint a
-cmmLintErr msg = CmmLint (Left msg)
+cmmLintErr msg = CmmLint (\p -> Left (msg p))
addLintInfo :: SDoc -> CmmLint a -> CmmLint a
-addLintInfo info thing = CmmLint $
- case unCL thing of
- Left err -> Left (hang info 2 err)
- Right a -> Right a
+addLintInfo info thing = CmmLint $ \p ->
+ case unCL thing p of
+ Left err -> Left (hang (info p) 2 err)
+ Right a -> Right a
cmmLintMachOpErr :: CmmExpr -> [CmmType] -> [Width] -> CmmLint a
cmmLintMachOpErr expr argsRep opExpectsRep
- = cmmLintErr (text "in MachOp application: " $$
- nest 2 (ppr expr) $$
- (text "op is expecting: " <+> ppr opExpectsRep) $$
- (text "arguments provide: " <+> ppr argsRep))
+ = cmmLintErr (text "in MachOp application: " $$
+ nest 2 (ppr expr) $$
+ (text "op is expecting: " <+> ppr opExpectsRep) $$
+ (text "arguments provide: " <+> ppr argsRep))
-cmmLintAssignErr :: CmmStmt -> CmmType -> CmmType -> CmmLint a
+cmmLintAssignErr :: CmmNode e x -> CmmType -> CmmType -> CmmLint a
cmmLintAssignErr stmt e_ty r_ty
- = cmmLintErr (text "in assignment: " $$
- nest 2 (vcat [ppr stmt,
- text "Reg ty:" <+> ppr r_ty,
- text "Rhs ty:" <+> ppr e_ty]))
-
-
+ = cmmLintErr (text "in assignment: " $$
+ nest 2 (vcat [ppr stmt,
+ text "Reg ty:" <+> ppr r_ty,
+ text "Rhs ty:" <+> ppr e_ty]))
+
cmmLintDubiousWordOffset :: CmmExpr -> CmmLint a
cmmLintDubiousWordOffset expr
= cmmLintErr (text "offset is not a multiple of words: " $$
- nest 2 (ppr expr))
+ nest 2 (ppr expr))
diff --git a/compiler/cmm/CmmLive.hs b/compiler/cmm/CmmLive.hs
index 9a5bb2d5ae..ac9c38b448 100644
--- a/compiler/cmm/CmmLive.hs
+++ b/compiler/cmm/CmmLive.hs
@@ -18,10 +18,9 @@ import Control.Monad
import OptimizationFuel
import PprCmmExpr ()
-import Compiler.Hoopl
+import Hoopl
import Maybes
import Outputable
-import UniqSet
-----------------------------------------------------------------------------
-- Calculating what variables are live on entry to a basic block
@@ -33,8 +32,10 @@ type CmmLive = RegSet
-- | The dataflow lattice
liveLattice :: DataflowLattice CmmLive
liveLattice = DataflowLattice "live LocalReg's" emptyRegSet add
- where add _ (OldFact old) (NewFact new) = case unionUniqSets old new of
- join -> (changeIf $ sizeUniqSet join > sizeUniqSet old, join)
+ where add _ (OldFact old) (NewFact new) =
+ (changeIf $ sizeRegSet join > sizeRegSet old, join)
+ where !join = plusRegSet old new
+
-- | A mapping from block labels to the variables live on entry
type BlockEntryLiveness = BlockEnv CmmLive
@@ -43,16 +44,17 @@ type BlockEntryLiveness = BlockEnv CmmLive
-- | Calculated liveness info for a CmmGraph
-----------------------------------------------------------------------------
-cmmLiveness :: CmmGraph -> FuelUniqSM BlockEntryLiveness
+cmmLiveness :: CmmGraph -> BlockEntryLiveness
cmmLiveness graph =
- liftM check $ liftM snd $ dataflowPassBwd graph [] $ analBwd liveLattice xferLive
+ check $ dataflowAnalBwd graph [] $ analBwd liveLattice xferLive
where entry = g_entry graph
- check facts = noLiveOnEntry entry (expectJust "check" $ mapLookup entry facts) facts
+ check facts = noLiveOnEntry entry
+ (expectJust "check" $ mapLookup entry facts) facts
-- | On entry to the procedure, there had better not be any LocalReg's live-in.
noLiveOnEntry :: BlockId -> CmmLive -> a -> a
noLiveOnEntry bid in_fact x =
- if isEmptyUniqSet in_fact then x
+ if nullRegSet in_fact then x
else pprPanic "LocalReg's live-in to graph" (ppr bid <+> ppr in_fact)
-- | The transfer equations use the traditional 'gen' and 'kill'
@@ -60,42 +62,42 @@ noLiveOnEntry bid in_fact x =
gen :: UserOfLocalRegs a => a -> RegSet -> RegSet
gen a live = foldRegsUsed extendRegSet live a
kill :: DefinerOfLocalRegs a => a -> RegSet -> RegSet
-kill a live = foldRegsDefd delOneFromUniqSet live a
+kill a live = foldRegsDefd deleteFromRegSet live a
-gen_kill :: (DefinerOfLocalRegs a, UserOfLocalRegs a) => a -> CmmLive -> CmmLive
+gen_kill :: (DefinerOfLocalRegs a, UserOfLocalRegs a)
+ => a -> CmmLive -> CmmLive
gen_kill a = gen a . kill a
-- | The transfer function
--- EZY: Bits of this analysis are duplicated in CmmSpillReload, though
--- it's not really easy to efficiently reuse all of this. Keep in mind
--- if you need to update this analysis.
xferLive :: BwdTransfer CmmNode CmmLive
xferLive = mkBTransfer3 fst mid lst
where fst _ f = f
mid :: CmmNode O O -> CmmLive -> CmmLive
mid n f = gen_kill n f
lst :: CmmNode O C -> FactBase CmmLive -> CmmLive
- -- slightly inefficient: kill is unnecessary for emptyRegSet
- lst n f = gen_kill n
- $ case n of CmmCall{} -> emptyRegSet
- CmmForeignCall{} -> emptyRegSet
- _ -> joinOutFacts liveLattice n f
+ lst n f = gen_kill n $ joinOutFacts liveLattice n f
-----------------------------------------------------------------------------
-- Removing assignments to dead variables
-----------------------------------------------------------------------------
-removeDeadAssignments :: CmmGraph -> FuelUniqSM CmmGraph
+removeDeadAssignments :: CmmGraph -> FuelUniqSM (CmmGraph, BlockEnv CmmLive)
removeDeadAssignments g =
- liftM fst $ dataflowPassBwd g [] $ analRewBwd liveLattice xferLive rewrites
- where rewrites = deepBwdRw3 nothing middle nothing
- -- Beware: deepBwdRw with one polymorphic function seems more reasonable here,
- -- but GHC panics while compiling, see bug #4045.
+ dataflowPassBwd g [] $ analRewBwd liveLattice xferLive rewrites
+ where rewrites = mkBRewrite3 nothing middle nothing
+ -- SDM: no need for deepBwdRw here, we only rewrite to empty
+ -- Beware: deepBwdRw with one polymorphic function seems more
+ -- reasonable here, but GHC panics while compiling, see bug
+ -- #4045.
middle :: CmmNode O O -> Fact O CmmLive -> CmmReplGraph O O
- middle (CmmAssign (CmmLocal reg') _) live | not (reg' `elemRegSet` live) = return $ Just emptyGraph
+ middle (CmmAssign (CmmLocal reg') _) live
+ | not (reg' `elemRegSet` live)
+ = return $ Just emptyGraph
-- XXX maybe this should be somewhere else...
- middle (CmmAssign lhs (CmmReg rhs)) _ | lhs == rhs = return $ Just emptyGraph
- middle (CmmStore lhs (CmmLoad rhs _)) _ | lhs == rhs = return $ Just emptyGraph
+ middle (CmmAssign lhs (CmmReg rhs)) _ | lhs == rhs
+ = return $ Just emptyGraph
+ middle (CmmStore lhs (CmmLoad rhs _)) _ | lhs == rhs
+ = return $ Just emptyGraph
middle _ _ = return Nothing
nothing :: CmmNode e x -> Fact x CmmLive -> CmmReplGraph e x
diff --git a/compiler/cmm/CmmNode.hs b/compiler/cmm/CmmNode.hs
index 4844af9d9a..cd46794580 100644
--- a/compiler/cmm/CmmNode.hs
+++ b/compiler/cmm/CmmNode.hs
@@ -18,7 +18,7 @@ module CmmNode (
CmmNode(..), ForeignHint(..), CmmFormal, CmmActual,
UpdFrameOffset, Convention(..), ForeignConvention(..), ForeignTarget(..),
mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf,
- mapExpM, mapExpDeepM, wrapRecExpM
+ mapExpM, mapExpDeepM, wrapRecExpM, mapSuccessors
) where
import CmmExpr
@@ -35,15 +35,17 @@ import Prelude hiding (succ)
------------------------
-- CmmNode
+#define ULabel {-# UNPACK #-} !Label
+
data CmmNode e x where
- CmmEntry :: Label -> CmmNode C O
+ CmmEntry :: ULabel -> CmmNode C O
CmmComment :: FastString -> CmmNode O O
- CmmAssign :: CmmReg -> CmmExpr -> CmmNode O O
+ CmmAssign :: !CmmReg -> !CmmExpr -> CmmNode O O
-- Assign to register
- CmmStore :: CmmExpr -> CmmExpr -> CmmNode O O
+ CmmStore :: !CmmExpr -> !CmmExpr -> CmmNode O O
-- Assign to memory location. Size is
-- given by cmmExprType of the rhs.
@@ -60,11 +62,12 @@ data CmmNode e x where
-- bug for what can be put in arguments, see
-- Note [Register Parameter Passing]
- CmmBranch :: Label -> CmmNode O C -- Goto another block in the same procedure
+ CmmBranch :: ULabel -> CmmNode O C
+ -- Goto another block in the same procedure
CmmCondBranch :: { -- conditional branch
cml_pred :: CmmExpr,
- cml_true, cml_false :: Label
+ cml_true, cml_false :: ULabel
} -> CmmNode O C
CmmSwitch :: CmmExpr -> [Maybe Label] -> CmmNode O C -- Table branch
@@ -78,6 +81,11 @@ data CmmNode e x where
cml_cont :: Maybe Label,
-- Label of continuation (Nothing for return or tail call)
+ --
+ -- Note [Continuation BlockId]: these BlockIds are called
+ -- Continuation BlockIds, and are the only BlockIds that can
+ -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or
+ -- (CmmStackSlot (Young b) _).
-- ToDO: add this:
-- cml_args_regs :: [GlobalReg],
@@ -117,7 +125,7 @@ data CmmNode e x where
tgt :: ForeignTarget, -- call target and convention
res :: [CmmFormal], -- zero or more results
args :: [CmmActual], -- zero or more arguments; see Note [Register parameter passing]
- succ :: Label, -- Label of continuation
+ succ :: ULabel, -- Label of continuation
updfr :: UpdFrameOffset, -- where the update frame is (for building infotable)
intrbl:: Bool -- whether or not the call is interruptible
} -> CmmNode O C
@@ -218,14 +226,6 @@ data Convention
| GC -- Entry to the garbage collector: uses the node reg!
| PrimOpCall -- Calling prim ops
| PrimOpReturn -- Returning from prim ops
- | Foreign -- Foreign call/return
- ForeignConvention
- | Private
- -- Used for control transfers within a (pre-CPS) procedure All
- -- jump sites known, never pushed on the stack (hence no SRT)
- -- You can choose whatever calling convention you please
- -- (provided you make sure all the call sites agree)!
- -- This data type eventually to be extended to record the convention.
deriving( Eq )
data ForeignConvention
@@ -283,37 +283,6 @@ instance DefinerOfLocalRegs (CmmNode e x) where
fold f z n = foldRegsDefd f z n
-instance UserOfSlots (CmmNode e x) where
- foldSlotsUsed f z n = case n of
- CmmAssign _ expr -> fold f z expr
- CmmStore addr rval -> fold f (fold f z addr) rval
- CmmUnsafeForeignCall _ _ args -> fold f z args
- CmmCondBranch expr _ _ -> fold f z expr
- CmmSwitch expr _ -> fold f z expr
- CmmCall {cml_target=tgt} -> fold f z tgt
- CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args
- _ -> z
- where fold :: forall a b.
- UserOfSlots a =>
- (b -> SubArea -> b) -> b -> a -> b
- fold f z n = foldSlotsUsed f z n
-
-instance UserOfSlots ForeignTarget where
- foldSlotsUsed f z (ForeignTarget e _) = foldSlotsUsed f z e
- foldSlotsUsed _f z (PrimTarget _) = z
-
-instance DefinerOfSlots (CmmNode e x) where
- foldSlotsDefd f z n = case n of
- CmmStore (CmmStackSlot a i) expr -> f z (a, i, widthInBytes $ typeWidth $ cmmExprType expr)
- CmmForeignCall {res=res} -> fold f z $ map foreign_call_slot res
- _ -> z
- where
- fold :: forall a b.
- DefinerOfSlots a =>
- (b -> SubArea -> b) -> b -> a -> b
- fold f z n = foldSlotsDefd f z n
- foreign_call_slot r = case widthInBytes $ typeWidth $ localRegType r of w -> (RegSlot r, w, w)
-
-----------------------------------
-- mapping Expr in CmmNode
@@ -416,4 +385,20 @@ foldExp f (CmmCall {cml_target=tgt}) z = f tgt z
foldExp f (CmmForeignCall {tgt=tgt, args=args}) z = foldr f (foldExpForeignTarget f tgt z) args
foldExpDeep :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z
-foldExpDeep f = foldExp $ wrapRecExpf f
+foldExpDeep f = foldExp go
+ where -- go :: CmmExpr -> z -> z
+ go e@(CmmMachOp _ es) z = gos es $! f e z
+ go e@(CmmLoad addr _) z = go addr $! f e z
+ go e z = f e z
+
+ gos [] z = z
+ gos (e:es) z = gos es $! f e z
+
+-- -----------------------------------------------------------------------------
+
+mapSuccessors :: (Label -> Label) -> CmmNode O C -> CmmNode O C
+mapSuccessors f (CmmBranch bid) = CmmBranch (f bid)
+mapSuccessors f (CmmCondBranch p y n) = CmmCondBranch p (f y) (f n)
+mapSuccessors f (CmmSwitch e arms) = CmmSwitch e (map (fmap f) arms)
+mapSuccessors f n = n
+
diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs
index 8cc18fc1ca..7c7ed393d9 100644
--- a/compiler/cmm/CmmOpt.hs
+++ b/compiler/cmm/CmmOpt.hs
@@ -145,7 +145,7 @@ To inline _smi:
-}
countUses :: UserOfLocalRegs a => a -> UniqFM Int
-countUses a = foldRegsUsed (\m r -> addToUFM m r (count m r + 1)) emptyUFM a
+countUses a = foldRegsUsed (\m r -> addToUFM_C (+) m r 1) emptyUFM a
where count m r = lookupWithDefaultUFM m (0::Int) r
cmmMiniInline :: DynFlags -> [CmmBasicBlock] -> [CmmBasicBlock]
@@ -157,25 +157,16 @@ cmmMiniInlineStmts :: DynFlags -> UniqFM Int -> [CmmStmt] -> [CmmStmt]
cmmMiniInlineStmts _ _ [] = []
cmmMiniInlineStmts dflags uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts)
-- not used: just discard this assignment
- | Nothing <- lookupUFM uses u
- = cmmMiniInlineStmts dflags uses stmts
+ | 0 <- lookupWithDefaultUFM uses 0 u
+ = cmmMiniInlineStmts uses stmts
- -- used (literal): try to inline at all the use sites
- | Just n <- lookupUFM uses u, isLit expr
- =
- ncgDebugTrace ("nativeGen: inlining " ++ showSDoc dflags (pprStmt stmt)) $
- case lookForInlineLit u expr stmts of
- (m, stmts')
- | n == m -> cmmMiniInlineStmts dflags (delFromUFM uses u) stmts'
- | otherwise ->
- stmt : cmmMiniInlineStmts dflags (adjustUFM (\x -> x - m) uses u) stmts'
-
- -- used (foldable to literal): try to inline at all the use sites
+ -- used (foldable to small thing): try to inline at all the use sites
| Just n <- lookupUFM uses u,
- e@(CmmLit _) <- wrapRecExp foldExp expr
+ e <- wrapRecExp foldExp expr,
+ isTiny e
=
- ncgDebugTrace ("nativeGen: inlining " ++ showSDoc dflags (pprStmt stmt)) $
- case lookForInlineLit u e stmts of
+ ncgDebugTrace ("nativeGen: inlining " ++ showSDoc (pprStmt stmt)) $
+ case lookForInlineMany u e stmts of
(m, stmts')
| n == m -> cmmMiniInlineStmts dflags (delFromUFM uses u) stmts'
| otherwise ->
@@ -188,6 +179,11 @@ cmmMiniInlineStmts dflags uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr)
ncgDebugTrace ("nativeGen: inlining " ++ showSDoc dflags (pprStmt stmt)) $
cmmMiniInlineStmts dflags uses stmts'
where
+ isTiny (CmmLit _) = True
+ isTiny (CmmReg (CmmGlobal _)) = True
+ -- not CmmLocal: that might invalidate the usage analysis results
+ isTiny _ = False
+
platform = targetPlatform dflags
foldExp (CmmMachOp op args) = cmmMachOpFold platform op args
foldExp e = e
@@ -201,26 +197,28 @@ cmmMiniInlineStmts platform uses (stmt:stmts)
-- register, and a list of statements. Inlines the expression at all
-- use sites of the register. Returns the number of substituations
-- made and the, possibly modified, list of statements.
-lookForInlineLit :: Unique -> CmmExpr -> [CmmStmt] -> (Int, [CmmStmt])
-lookForInlineLit _ _ [] = (0, [])
-lookForInlineLit u expr stmts@(stmt : rest)
- | Just n <- lookupUFM (countUses stmt) u
- = case lookForInlineLit u expr rest of
- (m, stmts) -> let z = n + m
- in z `seq` (z, inlineStmt u expr stmt : stmts)
-
- | ok_to_skip
- = case lookForInlineLit u expr rest of
+lookForInlineMany :: Unique -> CmmExpr -> [CmmStmt] -> (Int, [CmmStmt])
+lookForInlineMany u expr stmts = lookForInlineMany' u expr regset stmts
+ where regset = foldRegsUsed extendRegSet emptyRegSet expr
+
+lookForInlineMany' :: Unique -> CmmExpr -> RegSet -> [CmmStmt] -> (Int, [CmmStmt])
+lookForInlineMany' _ _ _ [] = (0, [])
+lookForInlineMany' u expr regset stmts@(stmt : rest)
+ | Just n <- lookupUFM (countUses stmt) u, okToInline expr stmt
+ = let stmt' = inlineStmt u expr stmt in
+ if okToSkip stmt' u expr regset
+ then case lookForInlineMany' u expr regset rest of
+ (m, stmts) -> let z = n + m
+ in z `seq` (z, stmt' : stmts)
+ else (n, stmt' : rest)
+
+ | okToSkip stmt u expr regset
+ = case lookForInlineMany' u expr regset rest of
(n, stmts) -> (n, stmt : stmts)
| otherwise
= (0, stmts)
- where
- -- We skip over assignments to registers, unless the register
- -- being assigned to is the one we're inlining.
- ok_to_skip = case stmt of
- CmmAssign (CmmLocal (LocalReg u' _)) _ | u' == u -> False
- _other -> True
+
lookForInline :: Unique -> CmmExpr -> [CmmStmt] -> Maybe [CmmStmt]
lookForInline u expr stmts = lookForInline' u expr regset stmts
@@ -229,10 +227,10 @@ lookForInline u expr stmts = lookForInline' u expr regset stmts
lookForInline' :: Unique -> CmmExpr -> RegSet -> [CmmStmt] -> Maybe [CmmStmt]
lookForInline' _ _ _ [] = panic "lookForInline' []"
lookForInline' u expr regset (stmt : rest)
- | Just 1 <- lookupUFM (countUses stmt) u, ok_to_inline
+ | Just 1 <- lookupUFM (countUses stmt) u, okToInline expr stmt
= Just (inlineStmt u expr stmt : rest)
- | ok_to_skip
+ | okToSkip stmt u expr regset
= case lookForInline' u expr regset rest of
Nothing -> Nothing
Just stmts -> Just (stmt:stmts)
@@ -240,31 +238,36 @@ lookForInline' u expr regset (stmt : rest)
| otherwise
= Nothing
- where
- -- we don't inline into CmmCall if the expression refers to global
- -- registers. This is a HACK to avoid global registers clashing with
- -- C argument-passing registers, really the back-end ought to be able
- -- to handle it properly, but currently neither PprC nor the NCG can
- -- do it. See also CgForeignCall:load_args_into_temps.
- ok_to_inline = case stmt of
- CmmCall{} -> hasNoGlobalRegs expr
- _ -> True
-
- -- Expressions aren't side-effecting. Temporaries may or may not
- -- be single-assignment depending on the source (the old code
- -- generator creates single-assignment code, but hand-written Cmm
- -- and Cmm from the new code generator is not single-assignment.)
- -- So we do an extra check to make sure that the register being
- -- changed is not one we were relying on. I don't know how much of a
- -- performance hit this is (we have to create a regset for every
- -- instruction.) -- EZY
- ok_to_skip = case stmt of
- CmmNop -> True
- CmmComment{} -> True
- CmmAssign (CmmLocal r@(LocalReg u' _)) _rhs | u' /= u && not (r `elemRegSet` regset) -> True
- CmmAssign g@(CmmGlobal _) _rhs -> not (g `regUsedIn` expr)
- _other -> False
+-- we don't inline into CmmCall if the expression refers to global
+-- registers. This is a HACK to avoid global registers clashing with
+-- C argument-passing registers, really the back-end ought to be able
+-- to handle it properly, but currently neither PprC nor the NCG can
+-- do it. See also CgForeignCall:load_args_into_temps.
+okToInline :: CmmExpr -> CmmStmt -> Bool
+okToInline expr CmmCall{} = hasNoGlobalRegs expr
+okToInline _ _ = True
+
+-- Expressions aren't side-effecting. Temporaries may or may not
+-- be single-assignment depending on the source (the old code
+-- generator creates single-assignment code, but hand-written Cmm
+-- and Cmm from the new code generator is not single-assignment.)
+-- So we do an extra check to make sure that the register being
+-- changed is not one we were relying on. I don't know how much of a
+-- performance hit this is (we have to create a regset for every
+-- instruction.) -- EZY
+okToSkip stmt u expr regset
+ = case stmt of
+ CmmNop -> True
+ CmmComment{} -> True
+ CmmAssign (CmmLocal r@(LocalReg u' _)) _rhs | u' /= u && not (r `elemRegSet` regset) -> True
+ CmmAssign g@(CmmGlobal _) _rhs -> not (g `regUsedIn` expr)
+ CmmStore _ _ -> not_a_load expr
+ _other -> False
+ where
+ not_a_load (CmmMachOp _ args) = all not_a_load args
+ not_a_load (CmmLoad _ _) = False
+ not_a_load _ = True
inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt
inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e)
diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y
index 075ed22ea9..f46d49e022 100644
--- a/compiler/cmm/CmmParse.y
+++ b/compiler/cmm/CmmParse.y
@@ -230,35 +230,31 @@ lits :: { [ExtFCode CmmExpr] }
cmmproc :: { ExtCode }
-- TODO: add real SRT/info tables to parsed Cmm
- : info maybe_formals_without_hints maybe_gc_block maybe_frame '{' body '}'
- { do ((entry_ret_label, info, live, formals, gc_block, frame), stmts) <-
+ : info maybe_formals_without_hints '{' body '}'
+ { do ((entry_ret_label, info, live, formals), stmts) <-
getCgStmtsEC' $ loopDecls $ do {
(entry_ret_label, info, live) <- $1;
formals <- sequence $2;
- gc_block <- $3;
- frame <- $4;
- $6;
- return (entry_ret_label, info, live, formals, gc_block, frame) }
+ $4;
+ return (entry_ret_label, info, live, formals) }
blks <- code (cgStmtsToBlocks stmts)
- code (emitInfoTableAndCode entry_ret_label (CmmInfo gc_block frame info) formals blks) }
+ code (emitInfoTableAndCode entry_ret_label info formals blks) }
| info maybe_formals_without_hints ';'
{ do (entry_ret_label, info, live) <- $1;
formals <- sequence $2;
- code (emitInfoTableAndCode entry_ret_label (CmmInfo Nothing Nothing info) formals []) }
+ code (emitInfoTableAndCode entry_ret_label info formals []) }
- | NAME maybe_formals_without_hints maybe_gc_block maybe_frame '{' body '}'
+ | NAME maybe_formals_without_hints '{' body '}'
{% withThisPackage $ \pkg ->
do newFunctionName $1 pkg
- ((formals, gc_block, frame), stmts) <-
+ (formals, stmts) <-
getCgStmtsEC' $ loopDecls $ do {
formals <- sequence $2;
- gc_block <- $3;
- frame <- $4;
- $6;
- return (formals, gc_block, frame) }
+ $4;
+ return formals }
blks <- code (cgStmtsToBlocks stmts)
- code (emitProc (CmmInfo gc_block frame CmmNonInfoTable) (mkCmmCodeLabel pkg $1) formals blks) }
+ code (emitProc CmmNonInfoTable (mkCmmCodeLabel pkg $1) formals blks) }
info :: { ExtFCode (CLabel, CmmInfoTable, [Maybe LocalReg]) }
: 'INFO_TABLE' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ')'
@@ -599,18 +595,7 @@ formals_without_hints :: { [ExtFCode LocalReg] }
formal_without_hint :: { ExtFCode LocalReg }
: type NAME { newLocal $1 $2 }
-maybe_frame :: { ExtFCode (Maybe UpdateFrame) }
- : {- empty -} { return Nothing }
- | 'jump' expr '(' exprs0 ')' { do { target <- $2;
- args <- sequence $4;
- return $ Just (UpdateFrame target args) } }
-
-maybe_gc_block :: { ExtFCode (Maybe BlockId) }
- : {- empty -} { return Nothing }
- | 'goto' NAME
- { do l <- lookupLabel $2; return (Just l) }
-
-type :: { CmmType }
+type :: { CmmType }
: 'bits8' { b8 }
| typenot8 { $1 }
@@ -1073,7 +1058,8 @@ parseCmmFile dflags filename = do
let msg = mkPlainErrMsg dflags span err
return ((emptyBag, unitBag msg), Nothing)
POk pst code -> do
- cmm <- initC dflags no_module (getCmm (unEC code initEnv [] >> return ()))
+ st <- initC
+ let (cmm,_) = runC dflags no_module st (getCmm (unEC code initEnv [] >> return ()))
let ms = getMessages pst
if (errorsFound dflags ms)
then return (ms, Nothing)
diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs
index 7361bbf385..adc27ab1ff 100644
--- a/compiler/cmm/CmmPipeline.hs
+++ b/compiler/cmm/CmmPipeline.hs
@@ -11,15 +11,17 @@ module CmmPipeline (
import CLabel
import Cmm
+import CmmLint
import CmmLive
import CmmBuildInfoTables
import CmmCommonBlockElim
import CmmProcPoint
-import CmmSpillReload
import CmmRewriteAssignments
-import CmmStackLayout
import CmmContFlowOpt
import OptimizationFuel
+import CmmLayoutStack
+import Hoopl
+import CmmUtils
import DynFlags
import ErrUtils
@@ -28,6 +30,8 @@ import Data.Maybe
import Control.Monad
import Data.Map (Map)
import qualified Data.Map as Map
+import Data.Set (Set)
+import qualified Data.Set as Set
import Outputable
import StaticFlags
@@ -53,32 +57,28 @@ import StaticFlags
-- we actually need to do the initial pass.
cmmPipeline :: HscEnv -- Compilation env including
-- dynamic flags: -dcmm-lint -ddump-cps-cmm
- -> (TopSRT, [CmmGroup]) -- SRT table and accumulating list of compiled procs
+ -> TopSRT -- SRT table and accumulating list of compiled procs
-> CmmGroup -- Input C-- with Procedures
- -> IO (TopSRT, [CmmGroup]) -- Output CPS transformed C--
-cmmPipeline hsc_env (topSRT, rst) prog =
+ -> IO (TopSRT, CmmGroup) -- Output CPS transformed C--
+cmmPipeline hsc_env topSRT prog =
do let dflags = hsc_dflags hsc_env
--
showPass dflags "CPSZ"
- let tops = runCmmContFlowOpts prog
- (cafEnvs, tops) <- liftM unzip $ mapM (cpsTop hsc_env) tops
+ (cafEnvs, tops) <- {-# SCC "tops" #-} liftM unzip $ mapM (cpsTop hsc_env) prog
-- tops :: [[(CmmDecl,CAFSet]] (one list per group)
- let topCAFEnv = mkTopCAFInfo (concat cafEnvs)
+ let topCAFEnv = {-# SCC "topCAFEnv" #-} mkTopCAFInfo (concat cafEnvs)
-- folding over the groups
- (topSRT, tops) <- foldM (toTops hsc_env topCAFEnv) (topSRT, []) tops
+ (topSRT, tops) <- {-# SCC "toTops" #-} foldM (toTops hsc_env topCAFEnv) (topSRT, []) tops
let cmms :: CmmGroup
cmms = reverse (concat tops)
dumpIfSet_dyn dflags Opt_D_dump_cps_cmm "Post CPS Cmm" (ppr cmms)
- -- SRT is not affected by control flow optimization pass
- let prog' = runCmmContFlowOpts cmms
-
- return (topSRT, prog' : rst)
+ return (topSRT, cmms)
{- [Note global fuel]
~~~~~~~~~~~~~~~~~~~~~
@@ -92,75 +92,63 @@ global to one compiler session.
-- -ddump-cmmz
cpsTop :: HscEnv -> CmmDecl -> IO ([(CLabel, CAFSet)], [(CAFSet, CmmDecl)])
-cpsTop _ p@(CmmData {}) = return ([], [(Map.empty, p)])
+cpsTop _ p@(CmmData {}) = return ([], [(Set.empty, p)])
cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) l g) =
do
- -- Why bother doing these early: dualLivenessWithInsertion,
- -- insertLateReloads, rewriteAssignments?
+ ----------- Control-flow optimisations ---------------
+ g <- {-# SCC "cmmCfgOpts(1)" #-} return $ cmmCfgOpts g
+ dump Opt_D_dump_cmmz_cfg "Post control-flow optimsations" g
----------- Eliminate common blocks -------------------
- g <- return $ elimCommonBlocks g
+ g <- {-# SCC "elimCommonBlocks" #-} return $ elimCommonBlocks g
dump Opt_D_dump_cmmz_cbe "Post common block elimination" g
- -- Any work storing block Labels must be performed _after_ elimCommonBlocks
+ -- Any work storing block Labels must be performed _after_
+ -- elimCommonBlocks
----------- Proc points -------------------
- let callPPs = callProcPoints g
- procPoints <- run $ minimalProcPointSet (targetPlatform dflags) callPPs g
- g <- run $ addProcPointProtocols callPPs procPoints g
- dump Opt_D_dump_cmmz_proc "Post Proc Points Added" g
-
- ----------- Spills and reloads -------------------
- g <- run $ dualLivenessWithInsertion procPoints g
- dump Opt_D_dump_cmmz_spills "Post spills and reloads" g
-
- ----------- Sink and inline assignments -------------------
- g <- runOptimization $ rewriteAssignments platform g
- dump Opt_D_dump_cmmz_rewrite "Post rewrite assignments" g
-
- ----------- Eliminate dead assignments -------------------
- g <- runOptimization $ removeDeadAssignments g
- dump Opt_D_dump_cmmz_dead "Post remove dead assignments" g
-
- ----------- Zero dead stack slots (Debug only) ---------------
- -- Debugging: stubbing slots on death can cause crashes early
- g <- if opt_StubDeadValues
- then run $ stubSlotsOnDeath g
- else return g
- dump Opt_D_dump_cmmz_stub "Post stub dead stack slots" g
-
- --------------- Stack layout ----------------
- slotEnv <- run $ liveSlotAnal g
- let spEntryMap = getSpEntryMap entry_off g
- mbpprTrace "live slot analysis results: " (ppr slotEnv) $ return ()
- let areaMap = layout procPoints spEntryMap slotEnv entry_off g
- mbpprTrace "areaMap" (ppr areaMap) $ return ()
-
- ------------ Manifest the stack pointer --------
- g <- run $ manifestSP spEntryMap areaMap entry_off g
- dump Opt_D_dump_cmmz_sp "Post manifestSP" g
- -- UGH... manifestSP can require updates to the procPointMap.
- -- We can probably do something quicker here for the update...
+ let callPPs = {-# SCC "callProcPoints" #-} callProcPoints g
+ procPoints <- {-# SCC "minimalProcPointSet" #-} run $
+ minimalProcPointSet (targetPlatform dflags) callPPs g
+
+ ----------- Layout the stack and manifest Sp ---------------
+ -- (also does: removeDeadAssignments, and lowerSafeForeignCalls)
+ (g, stackmaps) <- {-# SCC "layoutStack" #-}
+ run $ cmmLayoutStack procPoints entry_off g
+ dump Opt_D_dump_cmmz_sp "Layout Stack" g
+
+ g <- {-# SCC "sink" #-} run $ cmmSink g
+ dump Opt_D_dump_cmmz_rewrite "Sink assignments" g
+
+-- ----------- Sink and inline assignments -------------------
+-- g <- {-# SCC "rewriteAssignments" #-} runOptimization $
+-- rewriteAssignments platform g
+-- dump Opt_D_dump_cmmz_rewrite "Post rewrite assignments" g
------------- Split into separate procedures ------------
- procPointMap <- run $ procPointAnalysis procPoints g
- dump Opt_D_dump_cmmz_procmap "procpoint map" procPointMap
- gs <- run $ splitAtProcPoints l callPPs procPoints procPointMap
- (CmmProc h l g)
- mapM_ (dump Opt_D_dump_cmmz_split "Post splitting") gs
-
- ------------- More CAFs and foreign calls ------------
- cafEnv <- run $ cafAnal g
- let localCAFs = catMaybes $ map (localCAFInfo cafEnv) gs
+ procPointMap <- {-# SCC "procPointAnalysis" #-} run $
+ procPointAnalysis procPoints g
+ dumpWith dflags Opt_D_dump_cmmz_procmap "procpoint map" procPointMap
+ gs <- {-# SCC "splitAtProcPoints" #-} run $
+ splitAtProcPoints l callPPs procPoints procPointMap (CmmProc h l g)
+ dumps Opt_D_dump_cmmz_split "Post splitting" gs
+
+ ------------- More CAFs ------------------------------
+ let cafEnv = {-# SCC "cafAnal" #-} cafAnal platform g
+ let localCAFs = {-# SCC "localCAFs" #-} catMaybes $ map (localCAFInfo platform cafEnv) gs
mbpprTrace "localCAFs" (ppr localCAFs) $ return ()
- gs <- run $ mapM (lowerSafeForeignCalls areaMap) gs
- mapM_ (dump Opt_D_dump_cmmz_lower "Post lowerSafeForeignCalls") gs
-
-- NO MORE GRAPH TRANSFORMATION AFTER HERE -- JUST MAKING INFOTABLES
- gs <- return $ map (setInfoTableStackMap slotEnv areaMap) gs
- mapM_ (dump Opt_D_dump_cmmz_info "after setInfoTableStackMap") gs
- gs <- return $ map (bundleCAFs cafEnv) gs
- mapM_ (dump Opt_D_dump_cmmz_cafs "after bundleCAFs") gs
+ gs <- {-# SCC "setInfoTableStackMap" #-}
+ return $ map (setInfoTableStackMap stackmaps) gs
+ dumps Opt_D_dump_cmmz_info "after setInfoTableStackMap" gs
+
+ ----------- Control-flow optimisations ---------------
+ gs <- {-# SCC "cmmCfgOpts(2)" #-} return $ map cmmCfgOptsProc gs
+ dumps Opt_D_dump_cmmz_cfg "Post control-flow optimsations" gs
+
+ gs <- {-# SCC "bundleCAFs" #-} return $ map (bundleCAFs cafEnv) gs
+ dumps Opt_D_dump_cmmz_cafs "after bundleCAFs" gs
+
return (localCAFs, gs)
-- gs :: [ (CAFSet, CmmDecl) ]
@@ -168,21 +156,40 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}})
where dflags = hsc_dflags hsc_env
platform = targetPlatform dflags
- mbpprTrace x y z = if dopt Opt_D_dump_cmmz dflags then pprTrace x y z else z
- dump f = dumpWith ppr f
- dumpWith pprFun f txt g = do
- -- ToDo: No easy way of say "dump all the cmmz, *and* split
- -- them into files." Also, -ddump-cmmz doesn't play nicely
- -- with -ddump-to-file, since the headers get omitted.
- dumpIfSet_dyn dflags f txt (pprFun g)
- when (not (dopt f dflags)) $
- dumpIfSet_dyn dflags Opt_D_dump_cmmz txt (pprFun g)
+ mbpprTrace x y z | dopt Opt_D_dump_cmmz dflags = pprTrace x y z
+ | otherwise = z
+ dump = dumpGraph dflags
+
+ dumps flag name
+ = mapM_ (dumpWith dflags flag name)
+
-- Runs a required transformation/analysis
run = runInfiniteFuelIO (hsc_OptFuel hsc_env)
-- Runs an optional transformation/analysis (and should
-- thus be subject to optimization fuel)
runOptimization = runFuelIO (hsc_OptFuel hsc_env)
+
+dumpGraph :: DynFlags -> DynFlag -> String -> CmmGraph -> IO ()
+dumpGraph dflags flag name g = do
+ when (dopt Opt_DoCmmLinting dflags) $ do_lint g
+ dumpWith dflags flag name g
+ where
+ do_lint g = case cmmLintGraph (targetPlatform dflags) g of
+ Just err -> do { printDump err
+ ; ghcExit dflags 1
+ }
+ Nothing -> return ()
+
+dumpWith :: Outputable a => DynFlags -> DynFlag -> String -> a -> IO ()
+dumpWith dflags flag txt g = do
+ -- ToDo: No easy way of say "dump all the cmmz, *and* split
+ -- them into files." Also, -ddump-cmmz doesn't play nicely
+ -- with -ddump-to-file, since the headers get omitted.
+ dumpIfSet_dyn dflags flag txt (ppr g)
+ when (not (dopt flag dflags)) $
+ dumpIfSet_dyn dflags Opt_D_dump_cmmz txt (ppr g)
+
-- This probably belongs in CmmBuildInfoTables?
-- We're just finishing the job here: once we know what CAFs are defined
-- in non-static closures, we can build the SRTs.
diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs
index f50d850b3a..8dda51b9b7 100644
--- a/compiler/cmm/CmmProcPoint.hs
+++ b/compiler/cmm/CmmProcPoint.hs
@@ -4,7 +4,7 @@
module CmmProcPoint
( ProcPointSet, Status(..)
, callProcPoints, minimalProcPointSet
- , addProcPointProtocols, splitAtProcPoints, procPointAnalysis
+ , splitAtProcPoints, procPointAnalysis
)
where
@@ -28,7 +28,7 @@ import Platform
import UniqSet
import UniqSupply
-import Compiler.Hoopl
+import Hoopl
import qualified Data.Map as Map
@@ -103,34 +103,50 @@ instance Outputable Status where
(hsep $ punctuate comma $ map ppr $ setElems ps)
ppr ProcPoint = text "<procpt>"
-lattice :: DataflowLattice Status
-lattice = DataflowLattice "direct proc-point reachability" unreached add_to
- where unreached = ReachedBy setEmpty
- add_to _ (OldFact ProcPoint) _ = (NoChange, ProcPoint)
- add_to _ _ (NewFact ProcPoint) = (SomeChange, ProcPoint) -- because of previous case
- add_to _ (OldFact (ReachedBy p)) (NewFact (ReachedBy p')) =
- let union = setUnion p' p
- in if setSize union > setSize p then (SomeChange, ReachedBy union)
- else (NoChange, ReachedBy p)
--------------------------------------------------
+-- Proc point analysis
+
+procPointAnalysis :: ProcPointSet -> CmmGraph -> FuelUniqSM (BlockEnv Status)
+-- Once you know what the proc-points are, figure out
+-- what proc-points each block is reachable from
+procPointAnalysis procPoints g =
+ -- pprTrace "procPointAnalysis" (ppr procPoints) $
+ dataflowAnalFwdBlocks g initProcPoints $ analFwd lattice forward
+ where initProcPoints = [(id, ProcPoint) | id <- setElems procPoints]
+
-- transfer equations
forward :: FwdTransfer CmmNode Status
-forward = mkFTransfer3 first middle ((mkFactBase lattice . ) . last)
- where first :: CmmNode C O -> Status -> Status
- first (CmmEntry id) ProcPoint = ReachedBy $ setSingleton id
- first _ x = x
+forward = mkFTransfer3 first middle last
+ where
+ first :: CmmNode C O -> Status -> Status
+ first (CmmEntry id) ProcPoint = ReachedBy $ setSingleton id
+ first _ x = x
- middle _ x = x
+ middle _ x = x
- last :: CmmNode O C -> Status -> [(Label, Status)]
- last (CmmCall {cml_cont = Just k}) _ = [(k, ProcPoint)]
- last (CmmForeignCall {succ = k}) _ = [(k, ProcPoint)]
- last l x = map (\id -> (id, x)) (successors l)
+ last :: CmmNode O C -> Status -> FactBase Status
+ last l x = mkFactBase lattice $ map (\id -> (id, x)) (successors l)
--- It is worth distinguishing two sets of proc points:
--- those that are induced by calls in the original graph
--- and those that are introduced because they're reachable from multiple proc points.
+lattice :: DataflowLattice Status
+lattice = DataflowLattice "direct proc-point reachability" unreached add_to
+ where unreached = ReachedBy setEmpty
+ add_to _ (OldFact ProcPoint) _ = (NoChange, ProcPoint)
+ add_to _ _ (NewFact ProcPoint) = (SomeChange, ProcPoint)
+ -- because of previous case
+ add_to _ (OldFact (ReachedBy p)) (NewFact (ReachedBy p'))
+ | setSize union > setSize p = (SomeChange, ReachedBy union)
+ | otherwise = (NoChange, ReachedBy p)
+ where
+ union = setUnion p' p
+
+----------------------------------------------------------------------
+
+-- It is worth distinguishing two sets of proc points: those that are
+-- induced by calls in the original graph and those that are
+-- introduced because they're reachable from multiple proc points.
+--
+-- Extract the set of Continuation BlockIds, see Note [Continuation BlockIds].
callProcPoints :: CmmGraph -> ProcPointSet
callProcPoints g = foldGraphBlocks add (setSingleton (g_entry g)) g
where add :: CmmBlock -> BlockSet -> BlockSet
@@ -139,21 +155,17 @@ callProcPoints g = foldGraphBlocks add (setSingleton (g_entry g)) g
CmmForeignCall {succ=k} -> setInsert k set
_ -> set
-minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph -> FuelUniqSM ProcPointSet
+minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph
+ -> FuelUniqSM ProcPointSet
-- Given the set of successors of calls (which must be proc-points)
-- figure out the minimal set of necessary proc-points
-minimalProcPointSet platform callProcPoints g = extendPPSet platform g (postorderDfs g) callProcPoints
-
-procPointAnalysis :: ProcPointSet -> CmmGraph -> FuelUniqSM (BlockEnv Status)
--- Once you know what the proc-points are, figure out
--- what proc-points each block is reachable from
-procPointAnalysis procPoints g =
- liftM snd $ dataflowPassFwd g initProcPoints $ analFwd lattice forward
- where initProcPoints = [(id, ProcPoint) | id <- setElems procPoints]
+minimalProcPointSet platform callProcPoints g
+ = extendPPSet platform g (postorderDfs g) callProcPoints
extendPPSet :: Platform -> CmmGraph -> [CmmBlock] -> ProcPointSet -> FuelUniqSM ProcPointSet
extendPPSet platform g blocks procPoints =
do env <- procPointAnalysis procPoints g
+ -- pprTrace "extensPPSet" (ppr env) $ return ()
let add block pps = let id = entryLabel block
in case mapLookup id env of
Just ProcPoint -> setInsert id pps
@@ -179,183 +191,13 @@ extendPPSet platform g blocks procPoints =
pps -> extendPPSet g blocks
(foldl extendBlockSet procPoints' pps)
-}
- case newPoint of Just id ->
- if setMember id procPoints' then panic "added old proc pt"
- else extendPPSet platform g blocks (setInsert id procPoints')
- Nothing -> return procPoints'
-
-
-------------------------------------------------------------------------
--- Computing Proc-Point Protocols --
-------------------------------------------------------------------------
+ case newPoint of
+ Just id ->
+ if setMember id procPoints'
+ then panic "added old proc pt"
+ else extendPPSet platform g blocks (setInsert id procPoints')
+ Nothing -> return procPoints'
-{-
-
-There is one major trick, discovered by Michael Adams, which is that
-we want to choose protocols in a way that enables us to optimize away
-some continuations. The optimization is very much like branch-chain
-elimination, except that it involves passing results as well as
-control. The idea is that if a call's continuation k does nothing but
-CopyIn its results and then goto proc point P, the call's continuation
-may be changed to P, *provided* P's protocol is identical to the
-protocol for the CopyIn. We choose protocols to make this so.
-
-Here's an explanatory example; we begin with the source code (lines
-separate basic blocks):
-
- ..1..;
- x, y = g();
- goto P;
- -------
- P: ..2..;
-
-Zipperization converts this code as follows:
-
- ..1..;
- call g() returns to k;
- -------
- k: CopyIn(x, y);
- goto P;
- -------
- P: ..2..;
-
-What we'd like to do is assign P the same CopyIn protocol as k, so we
-can eliminate k:
-
- ..1..;
- call g() returns to P;
- -------
- P: CopyIn(x, y); ..2..;
-
-Of course, P may be the target of more than one continuation, and
-different continuations may have different protocols. Michael Adams
-implemented a voting mechanism, but he thinks a simple greedy
-algorithm would be just as good, so that's what we do.
-
--}
-
-data Protocol = Protocol Convention [CmmFormal] Area
- deriving Eq
-instance Outputable Protocol where
- ppr (Protocol c fs a) = text "Protocol" <+> ppr c <+> ppr fs <+> ppr a
-
--- | Function 'optimize_calls' chooses protocols only for those proc
--- points that are relevant to the optimization explained above.
--- The others are assigned by 'add_unassigned', which is not yet clever.
-
-addProcPointProtocols :: ProcPointSet -> ProcPointSet -> CmmGraph -> FuelUniqSM CmmGraph
-addProcPointProtocols callPPs procPoints g =
- do liveness <- cmmLiveness g
- (protos, g') <- optimize_calls liveness g
- blocks'' <- add_CopyOuts protos procPoints g'
- return $ ofBlockMap (g_entry g) blocks''
- where optimize_calls liveness g = -- see Note [Separate Adams optimization]
- do let (protos, blocks') =
- foldGraphBlocks maybe_add_call (mapEmpty, mapEmpty) g
- protos' = add_unassigned liveness procPoints protos
- let g' = ofBlockMap (g_entry g) (add_CopyIns callPPs protos' blocks')
- return (protos', removeUnreachableBlocks g')
- maybe_add_call :: CmmBlock -> (BlockEnv Protocol, BlockEnv CmmBlock)
- -> (BlockEnv Protocol, BlockEnv CmmBlock)
- -- ^ If the block is a call whose continuation goes to a proc point
- -- whose protocol either matches the continuation's or is not yet set,
- -- redirect the call (cf 'newblock') and set the protocol if necessary
- maybe_add_call block (protos, blocks) =
- case lastNode block of
- CmmCall tgt (Just k) args res s
- | Just proto <- mapLookup k protos,
- Just pee <- branchesToProcPoint k
- -> let newblock = replaceLastNode block (CmmCall tgt (Just pee)
- args res s)
- changed_blocks = insertBlock newblock blocks
- unchanged_blocks = insertBlock block blocks
- in case mapLookup pee protos of
- Nothing -> (mapInsert pee proto protos, changed_blocks)
- Just proto' ->
- if proto == proto' then (protos, changed_blocks)
- else (protos, unchanged_blocks)
- _ -> (protos, insertBlock block blocks)
-
- branchesToProcPoint :: BlockId -> Maybe BlockId
- -- ^ Tells whether the named block is just a branch to a proc point
- branchesToProcPoint id =
- let block = mapLookup id (toBlockMap g) `orElse`
- panic "branch out of graph"
- in case blockToNodeList block of
- (_, [], JustC (CmmBranch pee)) | setMember pee procPoints -> Just pee
- _ -> Nothing
-
--- | For now, following a suggestion by Ben Lippmeier, we pass all
--- live variables as arguments, hoping that a clever register
--- allocator might help.
-
-add_unassigned :: BlockEnv CmmLive -> ProcPointSet -> BlockEnv Protocol ->
- BlockEnv Protocol
-add_unassigned = pass_live_vars_as_args
-
-pass_live_vars_as_args :: BlockEnv CmmLive -> ProcPointSet ->
- BlockEnv Protocol -> BlockEnv Protocol
-pass_live_vars_as_args _liveness procPoints protos = protos'
- where protos' = setFold addLiveVars protos procPoints
- addLiveVars :: BlockId -> BlockEnv Protocol -> BlockEnv Protocol
- addLiveVars id protos =
- case mapLookup id protos of
- Just _ -> protos
- Nothing -> let live = emptyRegSet
- --lookupBlockEnv _liveness id `orElse`
- --panic ("no liveness at block " ++ show id)
- formals = uniqSetToList live
- prot = Protocol Private formals $ CallArea $ Young id
- in mapInsert id prot protos
-
-
--- | Add copy-in instructions to each proc point that did not arise from a call
--- instruction. (Proc-points that arise from calls already have their copy-in instructions.)
-
-add_CopyIns :: ProcPointSet -> BlockEnv Protocol -> BlockEnv CmmBlock -> BlockEnv CmmBlock
-add_CopyIns callPPs protos blocks = mapFold maybe_insert_CopyIns mapEmpty blocks
- where maybe_insert_CopyIns block blocks
- | not $ setMember bid callPPs
- , Just (Protocol c fs _area) <- mapLookup bid protos
- = let nodes = copyInSlot c fs
- (h, m, l) = blockToNodeList block
- in insertBlock (blockOfNodeList (h, nodes ++ m, l)) blocks
- | otherwise = insertBlock block blocks
- where bid = entryLabel block
-
-
--- | Add a CopyOut node before each procpoint.
--- If the predecessor is a call, then the copy outs should already be done by the callee.
--- Note: If we need to add copy-out instructions, they may require stack space,
--- so we accumulate a map from the successors to the necessary stack space,
--- then update the successors after we have finished inserting the copy-outs.
-
-add_CopyOuts :: BlockEnv Protocol -> ProcPointSet -> CmmGraph ->
- FuelUniqSM (BlockEnv CmmBlock)
-add_CopyOuts protos procPoints g = foldGraphBlocks mb_copy_out (return mapEmpty) g
- where mb_copy_out :: CmmBlock -> FuelUniqSM (BlockEnv CmmBlock) ->
- FuelUniqSM (BlockEnv CmmBlock)
- mb_copy_out b z | entryLabel b == g_entry g = skip b z
- mb_copy_out b z =
- case lastNode b of
- CmmCall {} -> skip b z -- copy out done by callee
- CmmForeignCall {} -> skip b z -- copy out done by callee
- _ -> copy_out b z
- copy_out b z = foldr trySucc init (successors b) >>= finish
- where init = (\bmap -> (b, bmap)) `liftM` z
- trySucc succId z =
- if setMember succId procPoints then
- case mapLookup succId protos of
- Nothing -> z
- Just (Protocol c fs _area) -> insert z succId $ copyOutSlot c fs
- else z
- insert z succId m =
- do (b, bmap) <- z
- (b, bs) <- insertBetween b m succId
- -- pprTrace "insert for succ" (ppr succId <> ppr m) $ do
- return $ (b, foldl (flip insertBlock) bmap bs)
- finish (b, bmap) = return $ insertBlock b bmap
- skip b bs = insertBlock b `liftM` bs
-- At this point, we have found a set of procpoints, each of which should be
-- the entry point of a procedure.
@@ -384,15 +226,18 @@ splitAtProcPoints entry_label callPPs procPoints procMap
[] -> graphEnv
[id] -> add graphEnv id bid b
_ -> panic "Each block should be reachable from only one ProcPoint"
- Nothing -> pprPanic "block not reached by a proc point?" (ppr bid)
+ Nothing -> graphEnv
where bid = entryLabel b
add graphEnv procId bid b = mapInsert procId graph' graphEnv
where graph = mapLookup procId graphEnv `orElse` mapEmpty
graph' = mapInsert bid b graph
+
graphEnv <- return $ foldGraphBlocks addBlock emptyBlockMap g
+
-- Build a map from proc point BlockId to pairs of:
-- * Labels for their new procedures
- -- * Labels for the info tables of their new procedures (only if the proc point is a callPP)
+ -- * Labels for the info tables of their new procedures (only if
+ -- the proc point is a callPP)
-- Due to common blockification, we may overestimate the set of procpoints.
let add_label map pp = Map.insert pp lbls map
where lbls | pp == entry = (entry_label, Just entry_info_lbl)
@@ -401,30 +246,15 @@ splitAtProcPoints entry_label callPPs procPoints procMap
entry_info_lbl = cit_lbl info_tbl
procLabels = foldl add_label Map.empty
(filter (flip mapMember (toBlockMap g)) (setElems procPoints))
- -- For each procpoint, we need to know the SP offset on entry.
- -- If the procpoint is:
- -- - continuation of a call, the SP offset is in the call
- -- - otherwise, 0 (and left out of the spEntryMap)
- let add_sp_off :: CmmBlock -> BlockEnv CmmStackInfo -> BlockEnv CmmStackInfo
- add_sp_off b env =
- case lastNode b of
- CmmCall {cml_cont = Just succ, cml_ret_args = off, cml_ret_off = updfr_off} ->
- mapInsert succ (StackInfo { arg_space = off, updfr_space = Just updfr_off}) env
- CmmForeignCall {succ = succ, updfr = updfr_off} ->
- mapInsert succ (StackInfo { arg_space = wORD_SIZE, updfr_space = Just updfr_off}) env
- _ -> env
- spEntryMap = foldGraphBlocks add_sp_off (mapInsert entry stack_info emptyBlockMap) g
- getStackInfo id = mapLookup id spEntryMap `orElse` StackInfo {arg_space = 0, updfr_space = Nothing}
-- In each new graph, add blocks jumping off to the new procedures,
-- and replace branches to procpoints with branches to the jump-off blocks
let add_jump_block (env, bs) (pp, l) =
do bid <- liftM mkBlockId getUniqueM
- let b = blockOfNodeList (JustC (CmmEntry bid), [], JustC jump)
- StackInfo {arg_space = argSpace, updfr_space = off} = getStackInfo pp
- jump = CmmCall (CmmLit (CmmLabel l)) Nothing argSpace 0
- (off `orElse` 0) -- Jump's shouldn't need the offset...
+ let b = blockJoin (CmmEntry bid) emptyBlock jump
+ jump = CmmCall (CmmLit (CmmLabel l)) Nothing 0 0 0
return (mapInsert pp bid env, b : bs)
- add_jumps (newGraphEnv) (ppId, blockEnv) =
+
+ add_jumps newGraphEnv (ppId, blockEnv) =
do let needed_jumps = -- find which procpoints we currently branch to
mapFold add_if_branch_to_pp [] blockEnv
add_if_branch_to_pp :: CmmBlock -> [(BlockId, CLabel)] -> [(BlockId, CLabel)]
@@ -441,17 +271,16 @@ splitAtProcPoints entry_label callPPs procPoints procMap
foldM add_jump_block (mapEmpty, []) needed_jumps
-- update the entry block
let b = expectJust "block in env" $ mapLookup ppId blockEnv
- off = getStackInfo ppId
blockEnv' = mapInsert ppId b blockEnv
-- replace branches to procpoints with branches to jumps
blockEnv'' = toBlockMap $ replaceBranches jumpEnv $ ofBlockMap ppId blockEnv'
-- add the jump blocks to the graph
blockEnv''' = foldl (flip insertBlock) blockEnv'' jumpBlocks
- let g' = (off, ofBlockMap ppId blockEnv''')
+ let g' = ofBlockMap ppId blockEnv'''
-- pprTrace "g' pre jumps" (ppr g') $ do
return (mapInsert ppId g' newGraphEnv)
graphEnv <- foldM add_jumps emptyBlockMap $ mapToList graphEnv
- let to_proc (bid, (stack_info, g)) = case expectJust "pp label" $ Map.lookup bid procLabels of
+ let to_proc (bid, g) = case expectJust "pp label" $ Map.lookup bid procLabels of
(lbl, Just info_lbl)
| bid == entry
-> CmmProc (TopInfo {info_tbl=info_tbl, stack_info=stack_info})
@@ -462,15 +291,22 @@ splitAtProcPoints entry_label callPPs procPoints procMap
(lbl, Nothing)
-> CmmProc (TopInfo {info_tbl=CmmNonInfoTable, stack_info=stack_info})
lbl (replacePPIds g)
- -- References to procpoint IDs can now be replaced with the infotable's label
- replacePPIds g = mapGraphNodes (id, mapExp repl, mapExp repl) g
+ where
+ stack_info = panic "No StackInfo"
+
+ -- References to procpoint IDs can now be replaced with the
+ -- infotable's label
+ replacePPIds g = {-# SCC "replacePPIds" #-}
+ mapGraphNodes (id, mapExp repl, mapExp repl) g
where repl e@(CmmLit (CmmBlock bid)) =
case Map.lookup bid procLabels of
Just (_, Just info_lbl) -> CmmLit (CmmLabel info_lbl)
_ -> e
repl e = e
- -- The C back end expects to see return continuations before the call sites.
- -- Here, we sort them in reverse order -- it gets reversed later.
+
+ -- The C back end expects to see return continuations before the
+ -- call sites. Here, we sort them in reverse order -- it gets
+ -- reversed later.
let (_, block_order) = foldl add_block_num (0::Int, emptyBlockMap) (postorderDfs g)
add_block_num (i, map) block = (i+1, mapInsert (entryLabel block) i map)
sort_fn (bid, _) (bid', _) =
@@ -482,6 +318,27 @@ splitAtProcPoints entry_label callPPs procPoints procMap
procs
splitAtProcPoints _ _ _ _ t@(CmmData _ _) = return [t]
+
+-- Only called from CmmProcPoint.splitAtProcPoints. NB. does a
+-- recursive lookup, see comment below.
+replaceBranches :: BlockEnv BlockId -> CmmGraph -> CmmGraph
+replaceBranches env cmmg
+ = {-# SCC "replaceBranches" #-}
+ ofBlockMap (g_entry cmmg) $ mapMap f $ toBlockMap cmmg
+ where
+ f block = replaceLastNode block $ last (lastNode block)
+
+ last :: CmmNode O C -> CmmNode O C
+ last (CmmBranch id) = CmmBranch (lookup id)
+ last (CmmCondBranch e ti fi) = CmmCondBranch e (lookup ti) (lookup fi)
+ last (CmmSwitch e tbl) = CmmSwitch e (map (fmap lookup) tbl)
+ last l@(CmmCall {}) = l
+ last l@(CmmForeignCall {}) = l
+ lookup id = fmap lookup (mapLookup id env) `orElse` id
+ -- XXX: this is a recursive lookup, it follows chains
+ -- until the lookup returns Nothing, at which point we
+ -- return the last BlockId
+
----------------------------------------------------------------
{-
diff --git a/compiler/cmm/CmmRewriteAssignments.hs b/compiler/cmm/CmmRewriteAssignments.hs
index ecf3f7e0c3..2c33b7b5ac 100644
--- a/compiler/cmm/CmmRewriteAssignments.hs
+++ b/compiler/cmm/CmmRewriteAssignments.hs
@@ -27,7 +27,7 @@ import UniqFM
import Unique
import BlockId
-import Compiler.Hoopl hiding (Unique)
+import Hoopl
import Data.Maybe
import Prelude hiding (succ, zip)
@@ -404,8 +404,8 @@ clobbers (CmmReg (CmmGlobal Hp), _) (_, _) = False
-- ToDo: Also catch MachOp case
clobbers (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (u, CmmLoad (ss'@CmmStackSlot{}) _)
| getUnique r == u, ss == ss' = False -- No-op on the stack slot (XXX: Do we need this special case?)
-clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr
- where f (CmmLoad (CmmStackSlot (CallArea a') o') t)
+clobbers (CmmStackSlot a o, rhs) (_, expr) = f expr
+ where f (CmmLoad (CmmStackSlot a' o') t)
= (a, o, widthInBytes (cmmExprWidth rhs)) `overlaps` (a', o', widthInBytes (typeWidth t))
f (CmmLoad e _) = containsStackSlot e
f (CmmMachOp _ es) = or (map f es)
@@ -416,9 +416,6 @@ clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr
containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es)
containsStackSlot (CmmStackSlot{}) = True
containsStackSlot _ = False
-clobbers (CmmStackSlot (RegSlot l) _, _) (_, expr) = f expr
- where f (CmmLoad (CmmStackSlot (RegSlot l') _) _) = l == l'
- f _ = False
clobbers _ (_, e) = f e
where f (CmmLoad (CmmStackSlot _ _) _) = False
f (CmmLoad{}) = True -- conservative
@@ -432,7 +429,7 @@ clobbers _ (_, e) = f e
-- [ I32 ]
-- [ F64 ]
-- s' -w'- o'
-type CallSubArea = (AreaId, Int, Int) -- area, offset, width
+type CallSubArea = (Area, Int, Int) -- area, offset, width
overlaps :: CallSubArea -> CallSubArea -> Bool
overlaps (a, _, _) (a', _, _) | a /= a' = False
overlaps (_, o, w) (_, o', w') =
@@ -457,7 +454,7 @@ invalidateVolatile :: BlockId -> AssignmentMap -> AssignmentMap
invalidateVolatile k m = mapUFM p m
where p (AlwaysInline e) = if exp e then AlwaysInline e else NeverOptimize
where exp CmmLit{} = True
- exp (CmmLoad (CmmStackSlot (CallArea (Young k')) _) _)
+ exp (CmmLoad (CmmStackSlot (Young k') _) _)
| k' == k = False
exp (CmmLoad (CmmStackSlot _ _) _) = True
exp (CmmMachOp _ es) = and (map exp es)
@@ -596,10 +593,6 @@ assignmentRewrite = mkFRewrite3 first middle last
where rep = typeWidth (localRegType r)
_ -> old
-- See Note [Soundness of store rewriting]
- inlineExp assign old@(CmmLoad (CmmStackSlot (RegSlot r) _) _)
- = case lookupUFM assign r of
- Just (AlwaysInline x) -> x
- _ -> old
inlineExp _ old = old
inlinable :: CmmNode e x -> Bool
diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs
deleted file mode 100644
index 9e762fe48a..0000000000
--- a/compiler/cmm/CmmSpillReload.hs
+++ /dev/null
@@ -1,166 +0,0 @@
-{-# LANGUAGE GADTs, NoMonoLocalBinds, FlexibleContexts #-}
--- Norman likes local bindings
--- If this module lives on I'd like to get rid of this flag in due course
-
-{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
-
--- TODO: Get rid of this flag:
-{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
-
-module CmmSpillReload
- ( dualLivenessWithInsertion
- )
-where
-
-import BlockId
-import Cmm
-import CmmUtils
-import CmmLive
-import OptimizationFuel
-
-import Control.Monad
-import Outputable hiding (empty)
-import qualified Outputable as PP
-import UniqSet
-
-import Compiler.Hoopl hiding (Unique)
-import Data.Maybe
-import Prelude hiding (succ, zip)
-
-{- Note [Overview of spill/reload]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The point of this module is to insert spills and reloads to establish
-the invariant that at a call or any proc point with an established
-protocol all live variables not expected in registers are sitting on the
-stack. We use a backward dual liveness analysis (both traditional
-register liveness as well as register slot liveness on the stack) to
-insert spills and reloads. It should be followed by a forward
-transformation to sink reloads as deeply as possible, so as to reduce
-register pressure: this transformation is performed by
-CmmRewriteAssignments.
-
-A variable can be expected to be live in a register, live on the
-stack, or both. This analysis ensures that spills and reloads are
-inserted as needed to make sure that every live variable needed
-after a call is available on the stack. Spills are placed immediately
-after their reaching definitions, but reloads are placed immediately
-after a return from a call (the entry point.)
-
-Note that we offer no guarantees about the consistency of the value
-in memory and the value in the register, except that they are
-equal across calls/procpoints. If the variable is changed, this
-mapping breaks: but as the original value of the register may still
-be useful in a different context, the memory location is not updated.
--}
-
-data DualLive = DualLive { on_stack :: RegSet, in_regs :: RegSet }
-
-changeStack, changeRegs :: (RegSet -> RegSet) -> DualLive -> DualLive
-changeStack f live = live { on_stack = f (on_stack live) }
-changeRegs f live = live { in_regs = f (in_regs live) }
-
-dualLiveLattice :: DataflowLattice DualLive
-dualLiveLattice = DataflowLattice "variables live in registers and on stack" empty add
- where empty = DualLive emptyRegSet emptyRegSet
- add _ (OldFact old) (NewFact new) = (changeIf $ change1 || change2, DualLive stack regs)
- where (change1, stack) = add1 (on_stack old) (on_stack new)
- (change2, regs) = add1 (in_regs old) (in_regs new)
- add1 old new = if sizeUniqSet join > sizeUniqSet old then (True, join) else (False, old)
- where join = unionUniqSets old new
-
-dualLivenessWithInsertion :: BlockSet -> CmmGraph -> FuelUniqSM CmmGraph
-dualLivenessWithInsertion procPoints g =
- liftM fst $ dataflowPassBwd g [] $ analRewBwd dualLiveLattice
- (dualLiveTransfers (g_entry g) procPoints)
- (insertSpillsAndReloads g procPoints)
-
--- Note [Live registers on entry to procpoints]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- Remember that the transfer function is only ever run on the rewritten
--- version of a graph, and the rewrite function for spills and reloads
--- enforces the invariant that no local registers are live on entry to
--- a procpoint. Accordingly, we check for this invariant here. An old
--- version of this code incorrectly claimed that any live registers were
--- live on the stack before entering the function: this is wrong, but
--- didn't cause bugs because it never actually was invoked.
-
-dualLiveTransfers :: BlockId -> BlockSet -> (BwdTransfer CmmNode DualLive)
-dualLiveTransfers entry procPoints = mkBTransfer3 first middle last
- where first :: CmmNode C O -> DualLive -> DualLive
- first (CmmEntry id) live -- See Note [Live registers on entry to procpoints]
- | id == entry || setMember id procPoints = noLiveOnEntry id (in_regs live) live
- | otherwise = live
-
- middle :: CmmNode O O -> DualLive -> DualLive
- middle m = changeStack updSlots
- . changeRegs updRegs
- where -- Reuse middle of liveness analysis from CmmLive
- updRegs = case getBTransfer3 xferLive of (_, middle, _) -> middle m
-
- updSlots live = foldSlotsUsed reload (foldSlotsDefd spill live m) m
- spill live s@(RegSlot r, _, _) = check s $ deleteFromRegSet live r
- spill live _ = live
- reload live s@(RegSlot r, _, _) = check s $ extendRegSet live r
- reload live _ = live
- -- Ensure the assignment refers to the entirety of the
- -- register slot (and not just a slice).
- check (RegSlot (LocalReg _ ty), o, w) x
- | o == w && w == widthInBytes (typeWidth ty) = x
- check _ _ = panic "dualLiveTransfers: slices unsupported"
-
- -- Register analysis is identical to liveness analysis from CmmLive.
- last :: CmmNode O C -> FactBase DualLive -> DualLive
- last l fb = changeRegs (gen_kill l) $ case l of
- CmmCall {cml_cont=Nothing} -> empty
- CmmCall {cml_cont=Just k} -> keep_stack_only k
- CmmForeignCall {succ=k} -> keep_stack_only k
- _ -> joinOutFacts dualLiveLattice l fb
- where empty = fact_bot dualLiveLattice
- lkp k = fromMaybe empty (lookupFact k fb)
- keep_stack_only k = DualLive (on_stack (lkp k)) emptyRegSet
-
-insertSpillsAndReloads :: CmmGraph -> BlockSet -> CmmBwdRewrite DualLive
-insertSpillsAndReloads graph procPoints = deepBwdRw3 first middle nothing
- -- Beware: deepBwdRw with one polymorphic function seems more reasonable here,
- -- but GHC miscompiles it, see bug #4044.
- where first :: CmmNode C O -> Fact O DualLive -> CmmReplGraph C O
- first e@(CmmEntry id) live = return $
- if id /= (g_entry graph) && setMember id procPoints then
- case map reload (uniqSetToList (in_regs live)) of
- [] -> Nothing
- is -> Just $ mkFirst e <*> mkMiddles is
- else Nothing
- -- EZY: There was some dead code for handling the case where
- -- we were not splitting procedures. Check Git history if
- -- you're interested (circa e26ea0f41).
-
- middle :: CmmNode O O -> Fact O DualLive -> CmmReplGraph O O
- -- Don't add spills next to reloads.
- middle (CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot (RegSlot reg') _) _)) _ | reg == reg' = return Nothing
- -- Spill if register is live on stack.
- middle m@(CmmAssign (CmmLocal reg) _) live
- | reg `elemRegSet` on_stack live = return (Just (mkMiddles [m, spill reg]))
- middle _ _ = return Nothing
-
- nothing _ _ = return Nothing
-
-spill, reload :: LocalReg -> CmmNode O O
-spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r)
-reload r = CmmAssign (CmmLocal r) (CmmLoad (regSlot r) $ localRegType r)
-
----------------------
--- prettyprinting
-
-ppr_regs :: String -> RegSet -> SDoc
-ppr_regs s regs = text s <+> commafy (map ppr $ uniqSetToList regs)
- where commafy xs = hsep $ punctuate comma xs
-
-instance Outputable DualLive where
- ppr (DualLive {in_regs = regs, on_stack = stack}) =
- if isEmptyUniqSet regs && isEmptyUniqSet stack then
- text "<nothing-live>"
- else
- nest 2 $ fsep [if isEmptyUniqSet regs then PP.empty
- else (ppr_regs "live in regs =" regs),
- if isEmptyUniqSet stack then PP.empty
- else (ppr_regs "live on stack =" stack)]
diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs
index 0756c87583..d831a8aba5 100644
--- a/compiler/cmm/CmmUtils.hs
+++ b/compiler/cmm/CmmUtils.hs
@@ -60,13 +60,14 @@ module CmmUtils(
-- * Operations that probably don't belong here
modifyGraph,
- lastNode, replaceLastNode, insertBetween,
+ lastNode, replaceLastNode,
ofBlockMap, toBlockMap, insertBlock,
ofBlockList, toBlockList, bodyToBlockList,
foldGraphBlocks, mapGraphNodes, postorderDfs, mapGraphNodes1,
analFwd, analBwd, analRewFwd, analRewBwd,
- dataflowPassFwd, dataflowPassBwd
+ dataflowPassFwd, dataflowPassBwd, dataflowAnalFwd, dataflowAnalBwd,
+ dataflowAnalFwdBlocks
) where
#include "HsVersions.h"
@@ -89,7 +90,7 @@ import Data.Word
import Data.Maybe
import Data.Bits
import Control.Monad
-import Compiler.Hoopl hiding ( Unique )
+import Hoopl
---------------------------------------------------
--
@@ -402,13 +403,13 @@ mkLiveness (reg:regs)
modifyGraph :: (Graph n C C -> Graph n' C C) -> GenCmmGraph n -> GenCmmGraph n'
modifyGraph f g = CmmGraph {g_entry=g_entry g, g_graph=f (g_graph g)}
-toBlockMap :: CmmGraph -> LabelMap CmmBlock
+toBlockMap :: CmmGraph -> BlockEnv CmmBlock
toBlockMap (CmmGraph {g_graph=GMany NothingO body NothingO}) = body
-ofBlockMap :: BlockId -> LabelMap CmmBlock -> CmmGraph
+ofBlockMap :: BlockId -> BlockEnv CmmBlock -> CmmGraph
ofBlockMap entry bodyMap = CmmGraph {g_entry=entry, g_graph=GMany NothingO bodyMap NothingO}
-insertBlock :: CmmBlock -> LabelMap CmmBlock -> LabelMap CmmBlock
+insertBlock :: CmmBlock -> BlockEnv CmmBlock -> BlockEnv CmmBlock
insertBlock block map =
ASSERT (isNothing $ mapLookup id map)
mapInsert id block map
@@ -418,7 +419,8 @@ toBlockList :: CmmGraph -> [CmmBlock]
toBlockList g = mapElems $ toBlockMap g
ofBlockList :: BlockId -> [CmmBlock] -> CmmGraph
-ofBlockList entry blocks = CmmGraph {g_entry=entry, g_graph=GMany NothingO body NothingO}
+ofBlockList entry blocks = CmmGraph { g_entry = entry
+ , g_graph = GMany NothingO body NothingO }
where body = foldr addBlock emptyBody blocks
bodyToBlockList :: Body CmmNode -> [CmmBlock]
@@ -439,87 +441,67 @@ foldGraphBlocks :: (CmmBlock -> a -> a) -> a -> CmmGraph -> a
foldGraphBlocks k z g = mapFold k z $ toBlockMap g
postorderDfs :: CmmGraph -> [CmmBlock]
-postorderDfs g = postorder_dfs_from (toBlockMap g) (g_entry g)
-
--------------------------------------------------
--- Manipulating CmmBlocks
-
-lastNode :: CmmBlock -> CmmNode O C
-lastNode block = foldBlockNodesF3 (nothing, nothing, const) block ()
- where nothing :: a -> b -> ()
- nothing _ _ = ()
-
-replaceLastNode :: Block CmmNode e C -> CmmNode O C -> Block CmmNode e C
-replaceLastNode block last = blockOfNodeList (first, middle, JustC last)
- where (first, middle, _) = blockToNodeList block
-
-----------------------------------------------------------------------
------ Splicing between blocks
--- Given a middle node, a block, and a successor BlockId,
--- we can insert the middle node between the block and the successor.
--- We return the updated block and a list of new blocks that must be added
--- to the graph.
--- The semantics is a bit tricky. We consider cases on the last node:
--- o For a branch, we can just insert before the branch,
--- but sometimes the optimizer does better if we actually insert
--- a fresh basic block, enabling some common blockification.
--- o For a conditional branch, switch statement, or call, we must insert
--- a new basic block.
--- o For a jump or return, this operation is impossible.
-
-insertBetween :: MonadUnique m => CmmBlock -> [CmmNode O O] -> BlockId -> m (CmmBlock, [CmmBlock])
-insertBetween b ms succId = insert $ lastNode b
- where insert :: MonadUnique m => CmmNode O C -> m (CmmBlock, [CmmBlock])
- insert (CmmBranch bid) =
- if bid == succId then
- do (bid', bs) <- newBlocks
- return (replaceLastNode b (CmmBranch bid'), bs)
- else panic "tried invalid block insertBetween"
- insert (CmmCondBranch c t f) =
- do (t', tbs) <- if t == succId then newBlocks else return $ (t, [])
- (f', fbs) <- if f == succId then newBlocks else return $ (f, [])
- return (replaceLastNode b (CmmCondBranch c t' f'), tbs ++ fbs)
- insert (CmmSwitch e ks) =
- do (ids, bs) <- mapAndUnzipM mbNewBlocks ks
- return (replaceLastNode b (CmmSwitch e ids), join bs)
- insert (CmmCall {}) =
- panic "unimp: insertBetween after a call -- probably not a good idea"
- insert (CmmForeignCall {}) =
- panic "unimp: insertBetween after a foreign call -- probably not a good idea"
-
- newBlocks :: MonadUnique m => m (BlockId, [CmmBlock])
- newBlocks = do id <- liftM mkBlockId $ getUniqueM
- return $ (id, [blockOfNodeList (JustC (CmmEntry id), ms, JustC (CmmBranch succId))])
- mbNewBlocks :: MonadUnique m => Maybe BlockId -> m (Maybe BlockId, [CmmBlock])
- mbNewBlocks (Just k) = if k == succId then liftM fstJust newBlocks
- else return (Just k, [])
- mbNewBlocks Nothing = return (Nothing, [])
- fstJust (id, bs) = (Just id, bs)
+postorderDfs g = {-# SCC "postorderDfs" #-} postorder_dfs_from (toBlockMap g) (g_entry g)
-------------------------------------------------
-- Running dataflow analysis and/or rewrites
-- Constructing forward and backward analysis-only pass
-analFwd :: Monad m => DataflowLattice f -> FwdTransfer n f -> FwdPass m n f
-analBwd :: Monad m => DataflowLattice f -> BwdTransfer n f -> BwdPass m n f
+analFwd :: DataflowLattice f -> FwdTransfer n f -> FwdPass FuelUniqSM n f
+analBwd :: DataflowLattice f -> BwdTransfer n f -> BwdPass FuelUniqSM n f
analFwd lat xfer = analRewFwd lat xfer noFwdRewrite
analBwd lat xfer = analRewBwd lat xfer noBwdRewrite
-- Constructing forward and backward analysis + rewrite pass
-analRewFwd :: Monad m => DataflowLattice f -> FwdTransfer n f -> FwdRewrite m n f -> FwdPass m n f
-analRewBwd :: Monad m => DataflowLattice f -> BwdTransfer n f -> BwdRewrite m n f -> BwdPass m n f
+analRewFwd :: DataflowLattice f -> FwdTransfer n f
+ -> FwdRewrite FuelUniqSM n f
+ -> FwdPass FuelUniqSM n f
+
+analRewBwd :: DataflowLattice f
+ -> BwdTransfer n f
+ -> BwdRewrite FuelUniqSM n f
+ -> BwdPass FuelUniqSM n f
analRewFwd lat xfer rew = FwdPass {fp_lattice = lat, fp_transfer = xfer, fp_rewrite = rew}
analRewBwd lat xfer rew = BwdPass {bp_lattice = lat, bp_transfer = xfer, bp_rewrite = rew}
-- Running forward and backward dataflow analysis + optional rewrite
-dataflowPassFwd :: NonLocal n => GenCmmGraph n -> [(BlockId, f)] -> FwdPass FuelUniqSM n f -> FuelUniqSM (GenCmmGraph n, BlockEnv f)
+dataflowPassFwd :: NonLocal n =>
+ GenCmmGraph n -> [(BlockId, f)]
+ -> FwdPass FuelUniqSM n f
+ -> FuelUniqSM (GenCmmGraph n, BlockEnv f)
dataflowPassFwd (CmmGraph {g_entry=entry, g_graph=graph}) facts fwd = do
(graph, facts, NothingO) <- analyzeAndRewriteFwd fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts)
return (CmmGraph {g_entry=entry, g_graph=graph}, facts)
-dataflowPassBwd :: NonLocal n => GenCmmGraph n -> [(BlockId, f)] -> BwdPass FuelUniqSM n f -> FuelUniqSM (GenCmmGraph n, BlockEnv f)
+dataflowAnalFwd :: NonLocal n =>
+ GenCmmGraph n -> [(BlockId, f)]
+ -> FwdPass FuelUniqSM n f
+ -> BlockEnv f
+dataflowAnalFwd (CmmGraph {g_entry=entry, g_graph=graph}) facts fwd =
+ analyzeFwd fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts)
+
+dataflowAnalFwdBlocks :: NonLocal n =>
+ GenCmmGraph n -> [(BlockId, f)]
+ -> FwdPass FuelUniqSM n f
+ -> FuelUniqSM (BlockEnv f)
+dataflowAnalFwdBlocks (CmmGraph {g_entry=entry, g_graph=graph}) facts fwd = do
+-- (graph, facts, NothingO) <- analyzeAndRewriteFwd fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts)
+-- return facts
+ return (analyzeFwdBlocks fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts))
+
+dataflowAnalBwd :: NonLocal n =>
+ GenCmmGraph n -> [(BlockId, f)]
+ -> BwdPass FuelUniqSM n f
+ -> BlockEnv f
+dataflowAnalBwd (CmmGraph {g_entry=entry, g_graph=graph}) facts bwd =
+ analyzeBwd bwd (JustC [entry]) graph (mkFactBase (bp_lattice bwd) facts)
+
+dataflowPassBwd :: NonLocal n =>
+ GenCmmGraph n -> [(BlockId, f)]
+ -> BwdPass FuelUniqSM n f
+ -> FuelUniqSM (GenCmmGraph n, BlockEnv f)
dataflowPassBwd (CmmGraph {g_entry=entry, g_graph=graph}) facts bwd = do
(graph, facts, NothingO) <- analyzeAndRewriteBwd bwd (JustC [entry]) graph (mkFactBase (bp_lattice bwd) facts)
return (CmmGraph {g_entry=entry, g_graph=graph}, facts)
diff --git a/compiler/cmm/Hoopl.hs b/compiler/cmm/Hoopl.hs
new file mode 100644
index 0000000000..404482e047
--- /dev/null
+++ b/compiler/cmm/Hoopl.hs
@@ -0,0 +1,124 @@
+module Hoopl (
+ module Compiler.Hoopl,
+ module Hoopl.Dataflow,
+ deepBwdRw3, deepBwdRw,
+ thenFwdRw
+ ) where
+
+import Compiler.Hoopl hiding
+ ( Unique,
+ FwdTransfer(..), FwdRewrite(..), FwdPass(..),
+ BwdTransfer(..), BwdRewrite(..), BwdPass(..),
+ noFwdRewrite, noBwdRewrite,
+-- analyzeAndRewriteFwd, analyzeAndRewriteBwd,
+ mkFactBase, Fact,
+ mkBRewrite, mkBRewrite3, mkBTransfer, mkBTransfer3,
+ mkFRewrite, mkFRewrite3, mkFTransfer, mkFTransfer3,
+ deepFwdRw, deepFwdRw3, thenFwdRw, iterFwdRw,
+ deepBwdRw, deepBwdRw3, thenBwdRw, iterBwdRw
+ )
+
+import Hoopl.Dataflow
+import OptimizationFuel
+import Control.Monad
+
+deepFwdRw3 :: (n C O -> f -> FuelUniqSM (Maybe (Graph n C O)))
+ -> (n O O -> f -> FuelUniqSM (Maybe (Graph n O O)))
+ -> (n O C -> f -> FuelUniqSM (Maybe (Graph n O C)))
+ -> (FwdRewrite FuelUniqSM n f)
+deepFwdRw :: (forall e x . n e x -> f -> FuelUniqSM (Maybe (Graph n e x))) -> FwdRewrite FuelUniqSM n f
+deepFwdRw3 f m l = iterFwdRw $ mkFRewrite3 f m l
+deepFwdRw f = deepFwdRw3 f f f
+
+-- N.B. rw3, rw3', and rw3a are triples of functions.
+-- But rw and rw' are single functions.
+thenFwdRw :: forall n f.
+ FwdRewrite FuelUniqSM n f
+ -> FwdRewrite FuelUniqSM n f
+ -> FwdRewrite FuelUniqSM n f
+thenFwdRw rw3 rw3' = wrapFR2 thenrw rw3 rw3'
+ where
+ thenrw :: forall e x t t1.
+ (t -> t1 -> FuelUniqSM (Maybe (Graph n e x, FwdRewrite FuelUniqSM n f)))
+ -> (t -> t1 -> FuelUniqSM (Maybe (Graph n e x, FwdRewrite FuelUniqSM n f)))
+ -> t
+ -> t1
+ -> FuelUniqSM (Maybe (Graph n e x, FwdRewrite FuelUniqSM n f))
+ thenrw rw rw' n f = rw n f >>= fwdRes
+ where fwdRes Nothing = rw' n f
+ fwdRes (Just gr) = return $ Just $ fadd_rw rw3' gr
+
+iterFwdRw :: forall m n f.
+ FwdRewrite FuelUniqSM n f
+ -> FwdRewrite FuelUniqSM n f
+iterFwdRw rw3 = wrapFR iter rw3
+ where iter :: forall a e x t.
+ (t -> a -> FuelUniqSM (Maybe (Graph n e x, FwdRewrite FuelUniqSM n f)))
+ -> t
+ -> a
+ -> FuelUniqSM (Maybe (Graph n e x, FwdRewrite FuelUniqSM n f))
+ iter rw n = (liftM $ liftM $ fadd_rw (iterFwdRw rw3)) . rw n
+
+-- | Function inspired by 'rew' in the paper
+_frewrite_cps :: ((Graph n e x, FwdRewrite FuelUniqSM n f) -> FuelUniqSM a)
+ -> FuelUniqSM a
+ -> (forall e x . n e x -> f -> FuelUniqSM (Maybe (Graph n e x, FwdRewrite FuelUniqSM n f)))
+ -> n e x
+ -> f
+ -> FuelUniqSM a
+_frewrite_cps j n rw node f =
+ do mg <- rw node f
+ case mg of Nothing -> n
+ Just gr -> j gr
+
+
+
+-- | Function inspired by 'add' in the paper
+fadd_rw :: FwdRewrite FuelUniqSM n f
+ -> (Graph n e x, FwdRewrite FuelUniqSM n f)
+ -> (Graph n e x, FwdRewrite FuelUniqSM n f)
+fadd_rw rw2 (g, rw1) = (g, rw1 `thenFwdRw` rw2)
+
+
+
+deepBwdRw3 ::
+ (n C O -> f -> FuelUniqSM (Maybe (Graph n C O)))
+ -> (n O O -> f -> FuelUniqSM (Maybe (Graph n O O)))
+ -> (n O C -> FactBase f -> FuelUniqSM (Maybe (Graph n O C)))
+ -> (BwdRewrite FuelUniqSM n f)
+deepBwdRw :: (forall e x . n e x -> Fact x f -> FuelUniqSM (Maybe (Graph n e x)))
+ -> BwdRewrite FuelUniqSM n f
+deepBwdRw3 f m l = iterBwdRw $ mkBRewrite3 f m l
+deepBwdRw f = deepBwdRw3 f f f
+
+
+thenBwdRw :: forall n f. BwdRewrite FuelUniqSM n f -> BwdRewrite FuelUniqSM n f -> BwdRewrite FuelUniqSM n f
+thenBwdRw rw1 rw2 = wrapBR2 f rw1 rw2
+ where f :: forall t t1 t2 e x.
+ t
+ -> (t1 -> t2 -> FuelUniqSM (Maybe (Graph n e x, BwdRewrite FuelUniqSM n f)))
+ -> (t1 -> t2 -> FuelUniqSM (Maybe (Graph n e x, BwdRewrite FuelUniqSM n f)))
+ -> t1
+ -> t2
+ -> FuelUniqSM (Maybe (Graph n e x, BwdRewrite FuelUniqSM n f))
+ f _ rw1 rw2' n f = do
+ res1 <- rw1 n f
+ case res1 of
+ Nothing -> rw2' n f
+ Just gr -> return $ Just $ badd_rw rw2 gr
+
+iterBwdRw :: forall n f. BwdRewrite FuelUniqSM n f -> BwdRewrite FuelUniqSM n f
+iterBwdRw rw = wrapBR f rw
+ where f :: forall t e x t1 t2.
+ t
+ -> (t1 -> t2 -> FuelUniqSM (Maybe (Graph n e x, BwdRewrite FuelUniqSM n f)))
+ -> t1
+ -> t2
+ -> FuelUniqSM (Maybe (Graph n e x, BwdRewrite FuelUniqSM n f))
+ f _ rw' n f = liftM (liftM (badd_rw (iterBwdRw rw))) (rw' n f)
+
+-- | Function inspired by 'add' in the paper
+badd_rw :: BwdRewrite FuelUniqSM n f
+ -> (Graph n e x, BwdRewrite FuelUniqSM n f)
+ -> (Graph n e x, BwdRewrite FuelUniqSM n f)
+badd_rw rw2 (g, rw1) = (g, rw1 `thenBwdRw` rw2)
diff --git a/compiler/cmm/Hoopl/Dataflow.hs b/compiler/cmm/Hoopl/Dataflow.hs
new file mode 100644
index 0000000000..cdab2cd2fe
--- /dev/null
+++ b/compiler/cmm/Hoopl/Dataflow.hs
@@ -0,0 +1,890 @@
+{-# LANGUAGE RankNTypes, ScopedTypeVariables, GADTs, EmptyDataDecls, PatternGuards, TypeFamilies, MultiParamTypeClasses #-}
+#if __GLASGOW_HASKELL__ >= 703
+{-# OPTIONS_GHC -fprof-auto-top #-}
+#endif
+#if __GLASGOW_HASKELL__ >= 701
+{-# LANGUAGE Trustworthy #-}
+#endif
+
+module Hoopl.Dataflow
+ ( DataflowLattice(..), OldFact(..), NewFact(..), Fact, mkFactBase
+ , ChangeFlag(..)
+ , FwdPass(..), FwdTransfer, mkFTransfer, mkFTransfer3, getFTransfer3
+ -- * Respecting Fuel
+
+ -- $fuel
+ , FwdRewrite, mkFRewrite, mkFRewrite3, getFRewrite3, noFwdRewrite
+ , wrapFR, wrapFR2
+ , BwdPass(..), BwdTransfer, mkBTransfer, mkBTransfer3, getBTransfer3
+ , wrapBR, wrapBR2
+ , BwdRewrite, mkBRewrite, mkBRewrite3, getBRewrite3, noBwdRewrite
+ , analyzeAndRewriteFwd, analyzeAndRewriteBwd
+ , analyzeFwd, analyzeFwdBlocks, analyzeBwd
+ )
+where
+
+import OptimizationFuel
+
+import Data.Maybe
+import Data.Array
+
+import Compiler.Hoopl.Collections
+import Compiler.Hoopl.Fuel
+import Compiler.Hoopl.Graph hiding (Graph) -- hiding so we can redefine
+ -- and include definition in paper
+import qualified Compiler.Hoopl.GraphUtil as U
+import Compiler.Hoopl.Label
+import Compiler.Hoopl.Dataflow (JoinFun)
+import Compiler.Hoopl.Util
+
+import Compiler.Hoopl.Dataflow (
+ DataflowLattice(..), OldFact(..), NewFact(..), Fact
+ , ChangeFlag(..), mkFactBase
+ , FwdPass(..), FwdRewrite(..), FwdTransfer(..), mkFRewrite, getFRewrite3, mkFTransfer, mkFTransfer3
+ , wrapFR, wrapFR2
+ , BwdPass(..), BwdRewrite(..), BwdTransfer(..), mkBTransfer, mkBTransfer3, getBTransfer3
+ , wrapBR, wrapBR2
+ , mkBRewrite, getBRewrite3
+ )
+
+-- import Debug.Trace
+
+noRewrite :: a -> b -> FuelUniqSM (Maybe c)
+noRewrite _ _ = return Nothing
+
+noFwdRewrite :: FwdRewrite FuelUniqSM n f
+noFwdRewrite = FwdRewrite3 (noRewrite, noRewrite, noRewrite)
+
+-- | Functions passed to 'mkFRewrite3' should not be aware of the fuel supply.
+-- The result returned by 'mkFRewrite3' respects fuel.
+mkFRewrite3 :: forall n f.
+ (n C O -> f -> FuelUniqSM (Maybe (Graph n C O)))
+ -> (n O O -> f -> FuelUniqSM (Maybe (Graph n O O)))
+ -> (n O C -> f -> FuelUniqSM (Maybe (Graph n O C)))
+ -> FwdRewrite FuelUniqSM n f
+mkFRewrite3 f m l = FwdRewrite3 (lift f, lift m, lift l)
+ where lift :: forall t t1 a. (t -> t1 -> FuelUniqSM (Maybe a))
+ -> t -> t1 -> FuelUniqSM (Maybe (a, FwdRewrite FuelUniqSM n f))
+ {-# INLINE lift #-}
+ lift rw node fact = do
+ a <- rw node fact
+ case a of
+ Nothing -> return Nothing
+ Just a -> do f <- getFuel
+ if f == 0
+ then return Nothing
+ else setFuel (f-1) >> return (Just (a,noFwdRewrite))
+
+noBwdRewrite :: BwdRewrite FuelUniqSM n f
+noBwdRewrite = BwdRewrite3 (noRewrite, noRewrite, noRewrite)
+
+mkBRewrite3 :: forall n f.
+ (n C O -> f -> FuelUniqSM (Maybe (Graph n C O)))
+ -> (n O O -> f -> FuelUniqSM (Maybe (Graph n O O)))
+ -> (n O C -> FactBase f -> FuelUniqSM (Maybe (Graph n O C)))
+ -> BwdRewrite FuelUniqSM n f
+mkBRewrite3 f m l = BwdRewrite3 (lift f, lift m, lift l)
+ where lift :: forall t t1 a. (t -> t1 -> FuelUniqSM (Maybe a))
+ -> t -> t1 -> FuelUniqSM (Maybe (a, BwdRewrite FuelUniqSM n f))
+ {-# INLINE lift #-}
+ lift rw node fact = do
+ a <- rw node fact
+ case a of
+ Nothing -> return Nothing
+ Just a -> do f <- getFuel
+ if f == 0
+ then return Nothing
+ else setFuel (f-1) >> return (Just (a,noBwdRewrite))
+
+-----------------------------------------------------------------------------
+-- Analyze and rewrite forward: the interface
+-----------------------------------------------------------------------------
+
+-- | if the graph being analyzed is open at the entry, there must
+-- be no other entry point, or all goes horribly wrong...
+analyzeAndRewriteFwd
+ :: forall n f e x . NonLocal n =>
+ FwdPass FuelUniqSM n f
+ -> MaybeC e [Label]
+ -> Graph n e x -> Fact e f
+ -> FuelUniqSM (Graph n e x, FactBase f, MaybeO x f)
+analyzeAndRewriteFwd pass entries g f =
+ do (rg, fout) <- arfGraph pass (fmap targetLabels entries) g f
+ let (g', fb) = normalizeGraph rg
+ return (g', fb, distinguishedExitFact g' fout)
+
+distinguishedExitFact :: forall n e x f . Graph n e x -> Fact x f -> MaybeO x f
+distinguishedExitFact g f = maybe g
+ where maybe :: Graph n e x -> MaybeO x f
+ maybe GNil = JustO f
+ maybe (GUnit {}) = JustO f
+ maybe (GMany _ _ x) = case x of NothingO -> NothingO
+ JustO _ -> JustO f
+
+----------------------------------------------------------------
+-- Forward Implementation
+----------------------------------------------------------------
+
+type Entries e = MaybeC e [Label]
+
+arfGraph :: forall n f e x . NonLocal n =>
+ FwdPass FuelUniqSM n f ->
+ Entries e -> Graph n e x -> Fact e f -> FuelUniqSM (DG f n e x, Fact x f)
+arfGraph pass@FwdPass { fp_lattice = lattice,
+ fp_transfer = transfer,
+ fp_rewrite = rewrite } entries g in_fact = graph g in_fact
+ where
+ {- nested type synonyms would be so lovely here
+ type ARF thing = forall e x . thing e x -> f -> m (DG f n e x, Fact x f)
+ type ARFX thing = forall e x . thing e x -> Fact e f -> m (DG f n e x, Fact x f)
+ -}
+ graph :: Graph n e x -> Fact e f -> FuelUniqSM (DG f n e x, Fact x f)
+ block :: forall e x .
+ Block n e x -> f -> FuelUniqSM (DG f n e x, Fact x f)
+
+ body :: [Label] -> LabelMap (Block n C C)
+ -> Fact C f -> FuelUniqSM (DG f n C C, Fact C f)
+ -- Outgoing factbase is restricted to Labels *not* in
+ -- in the Body; the facts for Labels *in*
+ -- the Body are in the 'DG f n C C'
+
+ cat :: forall e a x f1 f2 f3.
+ (f1 -> FuelUniqSM (DG f n e a, f2))
+ -> (f2 -> FuelUniqSM (DG f n a x, f3))
+ -> (f1 -> FuelUniqSM (DG f n e x, f3))
+
+ graph GNil f = return (dgnil, f)
+ graph (GUnit blk) f = block blk f
+ graph (GMany e bdy x) f = ((e `ebcat` bdy) `cat` exit x) f
+ where
+ ebcat :: MaybeO e (Block n O C) -> Body n -> Fact e f -> FuelUniqSM (DG f n e C, Fact C f)
+ exit :: MaybeO x (Block n C O) -> Fact C f -> FuelUniqSM (DG f n C x, Fact x f)
+ exit (JustO blk) f = arfx block blk f
+ exit NothingO f = return (dgnilC, f)
+ ebcat entry bdy f = c entries entry f
+ where c :: MaybeC e [Label] -> MaybeO e (Block n O C)
+ -> Fact e f -> FuelUniqSM (DG f n e C, Fact C f)
+ c NothingC (JustO entry) f = (block entry `cat` body (successors entry) bdy) f
+ c (JustC entries) NothingO f = body entries bdy f
+ c _ _ _ = error "bogus GADT pattern match failure"
+
+ -- Lift from nodes to blocks
+ block BNil f = return (dgnil, f)
+ block (BlockCO n b) f = (node n `cat` block b) f
+ block (BlockCC l b n) f = (node l `cat` block b `cat` node n) f
+ block (BlockOC b n) f = (block b `cat` node n) f
+
+ block (BMiddle n) f = node n f
+ block (BCat b1 b2) f = (block b1 `cat` block b2) f
+ block (BHead h n) f = (block h `cat` node n) f
+ block (BTail n t) f = (node n `cat` block t) f
+
+ {-# INLINE node #-}
+ node :: forall e x . (ShapeLifter e x)
+ => n e x -> f -> FuelUniqSM (DG f n e x, Fact x f)
+ node n f
+ = do { grw <- frewrite rewrite n f
+ ; case grw of
+ Nothing -> return ( singletonDG f n
+ , ftransfer transfer n f )
+ Just (g, rw) ->
+ let pass' = pass { fp_rewrite = rw }
+ f' = fwdEntryFact n f
+ in arfGraph pass' (fwdEntryLabel n) g f' }
+
+ -- | Compose fact transformers and concatenate the resulting
+ -- rewritten graphs.
+ {-# INLINE cat #-}
+ cat ft1 ft2 f = do { (g1,f1) <- ft1 f
+ ; (g2,f2) <- ft2 f1
+ ; let !g = g1 `dgSplice` g2
+ ; return (g, f2) }
+
+ arfx :: forall x .
+ (Block n C x -> f -> FuelUniqSM (DG f n C x, Fact x f))
+ -> (Block n C x -> Fact C f -> FuelUniqSM (DG f n C x, Fact x f))
+ arfx arf thing fb =
+ arf thing $ fromJust $ lookupFact (entryLabel thing) $ joinInFacts lattice fb
+ -- joinInFacts adds debugging information
+
+
+ -- Outgoing factbase is restricted to Labels *not* in
+ -- in the Body; the facts for Labels *in*
+ -- the Body are in the 'DG f n C C'
+ body entries blockmap init_fbase
+ = fixpoint Fwd lattice do_block entries blockmap init_fbase
+ where
+ lattice = fp_lattice pass
+ do_block :: forall x . Block n C x -> FactBase f
+ -> FuelUniqSM (DG f n C x, Fact x f)
+ do_block b fb = block b entryFact
+ where entryFact = getFact lattice (entryLabel b) fb
+
+
+-- Join all the incoming facts with bottom.
+-- We know the results _shouldn't change_, but the transfer
+-- functions might, for example, generate some debugging traces.
+joinInFacts :: DataflowLattice f -> FactBase f -> FactBase f
+joinInFacts (lattice @ DataflowLattice {fact_bot = bot, fact_join = fj}) fb =
+ mkFactBase lattice $ map botJoin $ mapToList fb
+ where botJoin (l, f) = (l, snd $ fj l (OldFact bot) (NewFact f))
+
+forwardBlockList :: (NonLocal n)
+ => [Label] -> Body n -> [Block n C C]
+-- This produces a list of blocks in order suitable for forward analysis,
+-- along with the list of Labels it may depend on for facts.
+forwardBlockList entries blks = postorder_dfs_from blks entries
+
+----------------------------------------------------------------
+-- Forward Analysis only
+----------------------------------------------------------------
+
+-- | if the graph being analyzed is open at the entry, there must
+-- be no other entry point, or all goes horribly wrong...
+analyzeFwd
+ :: forall n f e . NonLocal n =>
+ FwdPass FuelUniqSM n f
+ -> MaybeC e [Label]
+ -> Graph n e C -> Fact e f
+ -> FactBase f
+analyzeFwd FwdPass { fp_lattice = lattice,
+ fp_transfer = FwdTransfer3 (ftr, mtr, ltr) }
+ entries g in_fact = graph g in_fact
+ where
+ graph :: Graph n e C -> Fact e f -> FactBase f
+ graph (GMany entry blockmap NothingO)
+ = case (entries, entry) of
+ (NothingC, JustO entry) -> block entry `cat` body (successors entry)
+ (JustC entries, NothingO) -> body entries
+ _ -> error "bogus GADT pattern match failure"
+ where
+ body :: [Label] -> Fact C f -> Fact C f
+ body entries f
+ = fixpointAnal Fwd lattice do_block entries blockmap f
+ where
+ do_block :: forall x . Block n C x -> FactBase f -> Fact x f
+ do_block b fb = block b entryFact
+ where entryFact = getFact lattice (entryLabel b) fb
+
+ -- NB. eta-expand block, GHC can't do this by itself. See #5809.
+ block :: forall e x . Block n e x -> f -> Fact x f
+ block BNil f = f
+ block (BlockCO n b) f = (ftr n `cat` block b) f
+ block (BlockCC l b n) f = (ftr l `cat` (block b `cat` ltr n)) f
+ block (BlockOC b n) f = (block b `cat` ltr n) f
+
+ block (BMiddle n) f = mtr n f
+ block (BCat b1 b2) f = (block b1 `cat` block b2) f
+ block (BHead h n) f = (block h `cat` mtr n) f
+ block (BTail n t) f = (mtr n `cat` block t) f
+
+ {-# INLINE cat #-}
+ cat :: forall f1 f2 f3 . (f1 -> f2) -> (f2 -> f3) -> (f1 -> f3)
+ cat ft1 ft2 = \f -> ft2 $! ft1 f
+
+-- | if the graph being analyzed is open at the entry, there must
+-- be no other entry point, or all goes horribly wrong...
+analyzeFwdBlocks
+ :: forall n f e . NonLocal n =>
+ FwdPass FuelUniqSM n f
+ -> MaybeC e [Label]
+ -> Graph n e C -> Fact e f
+ -> FactBase f
+analyzeFwdBlocks FwdPass { fp_lattice = lattice,
+ fp_transfer = FwdTransfer3 (ftr, _, ltr) }
+ entries g in_fact = graph g in_fact
+ where
+ graph :: Graph n e C -> Fact e f -> FactBase f
+ graph (GMany entry blockmap NothingO)
+ = case (entries, entry) of
+ (NothingC, JustO entry) -> block entry `cat` body (successors entry)
+ (JustC entries, NothingO) -> body entries
+ _ -> error "bogus GADT pattern match failure"
+ where
+ body :: [Label] -> Fact C f -> Fact C f
+ body entries f
+ = fixpointAnal Fwd lattice do_block entries blockmap f
+ where
+ do_block :: forall x . Block n C x -> FactBase f -> Fact x f
+ do_block b fb = block b entryFact
+ where entryFact = getFact lattice (entryLabel b) fb
+
+ -- NB. eta-expand block, GHC can't do this by itself. See #5809.
+ block :: forall e x . Block n e x -> f -> Fact x f
+ block BNil f = f
+ block (BlockCO n _) f = ftr n f
+ block (BlockCC l _ n) f = (ftr l `cat` ltr n) f
+ block (BlockOC _ n) f = ltr n f
+
+ {-# INLINE cat #-}
+ cat :: forall f1 f2 f3 . (f1 -> f2) -> (f2 -> f3) -> (f1 -> f3)
+ cat ft1 ft2 = \f -> ft2 $! ft1 f
+
+----------------------------------------------------------------
+-- Backward Analysis only
+----------------------------------------------------------------
+
+-- | if the graph being analyzed is open at the entry, there must
+-- be no other entry point, or all goes horribly wrong...
+analyzeBwd
+ :: forall n f e . NonLocal n =>
+ BwdPass FuelUniqSM n f
+ -> MaybeC e [Label]
+ -> Graph n e C -> Fact C f
+ -> FactBase f
+analyzeBwd BwdPass { bp_lattice = lattice,
+ bp_transfer = BwdTransfer3 (ftr, mtr, ltr) }
+ entries g in_fact = graph g in_fact
+ where
+ graph :: Graph n e C -> Fact C f -> FactBase f
+ graph (GMany entry blockmap NothingO)
+ = case (entries, entry) of
+ (NothingC, JustO entry) -> body (successors entry)
+ (JustC entries, NothingO) -> body entries
+ _ -> error "bogus GADT pattern match failure"
+ where
+ body :: [Label] -> Fact C f -> Fact C f
+ body entries f
+ = fixpointAnal Bwd lattice do_block entries blockmap f
+ where
+ do_block :: forall x . Block n C x -> Fact x f -> FactBase f
+ do_block b fb = mapSingleton (entryLabel b) (block b fb)
+
+ -- NB. eta-expand block, GHC can't do this by itself. See #5809.
+ block :: forall e x . Block n e x -> Fact x f -> f
+ block BNil f = f
+ block (BlockCO n b) f = (ftr n `cat` block b) f
+ block (BlockCC l b n) f = ((ftr l `cat` block b) `cat` ltr n) f
+ block (BlockOC b n) f = (block b `cat` ltr n) f
+
+ block (BMiddle n) f = mtr n f
+ block (BCat b1 b2) f = (block b1 `cat` block b2) f
+ block (BHead h n) f = (block h `cat` mtr n) f
+ block (BTail n t) f = (mtr n `cat` block t) f
+
+ {-# INLINE cat #-}
+ cat :: forall f1 f2 f3 . (f2 -> f3) -> (f1 -> f2) -> (f1 -> f3)
+ cat ft1 ft2 = \f -> ft1 $! ft2 f
+
+-----------------------------------------------------------------------------
+-- Backward analysis and rewriting: the interface
+-----------------------------------------------------------------------------
+
+
+-- | if the graph being analyzed is open at the exit, I don't
+-- quite understand the implications of possible other exits
+analyzeAndRewriteBwd
+ :: NonLocal n
+ => BwdPass FuelUniqSM n f
+ -> MaybeC e [Label] -> Graph n e x -> Fact x f
+ -> FuelUniqSM (Graph n e x, FactBase f, MaybeO e f)
+analyzeAndRewriteBwd pass entries g f =
+ do (rg, fout) <- arbGraph pass (fmap targetLabels entries) g f
+ let (g', fb) = normalizeGraph rg
+ return (g', fb, distinguishedEntryFact g' fout)
+
+distinguishedEntryFact :: forall n e x f . Graph n e x -> Fact e f -> MaybeO e f
+distinguishedEntryFact g f = maybe g
+ where maybe :: Graph n e x -> MaybeO e f
+ maybe GNil = JustO f
+ maybe (GUnit {}) = JustO f
+ maybe (GMany e _ _) = case e of NothingO -> NothingO
+ JustO _ -> JustO f
+
+
+-----------------------------------------------------------------------------
+-- Backward implementation
+-----------------------------------------------------------------------------
+
+arbGraph :: forall n f e x .
+ NonLocal n =>
+ BwdPass FuelUniqSM n f ->
+ Entries e -> Graph n e x -> Fact x f -> FuelUniqSM (DG f n e x, Fact e f)
+arbGraph pass@BwdPass { bp_lattice = lattice,
+ bp_transfer = transfer,
+ bp_rewrite = rewrite } entries g in_fact = graph g in_fact
+ where
+ {- nested type synonyms would be so lovely here
+ type ARB thing = forall e x . thing e x -> Fact x f -> m (DG f n e x, f)
+ type ARBX thing = forall e x . thing e x -> Fact x f -> m (DG f n e x, Fact e f)
+ -}
+ graph :: Graph n e x -> Fact x f -> FuelUniqSM (DG f n e x, Fact e f)
+ block :: forall e x . Block n e x -> Fact x f -> FuelUniqSM (DG f n e x, f)
+ body :: [Label] -> Body n -> Fact C f -> FuelUniqSM (DG f n C C, Fact C f)
+ node :: forall e x . (ShapeLifter e x)
+ => n e x -> Fact x f -> FuelUniqSM (DG f n e x, f)
+ cat :: forall e a x info info' info''.
+ (info' -> FuelUniqSM (DG f n e a, info''))
+ -> (info -> FuelUniqSM (DG f n a x, info'))
+ -> (info -> FuelUniqSM (DG f n e x, info''))
+
+ graph GNil f = return (dgnil, f)
+ graph (GUnit blk) f = block blk f
+ graph (GMany e bdy x) f = ((e `ebcat` bdy) `cat` exit x) f
+ where
+ ebcat :: MaybeO e (Block n O C) -> Body n -> Fact C f -> FuelUniqSM (DG f n e C, Fact e f)
+ exit :: MaybeO x (Block n C O) -> Fact x f -> FuelUniqSM (DG f n C x, Fact C f)
+ exit (JustO blk) f = arbx block blk f
+ exit NothingO f = return (dgnilC, f)
+ ebcat entry bdy f = c entries entry f
+ where c :: MaybeC e [Label] -> MaybeO e (Block n O C)
+ -> Fact C f -> FuelUniqSM (DG f n e C, Fact e f)
+ c NothingC (JustO entry) f = (block entry `cat` body (successors entry) bdy) f
+ c (JustC entries) NothingO f = body entries bdy f
+ c _ _ _ = error "bogus GADT pattern match failure"
+
+ -- Lift from nodes to blocks
+ block BNil f = return (dgnil, f)
+ block (BlockCO n b) f = (node n `cat` block b) f
+ block (BlockCC l b n) f = (node l `cat` block b `cat` node n) f
+ block (BlockOC b n) f = (block b `cat` node n) f
+
+ block (BMiddle n) f = node n f
+ block (BCat b1 b2) f = (block b1 `cat` block b2) f
+ block (BHead h n) f = (block h `cat` node n) f
+ block (BTail n t) f = (node n `cat` block t) f
+
+ {-# INLINE node #-}
+ node n f
+ = do { bwdres <- brewrite rewrite n f
+ ; case bwdres of
+ Nothing -> return (singletonDG entry_f n, entry_f)
+ where entry_f = btransfer transfer n f
+ Just (g, rw) ->
+ do { let pass' = pass { bp_rewrite = rw }
+ ; (g, f) <- arbGraph pass' (fwdEntryLabel n) g f
+ ; return (g, bwdEntryFact lattice n f)} }
+
+ -- | Compose fact transformers and concatenate the resulting
+ -- rewritten graphs.
+ {-# INLINE cat #-}
+ cat ft1 ft2 f = do { (g2,f2) <- ft2 f
+ ; (g1,f1) <- ft1 f2
+ ; let !g = g1 `dgSplice` g2
+ ; return (g, f1) }
+
+ arbx :: forall x .
+ (Block n C x -> Fact x f -> FuelUniqSM (DG f n C x, f))
+ -> (Block n C x -> Fact x f -> FuelUniqSM (DG f n C x, Fact C f))
+
+ arbx arb thing f = do { (rg, f) <- arb thing f
+ ; let fb = joinInFacts (bp_lattice pass) $
+ mapSingleton (entryLabel thing) f
+ ; return (rg, fb) }
+ -- joinInFacts adds debugging information
+
+ -- Outgoing factbase is restricted to Labels *not* in
+ -- in the Body; the facts for Labels *in*
+ -- the Body are in the 'DG f n C C'
+ body entries blockmap init_fbase
+ = fixpoint Bwd (bp_lattice pass) do_block entries blockmap init_fbase
+ where
+ do_block :: forall x. Block n C x -> Fact x f -> FuelUniqSM (DG f n C x, LabelMap f)
+ do_block b f = do (g, f) <- block b f
+ return (g, mapSingleton (entryLabel b) f)
+
+
+{-
+
+The forward and backward cases are not dual. In the forward case, the
+entry points are known, and one simply traverses the body blocks from
+those points. In the backward case, something is known about the exit
+points, but this information is essentially useless, because we don't
+actually have a dual graph (that is, one with edges reversed) to
+compute with. (Even if we did have a dual graph, it would not avail
+us---a backward analysis must include reachable blocks that don't
+reach the exit, as in a procedure that loops forever and has side
+effects.)
+
+-}
+
+-----------------------------------------------------------------------------
+-- fixpoint
+-----------------------------------------------------------------------------
+
+data Direction = Fwd | Bwd
+
+-- | fixpointing for analysis-only
+--
+fixpointAnal :: forall n f. NonLocal n
+ => Direction
+ -> DataflowLattice f
+ -> (Block n C C -> Fact C f -> Fact C f)
+ -> [Label]
+ -> LabelMap (Block n C C)
+ -> Fact C f -> FactBase f
+
+fixpointAnal direction DataflowLattice{ fact_bot = bot, fact_join = join }
+ do_block entries blockmap init_fbase
+ = loop start init_fbase
+ where
+ blocks = sortBlocks direction entries blockmap
+ n = length blocks
+ block_arr = {-# SCC "block_arr" #-} listArray (0,n-1) blocks
+ start = {-# SCC "start" #-} [0..n-1]
+ dep_blocks = {-# SCC "dep_blocks" #-} mkDepBlocks direction blocks
+
+ loop
+ :: IntHeap -- blocks still to analyse
+ -> FactBase f -- current factbase (increases monotonically)
+ -> FactBase f
+
+ loop [] fbase = fbase
+ loop (ix:todo) fbase =
+ let
+ blk = block_arr ! ix
+
+ out_facts = {-# SCC "do_block" #-} do_block blk fbase
+
+ !(todo', fbase') = {-# SCC "mapFoldWithKey" #-}
+ mapFoldWithKey (updateFact join dep_blocks)
+ (todo,fbase) out_facts
+ in
+ -- trace ("analysing: " ++ show (entryLabel blk)) $
+ -- trace ("fbase': " ++ show (mapKeys fbase')) $ return ()
+ -- trace ("changed: " ++ show changed) $ return ()
+ -- trace ("to analyse: " ++ show to_analyse) $ return ()
+
+ loop todo' fbase'
+
+
+-- | fixpointing for combined analysis/rewriting
+--
+fixpoint :: forall n f. NonLocal n
+ => Direction
+ -> DataflowLattice f
+ -> (Block n C C -> Fact C f -> FuelUniqSM (DG f n C C, Fact C f))
+ -> [Label]
+ -> LabelMap (Block n C C)
+ -> (Fact C f -> FuelUniqSM (DG f n C C, Fact C f))
+
+fixpoint direction DataflowLattice{ fact_bot = bot, fact_join = join }
+ do_block entries blockmap init_fbase
+ = do
+ -- trace ("fixpoint: " ++ show (case direction of Fwd -> True; Bwd -> False) ++ " " ++ show (mapKeys blockmap) ++ show entries ++ " " ++ show (mapKeys init_fbase)) $ return()
+ (fbase, newblocks) <- loop start init_fbase mapEmpty
+ -- trace ("fixpoint DONE: " ++ show (mapKeys fbase) ++ show (mapKeys newblocks)) $ return()
+ return (GMany NothingO newblocks NothingO,
+ mapDeleteList (mapKeys blockmap) fbase)
+ -- The successors of the Graph are the the Labels
+ -- for which we have facts and which are *not* in
+ -- the blocks of the graph
+ where
+ blocks = sortBlocks direction entries blockmap
+ n = length blocks
+ block_arr = {-# SCC "block_arr" #-} listArray (0,n-1) blocks
+ start = {-# SCC "start" #-} [0..n-1]
+ dep_blocks = {-# SCC "dep_blocks" #-} mkDepBlocks direction blocks
+
+ loop
+ :: IntHeap
+ -> FactBase f -- current factbase (increases monotonically)
+ -> LabelMap (DBlock f n C C) -- transformed graph
+ -> FuelUniqSM (FactBase f, LabelMap (DBlock f n C C))
+
+ loop [] fbase newblocks = return (fbase, newblocks)
+ loop (ix:todo) fbase !newblocks = do
+ let blk = block_arr ! ix
+
+ -- trace ("analysing: " ++ show (entryLabel blk)) $ return ()
+ (rg, out_facts) <- do_block blk fbase
+ let !(todo', fbase') =
+ mapFoldWithKey (updateFact join dep_blocks)
+ (todo,fbase) out_facts
+
+ -- trace ("fbase': " ++ show (mapKeys fbase')) $ return ()
+ -- trace ("changed: " ++ show changed) $ return ()
+ -- trace ("to analyse: " ++ show to_analyse) $ return ()
+
+ let newblocks' = case rg of
+ GMany _ blks _ -> mapUnion blks newblocks
+
+ loop todo' fbase' newblocks'
+
+
+{- Note [TxFactBase invariants]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The TxFactBase is used only during a fixpoint iteration (or "sweep"),
+and accumulates facts (and the transformed code) during the fixpoint
+iteration.
+
+* tfb_fbase increases monotonically, across all sweeps
+
+* At the beginning of each sweep
+ tfb_cha = NoChange
+ tfb_lbls = {}
+
+* During each sweep we process each block in turn. Processing a block
+ is done thus:
+ 1. Read from tfb_fbase the facts for its entry label (forward)
+ or successors labels (backward)
+ 2. Transform those facts into new facts for its successors (forward)
+ or entry label (backward)
+ 3. Augment tfb_fbase with that info
+ We call the labels read in step (1) the "in-labels" of the sweep
+
+* The field tfb_lbls is the set of in-labels of all blocks that have
+ been processed so far this sweep, including the block that is
+ currently being processed. tfb_lbls is initialised to {}. It is a
+ subset of the Labels of the *original* (not transformed) blocks.
+
+* The tfb_cha field is set to SomeChange iff we decide we need to
+ perform another iteration of the fixpoint loop. It is initialsed to NoChange.
+
+ Specifically, we set tfb_cha to SomeChange in step (3) iff
+ (a) The fact in tfb_fbase for a block L changes
+ (b) L is in tfb_lbls
+ Reason: until a label enters the in-labels its accumuated fact in tfb_fbase
+ has not been read, hence cannot affect the outcome
+
+Note [Unreachable blocks]
+~~~~~~~~~~~~~~~~~~~~~~~~~
+A block that is not in the domain of tfb_fbase is "currently unreachable".
+A currently-unreachable block is not even analyzed. Reason: consider
+constant prop and this graph, with entry point L1:
+ L1: x:=3; goto L4
+ L2: x:=4; goto L4
+ L4: if x>3 goto L2 else goto L5
+Here L2 is actually unreachable, but if we process it with bottom input fact,
+we'll propagate (x=4) to L4, and nuke the otherwise-good rewriting of L4.
+
+* If a currently-unreachable block is not analyzed, then its rewritten
+ graph will not be accumulated in tfb_rg. And that is good:
+ unreachable blocks simply do not appear in the output.
+
+* Note that clients must be careful to provide a fact (even if bottom)
+ for each entry point. Otherwise useful blocks may be garbage collected.
+
+* Note that updateFact must set the change-flag if a label goes from
+ not-in-fbase to in-fbase, even if its fact is bottom. In effect the
+ real fact lattice is
+ UNR
+ bottom
+ the points above bottom
+
+* Even if the fact is going from UNR to bottom, we still call the
+ client's fact_join function because it might give the client
+ some useful debugging information.
+
+* All of this only applies for *forward* ixpoints. For the backward
+ case we must treat every block as reachable; it might finish with a
+ 'return', and therefore have no successors, for example.
+-}
+
+
+-----------------------------------------------------------------------------
+-- Pieces that are shared by fixpoint and fixpoint_anal
+-----------------------------------------------------------------------------
+
+-- | Sort the blocks into the right order for analysis.
+sortBlocks :: NonLocal n => Direction -> [Label] -> LabelMap (Block n C C)
+ -> [Block n C C]
+sortBlocks direction entries blockmap
+ = case direction of Fwd -> fwd
+ Bwd -> reverse fwd
+ where fwd = forwardBlockList entries blockmap
+
+-- | construct a mapping from L -> block indices. If the fact for L
+-- changes, re-analyse the given blocks.
+mkDepBlocks :: NonLocal n => Direction -> [Block n C C] -> LabelMap [Int]
+mkDepBlocks Fwd blocks = go blocks 0 mapEmpty
+ where go [] !_ m = m
+ go (b:bs) !n m = go bs (n+1) $! mapInsert (entryLabel b) [n] m
+mkDepBlocks Bwd blocks = go blocks 0 mapEmpty
+ where go [] !_ m = m
+ go (b:bs) !n m = go bs (n+1) $! go' (successors b) m
+ where go' [] m = m
+ go' (l:ls) m = go' ls (mapInsertWith (++) l [n] m)
+
+
+-- | After some new facts have been generated by analysing a block, we
+-- fold this function over them to generate (a) a list of block
+-- indices to (re-)analyse, and (b) the new FactBase.
+--
+updateFact :: JoinFun f -> LabelMap [Int]
+ -> Label -> f -- out fact
+ -> (IntHeap, FactBase f)
+ -> (IntHeap, FactBase f)
+
+updateFact fact_join dep_blocks lbl new_fact (todo, fbase)
+ = case lookupFact lbl fbase of
+ Nothing -> let !z = mapInsert lbl new_fact fbase in (changed, z)
+ -- Note [no old fact]
+ Just old_fact ->
+ case fact_join lbl (OldFact old_fact) (NewFact new_fact) of
+ (NoChange, _) -> (todo, fbase)
+ (_, f) -> let !z = mapInsert lbl f fbase in (changed, z)
+ where
+ changed = foldr insertIntHeap todo $
+ mapFindWithDefault [] lbl dep_blocks
+
+{-
+Note [no old fact]
+
+We know that the new_fact is >= _|_, so we don't need to join. However,
+if the new fact is also _|_, and we have already analysed its block,
+we don't need to record a change. So there's a tradeoff here. It turns
+out that always recording a change is faster.
+-}
+
+-----------------------------------------------------------------------------
+-- DG: an internal data type for 'decorated graphs'
+-- TOTALLY internal to Hoopl; each block is decorated with a fact
+-----------------------------------------------------------------------------
+
+type Graph = Graph' Block
+type DG f = Graph' (DBlock f)
+data DBlock f n e x = DBlock f (Block n e x) -- ^ block decorated with fact
+
+instance NonLocal n => NonLocal (DBlock f n) where
+ entryLabel (DBlock _ b) = entryLabel b
+ successors (DBlock _ b) = successors b
+
+--- constructors
+
+dgnil :: DG f n O O
+dgnilC :: DG f n C C
+dgSplice :: NonLocal n => DG f n e a -> DG f n a x -> DG f n e x
+
+---- observers
+
+normalizeGraph :: forall n f e x .
+ NonLocal n => DG f n e x
+ -> (Graph n e x, FactBase f)
+ -- A Graph together with the facts for that graph
+ -- The domains of the two maps should be identical
+
+normalizeGraph g = (graphMapBlocks dropFact g, facts g)
+ where dropFact :: DBlock t t1 t2 t3 -> Block t1 t2 t3
+ dropFact (DBlock _ b) = b
+ facts :: DG f n e x -> FactBase f
+ facts GNil = noFacts
+ facts (GUnit _) = noFacts
+ facts (GMany _ body exit) = bodyFacts body `mapUnion` exitFacts exit
+ exitFacts :: MaybeO x (DBlock f n C O) -> FactBase f
+ exitFacts NothingO = noFacts
+ exitFacts (JustO (DBlock f b)) = mapSingleton (entryLabel b) f
+ bodyFacts :: LabelMap (DBlock f n C C) -> FactBase f
+ bodyFacts body = mapFoldWithKey f noFacts body
+ where f :: forall t a x. (NonLocal t) => Label -> DBlock a t C x -> LabelMap a -> LabelMap a
+ f lbl (DBlock f _) fb = mapInsert lbl f fb
+
+--- implementation of the constructors (boring)
+
+dgnil = GNil
+dgnilC = GMany NothingO emptyBody NothingO
+
+dgSplice = U.splice fzCat
+ where fzCat :: DBlock f n e O -> DBlock t n O x -> DBlock f n e x
+ fzCat (DBlock f b1) (DBlock _ b2) = DBlock f $! b1 `U.cat` b2
+ -- NB. strictness, this function is hammered.
+
+----------------------------------------------------------------
+-- Utilities
+----------------------------------------------------------------
+
+-- Lifting based on shape:
+-- - from nodes to blocks
+-- - from facts to fact-like things
+-- Lowering back:
+-- - from fact-like things to facts
+-- Note that the latter two functions depend only on the entry shape.
+class ShapeLifter e x where
+ singletonDG :: f -> n e x -> DG f n e x
+ fwdEntryFact :: NonLocal n => n e x -> f -> Fact e f
+ fwdEntryLabel :: NonLocal n => n e x -> MaybeC e [Label]
+ ftransfer :: FwdTransfer n f -> n e x -> f -> Fact x f
+ frewrite :: FwdRewrite m n f -> n e x
+ -> f -> m (Maybe (Graph n e x, FwdRewrite m n f))
+-- @ end node.tex
+ bwdEntryFact :: NonLocal n => DataflowLattice f -> n e x -> Fact e f -> f
+ btransfer :: BwdTransfer n f -> n e x -> Fact x f -> f
+ brewrite :: BwdRewrite m n f -> n e x
+ -> Fact x f -> m (Maybe (Graph n e x, BwdRewrite m n f))
+
+instance ShapeLifter C O where
+ singletonDG f n = gUnitCO (DBlock f (BlockCO n BNil))
+ fwdEntryFact n f = mapSingleton (entryLabel n) f
+ bwdEntryFact lat n fb = getFact lat (entryLabel n) fb
+ ftransfer (FwdTransfer3 (ft, _, _)) n f = ft n f
+ btransfer (BwdTransfer3 (bt, _, _)) n f = bt n f
+ frewrite (FwdRewrite3 (fr, _, _)) n f = fr n f
+ brewrite (BwdRewrite3 (br, _, _)) n f = br n f
+ fwdEntryLabel n = JustC [entryLabel n]
+
+instance ShapeLifter O O where
+ singletonDG f = gUnitOO . DBlock f . BMiddle
+ fwdEntryFact _ f = f
+ bwdEntryFact _ _ f = f
+ ftransfer (FwdTransfer3 (_, ft, _)) n f = ft n f
+ btransfer (BwdTransfer3 (_, bt, _)) n f = bt n f
+ frewrite (FwdRewrite3 (_, fr, _)) n f = fr n f
+ brewrite (BwdRewrite3 (_, br, _)) n f = br n f
+ fwdEntryLabel _ = NothingC
+
+instance ShapeLifter O C where
+ singletonDG f n = gUnitOC (DBlock f (BlockOC BNil n))
+ fwdEntryFact _ f = f
+ bwdEntryFact _ _ f = f
+ ftransfer (FwdTransfer3 (_, _, ft)) n f = ft n f
+ btransfer (BwdTransfer3 (_, _, bt)) n f = bt n f
+ frewrite (FwdRewrite3 (_, _, fr)) n f = fr n f
+ brewrite (BwdRewrite3 (_, _, br)) n f = br n f
+ fwdEntryLabel _ = NothingC
+
+{-
+class ShapeLifter e x where
+ singletonDG :: f -> n e x -> DG f n e x
+
+instance ShapeLifter C O where
+ singletonDG f n = gUnitCO (DBlock f (BlockCO n BNil))
+
+instance ShapeLifter O O where
+ singletonDG f = gUnitOO . DBlock f . BMiddle
+
+instance ShapeLifter O C where
+ singletonDG f n = gUnitOC (DBlock f (BlockOC BNil n))
+-}
+
+-- Fact lookup: the fact `orelse` bottom
+getFact :: DataflowLattice f -> Label -> FactBase f -> f
+getFact lat l fb = case lookupFact l fb of Just f -> f
+ Nothing -> fact_bot lat
+
+
+
+{- Note [Respects fuel]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-}
+-- $fuel
+-- A value of type 'FwdRewrite' or 'BwdRewrite' /respects fuel/ if
+-- any function contained within the value satisfies the following properties:
+--
+-- * When fuel is exhausted, it always returns 'Nothing'.
+--
+-- * When it returns @Just g rw@, it consumes /exactly/ one unit
+-- of fuel, and new rewrite 'rw' also respects fuel.
+--
+-- Provided that functions passed to 'mkFRewrite', 'mkFRewrite3',
+-- 'mkBRewrite', and 'mkBRewrite3' are not aware of the fuel supply,
+-- the results respect fuel.
+--
+-- It is an /unchecked/ run-time error for the argument passed to 'wrapFR',
+-- 'wrapFR2', 'wrapBR', or 'warpBR2' to return a function that does not respect fuel.
+
+-- -----------------------------------------------------------------------------
+-- a Heap of Int
+
+-- We should really use a proper Heap here, but my attempts to make
+-- one have not succeeded in beating the simple ordered list. Another
+-- alternative is IntSet (using deleteFindMin), but that was also
+-- slower than the ordered list in my experiments --SDM 25/1/2012
+
+type IntHeap = [Int] -- ordered
+
+insertIntHeap :: Int -> [Int] -> [Int]
+insertIntHeap x [] = [x]
+insertIntHeap x (y:ys)
+ | x < y = x : y : ys
+ | x == y = x : ys
+ | otherwise = y : insertIntHeap x ys
diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs
index 04586b1029..797b785de2 100644
--- a/compiler/cmm/MkGraph.hs
+++ b/compiler/cmm/MkGraph.hs
@@ -1,29 +1,19 @@
{-# LANGUAGE GADTs #-}
--- ToDo: remove -fno-warn-warnings-deprecations
-{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
--- ToDo: remove -fno-warn-incomplete-patterns
-{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
-
--- Module for building CmmAGraphs.
-
--- As the CmmAGraph is a wrapper over Graph CmmNode O x, it is different
--- from Hoopl's AGraph. The current clients expect functions with the
--- same names Hoopl uses, so this module cannot be in the same namespace
--- as Compiler.Hoopl.
-
module MkGraph
- ( CmmAGraph
- , emptyAGraph, (<*>), catAGraphs, outOfLine
- , mkLabel, mkMiddle, mkLast
- , withFreshLabel, withUnique, lgraphOfAGraph, labelAGraph
+ ( CmmAGraph, CgStmt(..)
+ , (<*>), catAGraphs
+ , mkLabel, mkMiddle, mkLast, outOfLine
+ , lgraphOfAGraph, labelAGraph
, stackStubExpr
- , mkNop, mkAssign, mkStore, mkCall, mkCmmCall, mkSafeCall, mkUnsafeCall, mkFinalCall
- , mkJump, mkDirectJump, mkForeignJump, mkJumpGC, mkCbranch, mkSwitch
- , mkReturn, mkReturnSimple, mkComment, mkCallEntry
- , mkBranch, mkCmmIfThenElse, mkCmmIfThen, mkCmmWhileDo
- , copyInOflow, copyInSlot, copyOutOflow, copyOutSlot
+ , mkNop, mkAssign, mkStore, mkUnsafeCall, mkFinalCall, mkCallReturnsTo
+ , mkJump, mkDirectJump, mkForeignJump, mkForeignJumpExtra, mkJumpGC
+ , mkCbranch, mkSwitch
+ , mkReturn, mkReturnSimple, mkComment, mkCallEntry, mkBranch
+ , copyInOflow, copyOutOflow
+ , noExtraStack
+ , toCall, Transfer(..)
)
where
@@ -31,250 +21,232 @@ import BlockId
import Cmm
import CmmCallConv (assignArgumentsPos, ParamLocation(..))
+
import Compiler.Hoopl hiding (Unique, (<*>), mkFirst, mkMiddle, mkLast, mkLabel, mkBranch, Shape(..))
-import qualified Compiler.Hoopl as H
-import Compiler.Hoopl.GHC (uniqueToLbl)
import FastString
import ForeignCall
import Outputable
import Prelude hiding (succ)
import SMRep (ByteOff)
-import StaticFlags
-import Unique
import UniqSupply
-import Util
+import OrdList
#include "HsVersions.h"
-{-
-A 'CmmAGraph' is an abstract version of a 'Graph CmmNode O x' from module
-'Cmm'. The difference is that the 'CmmAGraph' can be eigher open of closed at
-exit and it can supply fresh Labels and Uniques.
-
-It also supports a splicing operation <*>, which is different from the Hoopl's
-<*>, because it splices two CmmAGraphs. Specifically, it can splice Graph
-O C and Graph O x. In this case, the open beginning of the second graph is
-thrown away. In the debug mode this sequence is checked to be empty or
-containing a branch (see note [Branch follows branch]).
-
-When an CmmAGraph open at exit is being converted to a CmmGraph, the output
-exit sequence is considered unreachable. If the graph consist of one block
-only, if it not the case and we crash. Otherwise we just throw the exit
-sequence away (and in debug mode we test that it really was unreachable).
--}
-
-{-
-Node [Branch follows branch]
-============================
-Why do we say it's ok for a Branch to follow a Branch?
-Because the standard constructor mkLabel has fall-through
-semantics. So if you do a mkLabel, you finish the current block,
-giving it a label, and start a new one that branches to that label.
-Emitting a Branch at this point is fine:
- goto L1; L2: ...stuff...
--}
-
-data CmmGraphOC = Opened (Graph CmmNode O O)
- | Closed (Graph CmmNode O C)
-type CmmAGraph = UniqSM CmmGraphOC -- Graph open at entry
-
-{-
-MS: I began with
- newtype CmmAGraph = forall x. AG (UniqSM (Graph CmmNode O x))
-but that does not work well, because we cannot take the graph
-out of the monad -- we do not know the type of what we would take
-out and pattern matching does not help, as we cannot pattern match
-on a graph inside the monad.
--}
-data Transfer = Call | Jump | Ret deriving Eq
+-----------------------------------------------------------------------------
+-- Building Graphs
+
+
+-- | CmmAGraph is a chunk of code consisting of:
+--
+-- * ordinary statements (assignments, stores etc.)
+-- * jumps
+-- * labels
+-- * out-of-line labelled blocks
+--
+-- The semantics is that control falls through labels and out-of-line
+-- blocks. Everything after a jump up to the next label is by
+-- definition unreachable code, and will be discarded.
+--
+-- Two CmmAGraphs can be stuck together with <*>, with the meaning that
+-- control flows from the first to the second.
+--
+-- A 'CmmAGraph' can be turned into a 'CmmGraph' (closed at both ends)
+-- by providing a label for the entry point; see 'labelAGraph'.
+--
+type CmmAGraph = OrdList CgStmt
+
+data CgStmt
+ = CgLabel BlockId
+ | CgStmt (CmmNode O O)
+ | CgLast (CmmNode O C)
+ | CgFork BlockId CmmAGraph
+
+flattenCmmAGraph :: BlockId -> CmmAGraph -> CmmGraph
+flattenCmmAGraph id stmts =
+ CmmGraph { g_entry = id,
+ g_graph = GMany NothingO body NothingO }
+ where
+ (block, blocks) = flatten (fromOL stmts)
+ entry = blockJoinHead (CmmEntry id) block
+ body = foldr addBlock emptyBody (entry:blocks)
+
+ flatten :: [CgStmt] -> (Block CmmNode O C, [Block CmmNode C C])
+ flatten [] = panic "flatten []"
+
+ -- A label at the end of a function or fork: this label must not be reachable,
+ -- but it might be referred to from another BB that also isn't reachable.
+ -- Eliminating these has to be done with a dead-code analysis. For now,
+ -- we just make it into a well-formed block by adding a recursive jump.
+ flatten [CgLabel id]
+ = (goto_id, [blockJoinHead (CmmEntry id) goto_id] )
+ where goto_id = blockJoinTail emptyBlock (CmmBranch id)
+
+ -- A jump/branch: throw away all the code up to the next label, because
+ -- it is unreachable. Be careful to keep forks that we find on the way.
+ flatten (CgLast stmt : stmts)
+ = case dropWhile isOrdinaryStmt stmts of
+ [] ->
+ ( sing, [] )
+ [CgLabel id] ->
+ ( sing, [blockJoin (CmmEntry id) emptyBlock (CmmBranch id)] )
+ (CgLabel id : stmts) ->
+ ( sing, blockJoinHead (CmmEntry id) block : blocks )
+ where (block,blocks) = flatten stmts
+ (CgFork fork_id stmts : ss) ->
+ flatten (CgFork fork_id stmts : CgLast stmt : ss)
+ _ -> panic "MkGraph.flatten"
+ where
+ sing = blockJoinTail emptyBlock stmt
+
+ flatten (s:ss) =
+ case s of
+ CgStmt stmt -> (blockCons stmt block, blocks)
+ CgLabel id -> (blockJoinTail emptyBlock (CmmBranch id),
+ blockJoinHead (CmmEntry id) block : blocks)
+ CgFork fork_id stmts ->
+ (block, blockJoinHead (CmmEntry fork_id) fork_block : fork_blocks ++ blocks)
+ where (fork_block, fork_blocks) = flatten (fromOL stmts)
+ _ -> panic "MkGraph.flatten"
+ where (block,blocks) = flatten ss
+
+isOrdinaryStmt :: CgStmt -> Bool
+isOrdinaryStmt (CgStmt _) = True
+isOrdinaryStmt (CgLast _) = True
+isOrdinaryStmt _ = False
+
+
---------- AGraph manipulation
-emptyAGraph :: CmmAGraph
(<*>) :: CmmAGraph -> CmmAGraph -> CmmAGraph
+(<*>) = appOL
+
catAGraphs :: [CmmAGraph] -> CmmAGraph
+catAGraphs = concatOL
+
+-- | created a sequence "goto id; id:" as an AGraph
+mkLabel :: BlockId -> CmmAGraph
+mkLabel bid = unitOL (CgLabel bid)
+
+-- | creates an open AGraph from a given node
+mkMiddle :: CmmNode O O -> CmmAGraph
+mkMiddle middle = unitOL (CgStmt middle)
-mkLabel :: BlockId -> CmmAGraph -- created a sequence "goto id; id:" as an AGraph
-mkMiddle :: CmmNode O O -> CmmAGraph -- creates an open AGraph from a given node
-mkLast :: CmmNode O C -> CmmAGraph -- created a closed AGraph from a given node
+-- | created a closed AGraph from a given node
+mkLast :: CmmNode O C -> CmmAGraph
+mkLast last = unitOL (CgLast last)
-withFreshLabel :: String -> (BlockId -> CmmAGraph) -> CmmAGraph
-withUnique :: (Unique -> CmmAGraph) -> CmmAGraph
+-- | A labelled code block; should end in a last node
+outOfLine :: BlockId -> CmmAGraph -> CmmAGraph
+outOfLine l g = unitOL (CgFork l g)
+-- | allocate a fresh label for the entry point
lgraphOfAGraph :: CmmAGraph -> UniqSM CmmGraph
- -- ^ allocate a fresh label for the entry point
+lgraphOfAGraph g = do u <- getUniqueM
+ return (flattenCmmAGraph (mkBlockId u) g)
+
+-- | use the given BlockId as the label of the entry point
labelAGraph :: BlockId -> CmmAGraph -> UniqSM CmmGraph
- -- ^ use the given BlockId as the label of the entry point
+labelAGraph lbl ag = return (flattenCmmAGraph lbl ag)
---------- No-ops
mkNop :: CmmAGraph
+mkNop = nilOL
+
mkComment :: FastString -> CmmAGraph
+#ifdef DEBUG
+-- SDM: generating all those comments takes time, this saved about 4% for me
+mkComment fs = mkMiddle $ CmmComment fs
+#else
+mkComment _ = nilOL
+#endif
---------- Assignment and store
mkAssign :: CmmReg -> CmmExpr -> CmmAGraph
-mkStore :: CmmExpr -> CmmExpr -> CmmAGraph
+mkAssign l r = mkMiddle $ CmmAssign l r
----------- Calls
-mkCall :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmActual] ->
- UpdFrameOffset -> CmmAGraph
-mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmActual] ->
- UpdFrameOffset -> CmmAGraph
- -- Native C-- calling convention
-mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> UpdFrameOffset -> Bool -> CmmAGraph
-mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph
-mkFinalCall :: CmmExpr -> CCallConv -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
- -- Never returns; like exit() or barf()
+mkStore :: CmmExpr -> CmmExpr -> CmmAGraph
+mkStore l r = mkMiddle $ CmmStore l r
---------- Control transfer
-mkJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
-mkDirectJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
-mkJumpGC :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
-mkForeignJump :: Convention -> CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
-mkCbranch :: CmmExpr -> BlockId -> BlockId -> CmmAGraph
-mkSwitch :: CmmExpr -> [Maybe BlockId] -> CmmAGraph
+mkJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
+mkJump e actuals updfr_off =
+ lastWithArgs Jump Old NativeNodeCall actuals updfr_off $
+ toCall e Nothing updfr_off 0
+
+mkDirectJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
+mkDirectJump e actuals updfr_off =
+ lastWithArgs Jump Old NativeDirectCall actuals updfr_off $
+ toCall e Nothing updfr_off 0
+
+mkJumpGC :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
+mkJumpGC e actuals updfr_off =
+ lastWithArgs Jump Old GC actuals updfr_off $
+ toCall e Nothing updfr_off 0
+
+mkForeignJump :: Convention -> CmmExpr -> [CmmActual] -> UpdFrameOffset
+ -> CmmAGraph
+mkForeignJump conv e actuals updfr_off =
+ mkForeignJumpExtra conv e actuals updfr_off noExtraStack
+
+mkForeignJumpExtra :: Convention -> CmmExpr -> [CmmActual]
+ -> UpdFrameOffset -> (ByteOff, [(CmmExpr, ByteOff)])
+ -> CmmAGraph
+mkForeignJumpExtra conv e actuals updfr_off extra_stack =
+ lastWithArgsAndExtraStack Jump Old conv actuals updfr_off extra_stack $
+ toCall e Nothing updfr_off 0
+
+mkCbranch :: CmmExpr -> BlockId -> BlockId -> CmmAGraph
+mkCbranch pred ifso ifnot = mkLast (CmmCondBranch pred ifso ifnot)
+
+mkSwitch :: CmmExpr -> [Maybe BlockId] -> CmmAGraph
+mkSwitch e tbl = mkLast $ CmmSwitch e tbl
+
mkReturn :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
+mkReturn e actuals updfr_off =
+ lastWithArgs Ret Old NativeReturn actuals updfr_off $
+ toCall e Nothing updfr_off 0
+
mkReturnSimple :: [CmmActual] -> UpdFrameOffset -> CmmAGraph
+mkReturnSimple actuals updfr_off =
+ mkReturn e actuals updfr_off
+ where e = CmmLoad (CmmStackSlot Old updfr_off) gcWord
mkBranch :: BlockId -> CmmAGraph
-mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> CmmAGraph
-mkCmmIfThen :: CmmExpr -> CmmAGraph -> CmmAGraph
-mkCmmWhileDo :: CmmExpr -> CmmAGraph -> CmmAGraph
-
-outOfLine :: CmmAGraph -> CmmAGraph
--- ^ The argument is an CmmAGraph that must have an
--- empty entry sequence and be closed at the end.
--- The result is a new CmmAGraph that is open at the
--- end and goes directly from entry to exit, with the
--- original graph sitting to the side out-of-line.
---
--- Example: mkMiddle (x = 3)
--- <*> outOfLine (mkLabel L <*> ...stuff...)
--- <*> mkMiddle (y = x)
--- Control will flow directly from x=3 to y=x;
--- the block starting with L is "on the side".
---
--- N.B. algebraically forall g g' : g <*> outOfLine g' == outOfLine g' <*> g
+mkBranch bid = mkLast (CmmBranch bid)
+
+mkFinalCall :: CmmExpr -> CCallConv -> [CmmActual] -> UpdFrameOffset
+ -> CmmAGraph
+mkFinalCall f _ actuals updfr_off =
+ lastWithArgs Call Old NativeDirectCall actuals updfr_off $
+ toCall f Nothing updfr_off 0
+
+mkCallReturnsTo :: CmmExpr -> Convention -> [CmmActual]
+ -> BlockId
+ -> ByteOff
+ -> UpdFrameOffset
+ -> (ByteOff, [(CmmExpr,ByteOff)])
+ -> CmmAGraph
+mkCallReturnsTo f callConv actuals ret_lbl ret_off updfr_off extra_stack = do
+ lastWithArgsAndExtraStack Call (Young ret_lbl) callConv actuals
+ updfr_off extra_stack $
+ toCall f (Just ret_lbl) updfr_off ret_off
+
+mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph
+mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as
+
--------------------------------------------------------------------------
--- ================ IMPLEMENTATION ================--
-
---------------------------------------------------
--- Raw CmmAGraph handling
-
-emptyAGraph = return $ Opened emptyGraph
-ag <*> ah = do g <- ag
- h <- ah
- return (case (g, h) of
- (Opened g, Opened h) -> Opened $ g H.<*> h
- (Opened g, Closed h) -> Closed $ g H.<*> h
- (Closed g, Opened GNil) -> Closed g
- (Closed g, Opened (GUnit e)) -> note_unreachable e $ Closed g
- (Closed g, Opened (GMany (JustO e) b x)) -> note_unreachable e $ Opened $ g H.|*><*| GMany NothingO b x
- (Closed g, Closed (GMany (JustO e) b x)) -> note_unreachable e $ Closed $ g H.|*><*| GMany NothingO b x
- :: CmmGraphOC)
-catAGraphs = foldl (<*>) emptyAGraph
-
-outOfLine ag = withFreshLabel "outOfLine" $ \l ->
- do g <- ag
- return (case g of
- Closed (GMany (JustO e) b _) -> note_unreachable e $ Opened $
- GMany (JustO $ BLast $ CmmBranch l) b (JustO $ BFirst $ CmmEntry l)
- _ -> panic "outOfLine"
- :: CmmGraphOC)
-
-note_unreachable :: Block CmmNode O x -> a -> a
-note_unreachable block graph =
- ASSERT (block_is_empty_or_label) -- Note [Branch follows branch]
- graph
- where block_is_empty_or_label :: Bool
- block_is_empty_or_label = case blockToNodeList block of
- (NothingC, [], NothingC) -> True
- (NothingC, [], JustC (CmmBranch _)) -> True
- _ -> False
-
-mkLabel bid = return $ Opened $ H.mkLast (CmmBranch bid) |*><*| H.mkFirst (CmmEntry bid)
-mkMiddle middle = return $ Opened $ H.mkMiddle middle
-mkLast last = return $ Closed $ H.mkLast last
-
-withUnique f = getUniqueM >>= f
-withFreshLabel _name f = getUniqueM >>= f . uniqueToLbl . intToUnique . getKey
-lgraphOfAGraph g = do u <- getUniqueM
- labelAGraph (mkBlockId u) g
-
-labelAGraph lbl ag = do g <- ag
- return $ CmmGraph {g_entry=lbl, g_graph=H.mkFirst (CmmEntry lbl) H.<*> closed g}
- where closed :: CmmGraphOC -> Graph CmmNode O C
- closed (Closed g) = g
- closed (Opened g@(GMany entry body (JustO exit))) =
- ASSERT (entryLabel exit `notElem` map entryLabel (postorder_dfs g))
- GMany entry body NothingO
- closed (Opened _) = panic "labelAGraph"
-
---------------------------------------------------
--- CmmAGraph constructions
-
-mkNop = emptyAGraph
-mkComment fs = mkMiddle $ CmmComment fs
-mkStore l r = mkMiddle $ CmmStore l r
-
--- NEED A COMPILER-DEBUGGING FLAG HERE
--- Sanity check: any value assigned to a pointer must be non-zero.
--- If it's 0, cause a crash immediately.
-mkAssign l r = if opt_StubDeadValues then assign l r <*> check l else assign l r
- where assign l r = mkMiddle (CmmAssign l r)
- check (CmmGlobal _) = mkNop
- check l@(CmmLocal reg) = -- if a ptr arg is NULL, cause a crash!
- if isGcPtrType ty then
- mkCmmIfThen (CmmMachOp (MO_Eq w) [r, stackStubExpr w])
- (assign l (CmmLoad (CmmLit (CmmInt 0 w)) ty))
- else mkNop
- where ty = localRegType reg
- w = typeWidth ty
- r = CmmReg l
-- Why are we inserting extra blocks that simply branch to the successors?
-- Because in addition to the branch instruction, @mkBranch@ will insert
-- a necessary adjustment to the stack pointer.
-mkCbranch pred ifso ifnot = mkLast (CmmCondBranch pred ifso ifnot)
-mkSwitch e tbl = mkLast $ CmmSwitch e tbl
-
-mkSafeCall t fs as upd i = withFreshLabel "safe call" $ body
- where
- body k =
- ( mkStore (CmmStackSlot (CallArea (Young k)) (widthInBytes wordWidth))
- (CmmLit (CmmBlock k))
- <*> mkLast (CmmForeignCall {tgt=t, res=fs, args=as, succ=k, updfr=upd, intrbl=i})
- <*> mkLabel k)
-mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as
-mkBranch bid = mkLast (CmmBranch bid)
-
-mkCmmIfThenElse e tbranch fbranch =
- withFreshLabel "end of if" $ \endif ->
- withFreshLabel "start of then" $ \tid ->
- withFreshLabel "start of else" $ \fid ->
- mkCbranch e tid fid <*>
- mkLabel tid <*> tbranch <*> mkBranch endif <*>
- mkLabel fid <*> fbranch <*> mkLabel endif
-
-mkCmmIfThen e tbranch
- = withFreshLabel "end of if" $ \endif ->
- withFreshLabel "start of then" $ \tid ->
- mkCbranch e tid endif <*>
- mkLabel tid <*> tbranch <*> mkLabel endif
-
-mkCmmWhileDo e body =
- withFreshLabel "loop test" $ \test ->
- withFreshLabel "loop head" $ \head ->
- withFreshLabel "end while" $ \endwhile ->
- -- Forrest Baskett's while-loop layout
- mkBranch test <*> mkLabel head <*> body
- <*> mkLabel test <*> mkCbranch e head endwhile
- <*> mkLabel endwhile
-- For debugging purposes, we can stub out dead stack slots:
stackStubExpr :: Width -> CmmExpr
@@ -286,12 +258,9 @@ stackStubExpr w = CmmLit (CmmInt 0 w)
-- Therefore, for copying arguments and results, we provide different
-- functions to pass the arguments in an overflow area and to pass them in spill slots.
copyInOflow :: Convention -> Area -> [CmmFormal] -> (Int, CmmAGraph)
-copyInSlot :: Convention -> [CmmFormal] -> [CmmNode O O]
-copyOutSlot :: Convention -> [LocalReg] -> [CmmNode O O]
copyInOflow conv area formals = (offset, catAGraphs $ map mkMiddle nodes)
where (offset, nodes) = copyIn oneCopyOflowI conv area formals
-copyInSlot c f = snd $ copyIn oneCopySlotI c (panic "no area for copying to slots") f
type SlotCopier = Area -> (LocalReg, ByteOff) -> (ByteOff, [CmmNode O O]) ->
(ByteOff, [CmmNode O O])
@@ -312,104 +281,86 @@ copyIn oflow conv area formals =
adjust rst x@(_, RegisterParam _) = x : rst
-- Copy-in one arg, using overflow space if needed.
-oneCopyOflowI, oneCopySlotI :: SlotCopier
+oneCopyOflowI :: SlotCopier
oneCopyOflowI area (reg, off) (n, ms) =
(max n off, CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty) : ms)
where ty = localRegType reg
--- Copy-in one arg, using spill slots if needed -- used for calling conventions at
--- a procpoint that is not a return point. The offset is irrelevant here...
-oneCopySlotI _ (reg, _) (n, ms) =
- (n, CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot (RegSlot reg) w) ty) : ms)
- where ty = localRegType reg
- w = widthInBytes (typeWidth ty)
-
-
-- Factoring out the common parts of the copyout functions yielded something
-- more complicated:
-copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual] -> UpdFrameOffset ->
- (Int, CmmAGraph)
+data Transfer = Call | Jump | Ret deriving Eq
+
+copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual]
+ -> UpdFrameOffset
+ -> (ByteOff, [(CmmExpr,ByteOff)]) -- extra stack stuff
+ -> (Int, CmmAGraph)
+
-- Generate code to move the actual parameters into the locations
--- required by the calling convention. This includes a store for the return address.
+-- required by the calling convention. This includes a store for the
+-- return address.
--
--- The argument layout function ignores the pointer to the info table, so we slot that
--- in here. When copying-out to a young area, we set the info table for return
--- and adjust the offsets of the other parameters.
--- If this is a call instruction, we adjust the offsets of the other parameters.
-copyOutOflow conv transfer area@(CallArea a) actuals updfr_off
- = foldr co (init_offset, emptyAGraph) args'
+-- The argument layout function ignores the pointer to the info table,
+-- so we slot that in here. When copying-out to a young area, we set
+-- the info table for return and adjust the offsets of the other
+-- parameters. If this is a call instruction, we adjust the offsets
+-- of the other parameters.
+copyOutOflow conv transfer area actuals updfr_off
+ (extra_stack_off, extra_stack_stuff)
+ = foldr co (init_offset, mkNop) (args' ++ stack_params)
where
co (v, RegisterParam r) (n, ms) = (n, mkAssign (CmmGlobal r) v <*> ms)
co (v, StackParam off) (n, ms) = (max n off, mkStore (CmmStackSlot area off) v <*> ms)
+ stack_params = [ (e, StackParam (off + init_offset))
+ | (e,off) <- extra_stack_stuff ]
+
(setRA, init_offset) =
- case a of Young id -> id `seq` -- Generate a store instruction for
- -- the return address if making a call
+ case area of
+ Young id -> id `seq` -- Generate a store instruction for
+ -- the return address if making a call
if transfer == Call then
([(CmmLit (CmmBlock id), StackParam init_offset)],
widthInBytes wordWidth)
else ([], 0)
- Old -> ([], updfr_off)
+ Old -> ([], updfr_off)
+
+ arg_offset = init_offset + extra_stack_off
args :: [(CmmExpr, ParamLocation)] -- The argument and where to put it
args = assignArgumentsPos conv cmmExprType actuals
args' = foldl adjust setRA args
- where adjust rst (v, StackParam off) = (v, StackParam (off + init_offset)) : rst
+ where adjust rst (v, StackParam off) = (v, StackParam (off + arg_offset)) : rst
adjust rst x@(_, RegisterParam _) = x : rst
-copyOutOflow _ _ (RegSlot _) _ _ = panic "cannot copy arguments into a register slot"
--- Args passed only in registers and stack slots; no overflow space.
--- No return address may apply!
-copyOutSlot conv actuals = foldr co [] args
- where co (v, RegisterParam r) ms = CmmAssign (CmmGlobal r) (toExp v) : ms
- co (v, StackParam off) ms = CmmStore (CmmStackSlot (RegSlot v) off) (toExp v) : ms
- toExp r = CmmReg (CmmLocal r)
- args = assignArgumentsPos conv localRegType actuals
mkCallEntry :: Convention -> [CmmFormal] -> (Int, CmmAGraph)
-mkCallEntry conv formals = copyInOflow conv (CallArea Old) formals
+mkCallEntry conv formals = copyInOflow conv Old formals
-lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual] -> UpdFrameOffset ->
- (ByteOff -> CmmAGraph) -> CmmAGraph
+lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual]
+ -> UpdFrameOffset
+ -> (ByteOff -> CmmAGraph)
+ -> CmmAGraph
lastWithArgs transfer area conv actuals updfr_off last =
- let (outArgs, copies) = copyOutOflow conv transfer area actuals updfr_off in
+ lastWithArgsAndExtraStack transfer area conv actuals
+ updfr_off noExtraStack last
+
+lastWithArgsAndExtraStack :: Transfer -> Area -> Convention -> [CmmActual]
+ -> UpdFrameOffset -> (ByteOff, [(CmmExpr,ByteOff)])
+ -> (ByteOff -> CmmAGraph)
+ -> CmmAGraph
+lastWithArgsAndExtraStack transfer area conv actuals updfr_off
+ extra_stack last =
+ let (outArgs, copies) = copyOutOflow conv transfer area actuals
+ updfr_off extra_stack in
copies <*> last outArgs
--- The area created for the jump and return arguments is the same area as the
--- procedure entry.
-old :: Area
-old = CallArea Old
-toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff -> ByteOff -> CmmAGraph
+noExtraStack :: (ByteOff, [(CmmExpr,ByteOff)])
+noExtraStack = (0,[])
+
+toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff -> ByteOff
+ -> CmmAGraph
toCall e cont updfr_off res_space arg_space =
mkLast $ CmmCall e cont arg_space res_space updfr_off
-mkJump e actuals updfr_off =
- lastWithArgs Jump old NativeNodeCall actuals updfr_off $ toCall e Nothing updfr_off 0
-mkDirectJump e actuals updfr_off =
- lastWithArgs Jump old NativeDirectCall actuals updfr_off $ toCall e Nothing updfr_off 0
-mkJumpGC e actuals updfr_off =
- lastWithArgs Jump old GC actuals updfr_off $ toCall e Nothing updfr_off 0
-mkForeignJump conv e actuals updfr_off =
- lastWithArgs Jump old conv actuals updfr_off $ toCall e Nothing updfr_off 0
-mkReturn e actuals updfr_off =
- lastWithArgs Ret old NativeReturn actuals updfr_off $ toCall e Nothing updfr_off 0
- -- where e = CmmLoad (CmmStackSlot (CallArea Old) updfr_off) gcWord
-mkReturnSimple actuals updfr_off =
- lastWithArgs Ret old NativeReturn actuals updfr_off $ toCall e Nothing updfr_off 0
- where e = CmmLoad (CmmStackSlot (CallArea Old) updfr_off) gcWord
-
-mkFinalCall f _ actuals updfr_off =
- lastWithArgs Call old NativeDirectCall actuals updfr_off $ toCall f Nothing updfr_off 0
-
-mkCmmCall f results actuals = mkCall f (NativeDirectCall, NativeReturn) results actuals
-
--- I'm dropping the SRT, but that should be okay: we plan to reconstruct it later.
-mkCall f (callConv, retConv) results actuals updfr_off =
- withFreshLabel "call successor" $ \k ->
- let area = CallArea $ Young k
- (off, copyin) = copyInOflow retConv area results
- copyout = lastWithArgs Call area callConv actuals updfr_off
- (toCall f (Just k) updfr_off off)
- in (copyout <*> mkLabel k <*> copyin)
diff --git a/compiler/cmm/OldCmm.hs b/compiler/cmm/OldCmm.hs
index fc4706c8c4..00bbe6d2ee 100644
--- a/compiler/cmm/OldCmm.hs
+++ b/compiler/cmm/OldCmm.hs
@@ -9,9 +9,7 @@
module OldCmm (
CmmGroup, GenCmmGroup, RawCmmGroup, CmmDecl, RawCmmDecl,
ListGraph(..),
-
- CmmInfo(..), CmmInfoTable(..), ClosureTypeInfo(..), UpdateFrame(..),
-
+ UpdateFrame(..), CmmInfoTable(..), ClosureTypeInfo(..),
CmmStatic(..), CmmStatics(..), CmmFormal, CmmActual,
cmmMapGraph, cmmTopMapGraph,
@@ -52,13 +50,6 @@ import ForeignCall
-- Info Tables
-----------------------------------------------------------------------------
-data CmmInfo
- = CmmInfo
- (Maybe BlockId) -- GC target. Nothing <=> CPS won't do stack check
- -- JD: NOT USED BY NEW CODE GEN
- (Maybe UpdateFrame) -- Update frame
- CmmInfoTable -- Info table
-
-- | A frame that is to be pushed before entry to the function.
-- Used to handle 'update' frames.
data UpdateFrame
@@ -85,8 +76,8 @@ data UpdateFrame
newtype ListGraph i = ListGraph [GenBasicBlock i]
-- | Cmm with the info table as a data type
-type CmmGroup = GenCmmGroup CmmStatics CmmInfo (ListGraph CmmStmt)
-type CmmDecl = GenCmmDecl CmmStatics CmmInfo (ListGraph CmmStmt)
+type CmmGroup = GenCmmGroup CmmStatics CmmInfoTable (ListGraph CmmStmt)
+type CmmDecl = GenCmmDecl CmmStatics CmmInfoTable (ListGraph CmmStmt)
-- | Cmm with the info tables converted to a list of 'CmmStatic' along with the info
-- table label. If we are building without tables-next-to-code there will be no statics
@@ -225,16 +216,9 @@ instance UserOfLocalRegs CmmCallTarget where
foldRegsUsed f set (CmmCallee e _) = foldRegsUsed f set e
foldRegsUsed f set (CmmPrim _ mStmts) = foldRegsUsed f set mStmts
-instance UserOfSlots CmmCallTarget where
- foldSlotsUsed f set (CmmCallee e _) = foldSlotsUsed f set e
- foldSlotsUsed _ set (CmmPrim {}) = set
-
instance UserOfLocalRegs a => UserOfLocalRegs (CmmHinted a) where
foldRegsUsed f set a = foldRegsUsed f set (hintlessCmm a)
-instance UserOfSlots a => UserOfSlots (CmmHinted a) where
- foldSlotsUsed f set a = foldSlotsUsed f set (hintlessCmm a)
-
instance DefinerOfLocalRegs a => DefinerOfLocalRegs (CmmHinted a) where
foldRegsDefd f set a = foldRegsDefd f set (hintlessCmm a)
diff --git a/compiler/cmm/OldCmmLint.hs b/compiler/cmm/OldCmmLint.hs
new file mode 100644
index 0000000000..72e40ce4f8
--- /dev/null
+++ b/compiler/cmm/OldCmmLint.hs
@@ -0,0 +1,209 @@
+-----------------------------------------------------------------------------
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-- CmmLint: checking the correctness of Cmm statements and expressions
+--
+-----------------------------------------------------------------------------
+
+{-# OPTIONS -fno-warn-tabs #-}
+-- The above warning supression flag is a temporary kludge.
+-- While working on this module you are encouraged to remove it and
+-- detab the module (please do the detabbing in a separate patch). See
+-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
+-- for details
+
+module OldCmmLint (
+ cmmLint, cmmLintTop
+ ) where
+
+import BlockId
+import OldCmm
+import CLabel
+import Outputable
+import OldPprCmm()
+import Constants
+import FastString
+import Platform
+
+import Data.Maybe
+
+-- -----------------------------------------------------------------------------
+-- Exported entry points:
+
+cmmLint :: (Outputable d, Outputable h)
+ => Platform -> GenCmmGroup d h (ListGraph CmmStmt) -> Maybe SDoc
+cmmLint platform tops = runCmmLint platform (mapM_ (lintCmmDecl platform)) tops
+
+cmmLintTop :: (Outputable d, Outputable h)
+ => Platform -> GenCmmDecl d h (ListGraph CmmStmt) -> Maybe SDoc
+cmmLintTop platform top = runCmmLint platform (lintCmmDecl platform) top
+
+runCmmLint :: Outputable a
+ => Platform -> (a -> CmmLint b) -> a -> Maybe SDoc
+runCmmLint _ l p =
+ case unCL (l p) of
+ Left err -> Just (vcat [ptext $ sLit ("Cmm lint error:"),
+ nest 2 err,
+ ptext $ sLit ("Program was:"),
+ nest 2 (ppr p)])
+ Right _ -> Nothing
+
+lintCmmDecl :: Platform -> (GenCmmDecl h i (ListGraph CmmStmt)) -> CmmLint ()
+lintCmmDecl platform (CmmProc _ lbl (ListGraph blocks))
+ = addLintInfo (text "in proc " <> pprCLabel platform lbl) $
+ let labels = foldl (\s b -> setInsert (blockId b) s) setEmpty blocks
+ in mapM_ (lintCmmBlock platform labels) blocks
+
+lintCmmDecl _ (CmmData {})
+ = return ()
+
+lintCmmBlock :: Platform -> BlockSet -> GenBasicBlock CmmStmt -> CmmLint ()
+lintCmmBlock platform labels (BasicBlock id stmts)
+ = addLintInfo (text "in basic block " <> ppr id) $
+ mapM_ (lintCmmStmt platform labels) stmts
+
+-- -----------------------------------------------------------------------------
+-- lintCmmExpr
+
+-- Checks whether a CmmExpr is "type-correct", and check for obvious-looking
+-- byte/word mismatches.
+
+lintCmmExpr :: Platform -> CmmExpr -> CmmLint CmmType
+lintCmmExpr platform (CmmLoad expr rep) = do
+ _ <- lintCmmExpr platform expr
+ -- Disabled, if we have the inlining phase before the lint phase,
+ -- we can have funny offsets due to pointer tagging. -- EZY
+ -- when (widthInBytes (typeWidth rep) >= wORD_SIZE) $
+ -- cmmCheckWordAddress expr
+ return rep
+lintCmmExpr platform expr@(CmmMachOp op args) = do
+ tys <- mapM (lintCmmExpr platform) args
+ if map (typeWidth . cmmExprType) args == machOpArgReps op
+ then cmmCheckMachOp op args tys
+ else cmmLintMachOpErr expr (map cmmExprType args) (machOpArgReps op)
+lintCmmExpr platform (CmmRegOff reg offset)
+ = lintCmmExpr platform (CmmMachOp (MO_Add rep)
+ [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)])
+ where rep = typeWidth (cmmRegType reg)
+lintCmmExpr _ expr =
+ return (cmmExprType expr)
+
+-- Check for some common byte/word mismatches (eg. Sp + 1)
+cmmCheckMachOp :: MachOp -> [CmmExpr] -> [CmmType] -> CmmLint CmmType
+cmmCheckMachOp op [lit@(CmmLit (CmmInt { })), reg@(CmmReg _)] tys
+ = cmmCheckMachOp op [reg, lit] tys
+cmmCheckMachOp op _ tys
+ = return (machOpResultType op tys)
+
+isOffsetOp :: MachOp -> Bool
+isOffsetOp (MO_Add _) = True
+isOffsetOp (MO_Sub _) = True
+isOffsetOp _ = False
+
+-- This expression should be an address from which a word can be loaded:
+-- check for funny-looking sub-word offsets.
+_cmmCheckWordAddress :: CmmExpr -> CmmLint ()
+_cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)])
+ | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0
+ = cmmLintDubiousWordOffset e
+_cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg])
+ | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0
+ = cmmLintDubiousWordOffset e
+_cmmCheckWordAddress _
+ = return ()
+
+-- No warnings for unaligned arithmetic with the node register,
+-- which is used to extract fields from tagged constructor closures.
+notNodeReg :: CmmExpr -> Bool
+notNodeReg (CmmReg reg) | reg == nodeReg = False
+notNodeReg _ = True
+
+lintCmmStmt :: Platform -> BlockSet -> CmmStmt -> CmmLint ()
+lintCmmStmt platform labels = lint
+ where lint (CmmNop) = return ()
+ lint (CmmComment {}) = return ()
+ lint stmt@(CmmAssign reg expr) = do
+ erep <- lintCmmExpr platform expr
+ let reg_ty = cmmRegType reg
+ if (erep `cmmEqType_ignoring_ptrhood` reg_ty)
+ then return ()
+ else cmmLintAssignErr stmt erep reg_ty
+ lint (CmmStore l r) = do
+ _ <- lintCmmExpr platform l
+ _ <- lintCmmExpr platform r
+ return ()
+ lint (CmmCall target _res args _) =
+ do lintTarget platform labels target
+ mapM_ (lintCmmExpr platform . hintlessCmm) args
+ lint (CmmCondBranch e id) = checkTarget id >> lintCmmExpr platform e >> checkCond e
+ lint (CmmSwitch e branches) = do
+ mapM_ checkTarget $ catMaybes branches
+ erep <- lintCmmExpr platform e
+ if (erep `cmmEqType_ignoring_ptrhood` bWord)
+ then return ()
+ else cmmLintErr (text "switch scrutinee is not a word: " <> ppr e <>
+ text " :: " <> ppr erep)
+ lint (CmmJump e _) = lintCmmExpr platform e >> return ()
+ lint (CmmReturn) = return ()
+ lint (CmmBranch id) = checkTarget id
+ checkTarget id = if setMember id labels then return ()
+ else cmmLintErr (text "Branch to nonexistent id" <+> ppr id)
+
+lintTarget :: Platform -> BlockSet -> CmmCallTarget -> CmmLint ()
+lintTarget platform _ (CmmCallee e _) = do _ <- lintCmmExpr platform e
+ return ()
+lintTarget _ _ (CmmPrim _ Nothing) = return ()
+lintTarget platform labels (CmmPrim _ (Just stmts))
+ = mapM_ (lintCmmStmt platform labels) stmts
+
+
+checkCond :: CmmExpr -> CmmLint ()
+checkCond (CmmMachOp mop _) | isComparisonMachOp mop = return ()
+checkCond (CmmLit (CmmInt x t)) | x == 0 || x == 1, t == wordWidth = return () -- constant values
+checkCond expr
+ = cmmLintErr (hang (text "expression is not a conditional:") 2
+ (ppr expr))
+
+-- -----------------------------------------------------------------------------
+-- CmmLint monad
+
+-- just a basic error monad:
+
+newtype CmmLint a = CmmLint { unCL :: Either SDoc a }
+
+instance Monad CmmLint where
+ CmmLint m >>= k = CmmLint $ case m of
+ Left e -> Left e
+ Right a -> unCL (k a)
+ return a = CmmLint (Right a)
+
+cmmLintErr :: SDoc -> CmmLint a
+cmmLintErr msg = CmmLint (Left msg)
+
+addLintInfo :: SDoc -> CmmLint a -> CmmLint a
+addLintInfo info thing = CmmLint $
+ case unCL thing of
+ Left err -> Left (hang info 2 err)
+ Right a -> Right a
+
+cmmLintMachOpErr :: CmmExpr -> [CmmType] -> [Width] -> CmmLint a
+cmmLintMachOpErr expr argsRep opExpectsRep
+ = cmmLintErr (text "in MachOp application: " $$
+ nest 2 (ppr expr) $$
+ (text "op is expecting: " <+> ppr opExpectsRep) $$
+ (text "arguments provide: " <+> ppr argsRep))
+
+cmmLintAssignErr :: CmmStmt -> CmmType -> CmmType -> CmmLint a
+cmmLintAssignErr stmt e_ty r_ty
+ = cmmLintErr (text "in assignment: " $$
+ nest 2 (vcat [ppr stmt,
+ text "Reg ty:" <+> ppr r_ty,
+ text "Rhs ty:" <+> ppr e_ty]))
+
+
+
+cmmLintDubiousWordOffset :: CmmExpr -> CmmLint a
+cmmLintDubiousWordOffset expr
+ = cmmLintErr (text "offset is not a multiple of words: " $$
+ nest 2 (ppr expr))
diff --git a/compiler/cmm/OldPprCmm.hs b/compiler/cmm/OldPprCmm.hs
index d6a12221fb..a30be9c6c7 100644
--- a/compiler/cmm/OldPprCmm.hs
+++ b/compiler/cmm/OldPprCmm.hs
@@ -63,10 +63,6 @@ instance Outputable instr => Outputable (GenBasicBlock instr) where
instance Outputable CmmStmt where
ppr s = pprStmt s
-instance Outputable CmmInfo where
- ppr i = pprInfo i
-
-
-- --------------------------------------------------------------------------
instance Outputable CmmSafety where
ppr CmmUnsafe = ptext (sLit "_unsafe_call_")
@@ -74,21 +70,6 @@ instance Outputable CmmSafety where
ppr (CmmSafe srt) = ppr srt
-- --------------------------------------------------------------------------
--- Info tables. The current pretty printer needs refinement
--- but will work for now.
---
--- For ideas on how to refine it, they used to be printed in the
--- style of C--'s 'stackdata' declaration, just inside the proc body,
--- and were labelled with the procedure name ++ "_info".
-pprInfo :: CmmInfo -> SDoc
-pprInfo (CmmInfo _gc_target update_frame info_table) =
- vcat [{-ptext (sLit "gc_target: ") <>
- maybe (ptext (sLit "<none>")) ppr gc_target,-}
- ptext (sLit "update_frame: ") <>
- maybe (ptext (sLit "<none>")) pprUpdateFrame update_frame,
- ppr info_table]
-
--- --------------------------------------------------------------------------
-- Basic blocks look like assembly blocks.
-- lbl: stmt ; stmt ; ..
pprBBlock :: Outputable stmt => GenBasicBlock stmt -> SDoc
diff --git a/compiler/cmm/OptimizationFuel.hs b/compiler/cmm/OptimizationFuel.hs
index a85b11bcc6..6e968c0b1d 100644
--- a/compiler/cmm/OptimizationFuel.hs
+++ b/compiler/cmm/OptimizationFuel.hs
@@ -8,10 +8,8 @@
module OptimizationFuel
( OptimizationFuel, amountOfFuel, tankFilledTo, unlimitedFuel, anyFuelLeft, oneLessFuel
, OptFuelState, initOptFuelState
- , FuelConsumer, FuelUsingMonad, FuelState
- , fuelGet, fuelSet, lastFuelPass, setFuelPass
- , fuelExhausted, fuelDec1, tryWithFuel
- , runFuelIO, runInfiniteFuelIO, fuelConsumingPass
+ , FuelConsumer, FuelState
+ , runFuelIO, runInfiniteFuelIO
, FuelUniqSM
, liftUniq
)
@@ -62,25 +60,20 @@ anyFuelLeft (OptimizationFuel f) = f > 0
oneLessFuel (OptimizationFuel f) = ASSERT (f > 0) (OptimizationFuel (f - 1))
unlimitedFuel = OptimizationFuel infiniteFuel
-data FuelState = FuelState { fs_fuel :: OptimizationFuel, fs_lastpass :: String }
-newtype FuelUniqSM a = FUSM { unFUSM :: FuelState -> UniqSM (a, FuelState) }
-
-fuelConsumingPass :: String -> FuelConsumer a -> FuelUniqSM a
-fuelConsumingPass name f = do setFuelPass name
- fuel <- fuelGet
- let (a, fuel') = f fuel
- fuelSet fuel'
- return a
+data FuelState = FuelState { fs_fuel :: {-# UNPACK #-} !OptimizationFuel,
+ fs_lastpass :: String }
+newtype FuelUniqSM a = FUSM { unFUSM :: UniqSupply -> FuelState -> (# a, UniqSupply, FuelState #) }
runFuelIO :: OptFuelState -> FuelUniqSM a -> IO a
runFuelIO fs (FUSM f) =
do pass <- readIORef (pass_ref fs)
fuel <- readIORef (fuel_ref fs)
u <- mkSplitUniqSupply 'u'
- let (a, FuelState fuel' pass') = initUs_ u $ f (FuelState fuel pass)
- writeIORef (pass_ref fs) pass'
- writeIORef (fuel_ref fs) fuel'
- return a
+ case f u (FuelState fuel pass) of
+ (# a, _, FuelState fuel' pass' #) -> do
+ writeIORef (pass_ref fs) pass'
+ writeIORef (fuel_ref fs) fuel'
+ return a
-- ToDo: Do we need the pass_ref when we are doing infinite fueld
-- transformations?
@@ -88,21 +81,32 @@ runInfiniteFuelIO :: OptFuelState -> FuelUniqSM a -> IO a
runInfiniteFuelIO fs (FUSM f) =
do pass <- readIORef (pass_ref fs)
u <- mkSplitUniqSupply 'u'
- let (a, FuelState _ pass') = initUs_ u $ f (FuelState unlimitedFuel pass)
- writeIORef (pass_ref fs) pass'
- return a
+ case f u (FuelState unlimitedFuel pass) of
+ (# a, _, FuelState _fuel pass' #) -> do
+ writeIORef (pass_ref fs) pass'
+ return a
instance Monad FuelUniqSM where
- FUSM f >>= k = FUSM (\s -> f s >>= \(a, s') -> unFUSM (k a) s')
- return a = FUSM (\s -> return (a, s))
+ FUSM f >>= k = FUSM (\u s -> case f u s of (# a, u', s' #) ->
+ unFUSM (k a) u' s')
+ return a = FUSM (\u s -> (# a, u, s #))
instance MonadUnique FuelUniqSM where
- getUniqueSupplyM = liftUniq getUniqueSupplyM
- getUniqueM = liftUniq getUniqueM
- getUniquesM = liftUniq getUniquesM
+ getUniqueSupplyM =
+ FUSM $ \us f -> case splitUniqSupply us of
+ (us1,us2) -> (# us1, us2, f #)
+
+ getUniqueM =
+ FUSM $ \us f -> case splitUniqSupply us of
+ (us1,us2) -> (# uniqFromSupply us1, us2, f #)
+
+ getUniquesM =
+ FUSM $ \us f -> case splitUniqSupply us of
+ (us1,us2) -> (# uniqsFromSupply us1, us2, f #)
+
liftUniq :: UniqSM x -> FuelUniqSM x
-liftUniq x = FUSM (\s -> x >>= (\u -> return (u, s)))
+liftUniq x = FUSM (\u s -> case initUs u x of (a,u') -> (# a, u', s #))
class Monad m => FuelUsingMonad m where
fuelGet :: m OptimizationFuel
@@ -110,25 +114,14 @@ class Monad m => FuelUsingMonad m where
lastFuelPass :: m String
setFuelPass :: String -> m ()
-fuelExhausted :: FuelUsingMonad m => m Bool
-fuelExhausted = fuelGet >>= return . anyFuelLeft
-
-fuelDec1 :: FuelUsingMonad m => m ()
-fuelDec1 = fuelGet >>= fuelSet . oneLessFuel
-
-tryWithFuel :: FuelUsingMonad m => a -> m (Maybe a)
-tryWithFuel r = do f <- fuelGet
- if anyFuelLeft f then fuelSet (oneLessFuel f) >> return (Just r)
- else return Nothing
-
instance FuelUsingMonad FuelUniqSM where
fuelGet = extract fs_fuel
lastFuelPass = extract fs_lastpass
- fuelSet fuel = FUSM (\s -> return ((), s { fs_fuel = fuel }))
- setFuelPass pass = FUSM (\s -> return ((), s { fs_lastpass = pass }))
+ fuelSet fuel = FUSM (\u s -> (# (), u, s { fs_fuel = fuel } #))
+ setFuelPass pass = FUSM (\u s -> (# (), u, s { fs_lastpass = pass } #))
extract :: (FuelState -> a) -> FuelUniqSM a
-extract f = FUSM (\s -> return (f s, s))
+extract f = FUSM (\u s -> (# f s, u, s #))
instance FuelMonad FuelUniqSM where
getFuel = liftM amountOfFuel fuelGet
@@ -137,6 +130,6 @@ instance FuelMonad FuelUniqSM where
-- Don't bother to checkpoint the unique supply; it doesn't matter
instance CheckpointMonad FuelUniqSM where
type Checkpoint FuelUniqSM = FuelState
- checkpoint = FUSM $ \fuel -> return (fuel, fuel)
- restart fuel = FUSM $ \_ -> return ((), fuel)
+ checkpoint = FUSM $ \u fuel -> (# fuel, u, fuel #)
+ restart fuel = FUSM $ \u _ -> (# (), u, fuel #)
diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs
index 183708c08e..dee6ee881e 100644
--- a/compiler/cmm/PprCmm.hs
+++ b/compiler/cmm/PprCmm.hs
@@ -146,8 +146,6 @@ pprConvention Slow = text "<slow-convention>"
pprConvention GC = text "<gc-convention>"
pprConvention PrimOpCall = text "<primop-call-convention>"
pprConvention PrimOpReturn = text "<primop-ret-convention>"
-pprConvention (Foreign c) = ppr c
-pprConvention (Private {}) = text "<private-convention>"
pprForeignConvention :: ForeignConvention -> SDoc
pprForeignConvention (ForeignConvention c as rs) = ppr c <> ppr as <> ppr rs
diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs
index 7503127555..119f2b7239 100644
--- a/compiler/cmm/PprCmmExpr.hs
+++ b/compiler/cmm/PprCmmExpr.hs
@@ -237,12 +237,8 @@ pprLocalReg (LocalReg uniq rep)
-- Stack areas
pprArea :: Area -> SDoc
-pprArea (RegSlot r) = hcat [ text "slot<", ppr r, text ">" ]
-pprArea (CallArea id) = pprAreaId id
-
-pprAreaId :: AreaId -> SDoc
-pprAreaId Old = text "old"
-pprAreaId (Young id) = hcat [ text "young<", ppr id, text ">" ]
+pprArea Old = text "old"
+pprArea (Young id) = hcat [ text "young<", ppr id, text ">" ]
-- needs to be kept in syn with CmmExpr.hs.GlobalReg
--
diff --git a/compiler/cmm/SMRep.lhs b/compiler/cmm/SMRep.lhs
index ce30799bf6..8b3308ef97 100644
--- a/compiler/cmm/SMRep.lhs
+++ b/compiler/cmm/SMRep.lhs
@@ -21,6 +21,7 @@ module SMRep (
StgWord, StgHalfWord,
hALF_WORD_SIZE, hALF_WORD_SIZE_IN_BITS,
WordOff, ByteOff,
+ roundUpToWords,
-- * Closure repesentation
SMRep(..), -- CmmInfo sees the rep; no one else does
@@ -57,6 +58,7 @@ import FastString
import Data.Char( ord )
import Data.Word
+import Data.Bits
\end{code}
@@ -69,6 +71,9 @@ import Data.Word
\begin{code}
type WordOff = Int -- Word offset, or word count
type ByteOff = Int -- Byte offset, or byte count
+
+roundUpToWords :: ByteOff -> ByteOff
+roundUpToWords n = (n + (wORD_SIZE - 1)) .&. (complement (wORD_SIZE - 1))
\end{code}
StgWord is a type representing an StgWord on the target platform.
@@ -93,6 +98,7 @@ hALF_WORD_SIZE_IN_BITS = 32
#endif
\end{code}
+
%************************************************************************
%* *
\subsubsection[SMRep-datatype]{@SMRep@---storage manager representation}
diff --git a/compiler/cmm/cmm-notes b/compiler/cmm/cmm-notes
index 81882c8c0e..0e6a2341f2 100644
--- a/compiler/cmm/cmm-notes
+++ b/compiler/cmm/cmm-notes
@@ -24,27 +24,10 @@ More notes (June 11)
* Check in ClosureInfo:
-- NB: Results here should line up with the results of SMRep.rtsClosureType
-* Possible refactoring: Nuke AGraph in favour of
- mkIfThenElse :: Expr -> Graph -> Graph -> FCode Graph
- or even
- mkIfThenElse :: HasUniques m => Expr -> Graph -> Graph -> m Graph
- (Remmber that the .cmm file parser must use this function)
-
- or parameterise FCode over its envt; the CgState part seem useful for both
-
* "Remove redundant reloads" in CmmSpillReload should be redundant; since
insertLateReloads is now gone, every reload is reloading a live variable.
Test and nuke.
-* Stack layout is very like register assignment: find non-conflicting assigments.
- In particular we can use colouring or linear scan (etc).
-
- We'd fine-grain interference (on a word by word basis) to get maximum overlap.
- But that may make very big interference graphs. So linear scan might be
- more attactive.
-
- NB: linear scan does on-the-fly live range splitting.
-
* When stubbing dead slots be careful not to write into an area that
overlaps with an area that's in use. So stubbing needs to *follow*
stack layout.
@@ -81,10 +64,6 @@ Things to do:
Old.Cmm. We should abstract it to work on both representations, it needs only to
convert a CmmInfoTable to [CmmStatic].
- - The MkGraph currenty uses a different semantics for <*> than Hoopl. Maybe
- we could convert codeGen/StgCmm* clients to the Hoopl's semantics?
- It's all deeply unsatisfactory.
-
- Improve performance of Hoopl.
A nofib comparison of -fasm vs -fnewcodegen nofib compilation parameters
@@ -101,18 +80,12 @@ Things to do:
When compiling nofib, ghc-head + libraries compiled with -fnew-codegen
is 31.4% slower (http://fox.auryn.cz/msrc/0517_hoopl/32bit.oldghcoldgen.zipghcoldgen.txt).
- So we generate a bit better code, but it takes us longer!
-
EZY: Also importantly, Hoopl uses dramatically more memory than the
old code generator.
- Are all blockToNodeList and blockOfNodeList really needed? Maybe we could
splice blocks instead?
- In the CmmContFlowOpt.blockConcat, using Dataflow seems too clumsy. Still,
- a block catenation function would be probably nicer than blockToNodeList
- / blockOfNodeList combo.
-
- lowerSafeForeignCall seems too lowlevel. Just use Dataflow. After that
delete splitEntrySeq from HooplUtils.
@@ -129,10 +102,6 @@ Things to do:
- NB that CmmProcPoint line 283 has a hack that works around a GADT-related
bug in 6.10.
- - SDM (2010-02-26) can we remove the Foreign constructor from Convention?
- Reason: we never generate code for a function with the Foreign
- calling convention, and the code for calling foreign calls is generated
-
- AsmCodeGen has a generic Cmm optimiser; move this into new pipeline
EZY (2011-04-16): The mini-inliner has been generalized and ported,
but the constant folding and other optimizations need to still be
@@ -161,16 +130,6 @@ Things to do:
- Top-level SRT threading is a bit ugly
- - Add type/newtype for CmmModule = [CmmGroup] -- A module
- CmmGroup = [CmmTop] -- A .o file
- CmmTop = Proc | Data -- A procedure or data
-
- - This is a *change*: currently a CmmGroup is one function's-worth of code
- regardless of SplitObjs. Question: can we *always* generate M.o if there
- is just one element in the list (rather than M/M1.o, M/M2.o etc)
-
- One SRT per group.
-
- See "CAFs" below; we want to totally refactor the way SRTs are calculated
- Pull out Areas into its own module