diff options
Diffstat (limited to 'compiler/cmm')
30 files changed, 3377 insertions, 1777 deletions
diff --git a/compiler/cmm/BlockId.hs b/compiler/cmm/BlockId.hs index feeacb553d..d5a8e045bf 100644 --- a/compiler/cmm/BlockId.hs +++ b/compiler/cmm/BlockId.hs @@ -31,15 +31,9 @@ compilation unit in which it appears. type BlockId = Hoopl.Label -instance Uniquable BlockId where - getUnique label = getUnique (uniqueToInt $ lblToUnique label) - mkBlockId :: Unique -> BlockId mkBlockId unique = uniqueToLbl $ intToUnique $ getKey unique -instance Outputable BlockId where - ppr label = ppr (getUnique label) - retPtLbl :: BlockId -> CLabel retPtLbl label = mkReturnPtLabel $ getUnique label diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs index f1318c1dc9..d70fd8c835 100644 --- a/compiler/cmm/Cmm.hs +++ b/compiler/cmm/Cmm.hs @@ -69,8 +69,6 @@ type CmmGroup = GenCmmGroup CmmStatics CmmTopInfo CmmGraph -- (a) C--, i.e. populated with various C-- constructs -- (Cmm and RawCmm in OldCmm.hs) -- (b) Native code, populated with data/instructions --- --- A second family of instances based on Hoopl is in Cmm.hs. -- | A top-level chunk, abstracted over the type of the contents of -- the basic blocks (Cmm or instructions are the likely instantiations). @@ -103,11 +101,15 @@ type CmmBwdRewrite f = BwdRewrite FuelUniqSM CmmNode f -- Info Tables ----------------------------------------------------------------------------- -data CmmTopInfo = TopInfo {info_tbl :: CmmInfoTable, stack_info :: CmmStackInfo} +data CmmTopInfo = TopInfo { info_tbl :: CmmInfoTable + , stack_info :: CmmStackInfo } data CmmStackInfo = StackInfo { - arg_space :: ByteOff, -- XXX: comment? + arg_space :: ByteOff, + -- number of bytes of arguments on the stack on entry to the + -- the proc. This is filled in by StgCmm.codeGen, and used + -- by the stack allocator later. updfr_space :: Maybe ByteOff -- XXX: comment? } diff --git a/compiler/cmm/CmmBuildInfoTables.hs b/compiler/cmm/CmmBuildInfoTables.hs index 011947f55d..2378988b68 100644 --- a/compiler/cmm/CmmBuildInfoTables.hs +++ b/compiler/cmm/CmmBuildInfoTables.hs @@ -14,12 +14,10 @@ {-# OPTIONS_GHC -fno-warn-warnings-deprecations #-} module CmmBuildInfoTables ( CAFSet, CAFEnv, cafAnal, localCAFInfo, mkTopCAFInfo - , setInfoTableSRT, setInfoTableStackMap + , setInfoTableSRT , TopSRT, emptySRT, srtToData , bundleCAFs - , lowerSafeForeignCalls - , cafTransfers, liveSlotTransfers - , mkLiveness ) + , cafTransfers ) where #include "HsVersions.h" @@ -39,7 +37,6 @@ import Bitmap import CLabel import Cmm import CmmUtils -import CmmStackLayout import Module import FastString import ForeignCall @@ -54,129 +51,25 @@ import Outputable import SMRep import UniqSupply -import Compiler.Hoopl +import Hoopl import Data.Map (Map) import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set import qualified FiniteMap as Map +#if __GLASGOW_HASKELL__ < 704 +foldSet = Set.fold +#else +foldSet = Set.foldr +#endif + ---------------------------------------------------------------- -- Building InfoTables ----------------------------------------------------------------------- --- Stack Maps - --- Given a block ID, we return a representation of the layout of the stack, --- as suspended before entering that block. --- (For a return site to a function call, the layout does not include the --- parameter passing area (or the "return address" on the stack)). --- If the element is `Nothing`, then it represents a word of the stack that --- does not contain a live pointer. --- If the element is `Just` a register, then it represents a live spill slot --- for a pointer; we assume that a pointer is the size of a word. --- The head of the list represents the young end of the stack where the infotable --- pointer for the block `Bid` is stored. --- The infotable pointer itself is not included in the list. --- Call areas are also excluded from the list: besides the stuff in the update --- frame (and the return infotable), call areas should never be live across --- function calls. - --- RTS Invariant: All pointers must be word-aligned because each bit in the bitmap --- represents a word. Consequently, we have to be careful when we see a live slot --- on the stack: if we have packed multiple sub-word values into a word, --- we have to make sure that we only mark the entire word as a non-pointer. - --- Also, don't forget to stop at the old end of the stack (oldByte), --- which may differ depending on whether there is an update frame. - -type RegSlotInfo - = ( Int -- Offset from oldest byte of Old area - , LocalReg -- The register - , Int) -- Width of the register - -live_ptrs :: ByteOff -> BlockEnv SubAreaSet -> AreaMap -> BlockId -> StackLayout -live_ptrs oldByte slotEnv areaMap bid = - -- pprTrace "live_ptrs for" (ppr bid <+> text (show oldByte ++ "-" ++ show youngByte) <+> - -- ppr liveSlots) $ - -- pprTrace ("stack layout for " ++ show bid ++ ": ") (ppr res) $ res - res - where - res = mkLiveness (reverse $ slotsToList youngByte liveSlots []) - - slotsToList :: Int -> [RegSlotInfo] -> [Maybe LocalReg] -> [Maybe LocalReg] - -- n starts at youngByte and is decremented down to oldByte - -- Returns a list, one element per word, with - -- (Just r) meaning 'pointer register r is saved here', - -- Nothing meaning 'non-pointer or empty' - - slotsToList n [] results | n == oldByte = results -- at old end of stack frame - - slotsToList n (s : _) _ | n == oldByte = - pprPanic "slot left off live_ptrs" (ppr s <+> ppr oldByte <+> - ppr n <+> ppr liveSlots <+> ppr youngByte) - - slotsToList n _ _ | n < oldByte = - panic "stack slots not allocated on word boundaries?" - - slotsToList n l@((n', r, w) : rst) results = - if n == (n' + w) then -- slot's young byte is at n - ASSERT (not (isPtr r) || - (n `mod` wORD_SIZE == 0 && w == wORD_SIZE)) -- ptrs must be aligned - slotsToList next (dropWhile (non_ptr_younger_than next) rst) - (stack_rep : results) - else slotsToList next (dropWhile (non_ptr_younger_than next) l) - (Nothing : results) - where next = n - wORD_SIZE - stack_rep = if isPtr r then Just r else Nothing - - slotsToList n [] results = slotsToList (n - wORD_SIZE) [] (Nothing : results) - - non_ptr_younger_than next (n', r, w) = - n' + w > next && - ASSERT (not (isPtr r)) - True - isPtr = isGcPtrType . localRegType - - liveSlots :: [RegSlotInfo] - liveSlots = sortBy (\ (off,_,_) (off',_,_) -> compare off' off) - (Map.foldRightWithKey (\_ -> flip $ foldl add_slot) [] slots) - - add_slot :: [RegSlotInfo] -> SubArea -> [RegSlotInfo] - add_slot rst (a@(RegSlot r@(LocalReg _ ty)), off, w) = - if off == w && widthInBytes (typeWidth ty) == w then - (expectJust "add_slot" (Map.lookup a areaMap), r, w) : rst - else panic "live_ptrs: only part of a variable live at a proc point" - add_slot rst (CallArea Old, _, _) = - rst -- the update frame (or return infotable) should be live - -- would be nice to check that only that part of the callarea is live... - add_slot rst ((CallArea _), _, _) = - rst - -- JD: THIS ISN'T CURRENTLY A CORRECTNESS PROBLEM, BUT WE SHOULD REALLY - -- MAKE LIVENESS INFO AROUND CALLS MORE PRECISE -- FOR NOW, A 32-BIT - -- FLOAT PADS OUT TO 64 BITS, BUT WE ASSUME THE WHOLE PARAMETER-PASSING - -- AREA IS LIVE (WHICH IT ISN'T...). WE SHOULD JUST PUT THE LIVE AREAS - -- IN THE CALL NODES, WHICH SHOULD EVENTUALLY HAVE LIVE REGISTER AS WELL, - -- SO IT'S ALL GOING IN THE SAME DIRECTION. - -- pprPanic "CallAreas must not be live across function calls" (ppr bid <+> ppr c) - - slots :: SubAreaSet -- The SubAreaSet for 'bid' - slots = expectJust "live_ptrs slots" $ mapLookup bid slotEnv - youngByte = expectJust "live_ptrs bid_pos" $ Map.lookup (CallArea (Young bid)) areaMap - --- Construct the stack maps for a procedure _if_ it needs an infotable. --- When wouldn't a procedure need an infotable? If it is a procpoint that --- is not the successor of a call. -setInfoTableStackMap :: SlotEnv -> AreaMap -> CmmDecl -> CmmDecl -setInfoTableStackMap slotEnv areaMap - t@(CmmProc (TopInfo {stack_info=StackInfo {updfr_space = Just updfr_off}}) _ - (CmmGraph {g_entry = eid})) - = updInfo (const (live_ptrs updfr_off slotEnv areaMap eid)) id t -setInfoTableStackMap _ _ t = t - - - ------------------------------------------------------------------------ -- SRTs -- WE NEED AN EXAMPLE HERE. @@ -191,14 +84,14 @@ setInfoTableStackMap _ _ t = t ----------------------------------------------------------------------- -- Finding the CAFs used by a procedure -type CAFSet = Map CLabel () +type CAFSet = Set CLabel type CAFEnv = BlockEnv CAFSet -- First, an analysis to find live CAFs. cafLattice :: DataflowLattice CAFSet -cafLattice = DataflowLattice "live cafs" Map.empty add - where add _ (OldFact old) (NewFact new) = case old `Map.union` new of - new' -> (changeIf $ Map.size new' > Map.size old, new') +cafLattice = DataflowLattice "live cafs" Set.empty add + where add _ (OldFact old) (NewFact new) = case old `Set.union` new of + new' -> (changeIf $ Set.size new' > Set.size old, new') cafTransfers :: BwdTransfer CmmNode CAFSet cafTransfers = mkBTransfer3 first middle last @@ -210,11 +103,11 @@ cafTransfers = mkBTransfer3 first middle last CmmLit (CmmLabelOff c _) -> add c set CmmLit (CmmLabelDiffOff c1 c2 _) -> add c1 $ add c2 set _ -> set - add l s = if hasCAF l then Map.insert (toClosureLbl l) () s + add l s = if hasCAF l then Set.insert (toClosureLbl l) s else s -cafAnal :: CmmGraph -> FuelUniqSM CAFEnv -cafAnal g = liftM snd $ dataflowPassBwd g [] $ analBwd cafLattice cafTransfers +cafAnal :: Platform -> CmmGraph -> CAFEnv +cafAnal g = dataflowAnalBwd g [] $ analBwd cafLattice cafTransfers ----------------------------------------------------------------------- -- Building the SRTs @@ -266,13 +159,13 @@ srtToData srt = [CmmData RelocatableReadOnlyData (Statics (lbl srt) tbl)] buildSRTs :: TopSRT -> Map CLabel CAFSet -> CAFSet -> FuelUniqSM (TopSRT, Maybe CmmDecl, C_SRT) buildSRTs topSRT topCAFMap cafs = - do let liftCAF lbl () z = -- get CAFs for functions without static closures - case Map.lookup lbl topCAFMap of Just cafs -> z `Map.union` cafs - Nothing -> Map.insert lbl () z + do let liftCAF lbl z = -- get CAFs for functions without static closures + case Map.lookup lbl topCAFMap of Just cafs -> z `Set.union` cafs + Nothing -> Set.insert lbl z -- For each label referring to a function f without a static closure, -- replace it with the CAFs that are reachable from f. sub_srt topSRT localCafs = - let cafs = Map.keys (Map.foldRightWithKey liftCAF Map.empty localCafs) + let cafs = Set.elems (foldSet liftCAF Set.empty localCafs) mkSRT topSRT = do localSRTs <- procpointSRT (lbl topSRT) (elt_map topSRT) cafs return (topSRT, localSRTs) @@ -373,21 +266,21 @@ mkTopCAFInfo localCAFs = foldl addToTop Map.empty g Map.insert l (flatten env cafset) env addToTop env (CyclicSCC nodes) = let (lbls, cafsets) = unzip nodes - cafset = lbls `Map.deleteList` foldl Map.union Map.empty cafsets + cafset = foldr Set.delete (foldl Set.union Set.empty cafsets) lbls in foldl (\env l -> Map.insert l (flatten env cafset) env) env lbls - flatten env cafset = Map.foldRightWithKey (lookup env) Map.empty cafset - lookup env caf () cafset' = - case Map.lookup caf env of Just cafs -> Map.foldRightWithKey add cafset' cafs - Nothing -> add caf () cafset' - add caf () cafset' = Map.insert caf () cafset' + flatten env cafset = foldSet (lookup env) Set.empty cafset + lookup env caf cafset' = + case Map.lookup caf env of Just cafs -> foldSet add cafset' cafs + Nothing -> add caf cafset' + add caf cafset' = Set.insert caf cafset' g = stronglyConnCompFromEdgedVertices - (map (\n@(l, cafs) -> (n, l, Map.keys cafs)) localCAFs) + (map (\n@(l, cafs) -> (n, l, Set.elems cafs)) localCAFs) -- Bundle the CAFs used at a procpoint. bundleCAFs :: CAFEnv -> CmmDecl -> (CAFSet, CmmDecl) bundleCAFs cafEnv t@(CmmProc _ _ (CmmGraph {g_entry=entry})) = (expectJust "bundleCAFs" (mapLookup entry cafEnv), t) -bundleCAFs _ t = (Map.empty, t) +bundleCAFs _ t = (Set.empty, t) -- Construct the SRTs for the given procedure. setInfoTableSRT :: Map CLabel CAFSet -> TopSRT -> (CAFSet, CmmDecl) -> @@ -418,91 +311,3 @@ updInfoTbl toVars toSrt info_tbl@(CmmInfoTable {}) StackRep ls -> StackRep (toVars ls) other -> other } updInfoTbl _ _ t@CmmNonInfoTable = t - ----------------------------------------------------------------- --- Safe foreign calls: We need to insert the code that suspends and resumes --- the thread before and after a safe foreign call. --- Why do we do this so late in the pipeline? --- Because we need this code to appear without interrruption: you can't rely on the --- value of the stack pointer between the call and resetting the thread state; --- you need to have an infotable on the young end of the stack both when --- suspending the thread and making the foreign call. --- All of this is much easier if we insert the suspend and resume calls here. - --- At the same time, we prepare for the stages of the compiler that --- build the proc points. We have to do this at the same time because --- the safe foreign calls need special treatment with respect to infotables. --- A safe foreign call needs an infotable even though it isn't --- a procpoint. The following datatype captures the information --- needed to generate the infotables along with the Cmm data and procedures. - --- JD: Why not do this while splitting procedures? -lowerSafeForeignCalls :: AreaMap -> CmmDecl -> FuelUniqSM CmmDecl -lowerSafeForeignCalls _ t@(CmmData _ _) = return t -lowerSafeForeignCalls areaMap (CmmProc info l g@(CmmGraph {g_entry=entry})) = do - let block b mblocks = mblocks >>= lowerSafeCallBlock entry areaMap b - blocks <- foldGraphBlocks block (return mapEmpty) g - return $ CmmProc info l (ofBlockMap entry blocks) - --- If the block ends with a safe call in the block, lower it to an unsafe --- call (with appropriate saves and restores before and after). -lowerSafeCallBlock :: BlockId -> AreaMap -> CmmBlock -> BlockEnv CmmBlock - -> FuelUniqSM (BlockEnv CmmBlock) -lowerSafeCallBlock entry areaMap b blocks = - case blockToNodeList b of - (JustC (CmmEntry id), m, JustC l@(CmmForeignCall {})) -> lowerSafeForeignCall entry areaMap blocks id m l - _ -> return $ insertBlock b blocks - --- Late in the code generator, we want to insert the code necessary --- to lower a safe foreign call to a sequence of unsafe calls. -lowerSafeForeignCall :: BlockId -> AreaMap -> BlockEnv CmmBlock -> BlockId -> [CmmNode O O] -> CmmNode O C - -> FuelUniqSM (BlockEnv CmmBlock) -lowerSafeForeignCall entry areaMap blocks bid m - (CmmForeignCall {tgt=tgt, res=rs, args=as, succ=succ, updfr = updfr_off, intrbl = intrbl}) = - do let newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep) - -- Both 'id' and 'new_base' are KindNonPtr because they're - -- RTS-only objects and are not subject to garbage collection - id <- newTemp bWord - new_base <- newTemp (cmmRegType (CmmGlobal BaseReg)) - let (caller_save, caller_load) = callerSaveVolatileRegs - load_tso <- newTemp gcWord -- TODO FIXME NOW - load_stack <- newTemp gcWord -- TODO FIXME NOW - let (<**>) = (M.<*>) - let suspendThread = foreignLbl "suspendThread" - resumeThread = foreignLbl "resumeThread" - foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit name))) - suspend = saveThreadState <**> - caller_save <**> - mkUnsafeCall (ForeignTarget suspendThread - (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint])) - [id] [CmmReg (CmmGlobal BaseReg), CmmLit (CmmInt (fromIntegral (fromEnum intrbl)) wordWidth)] - midCall = mkUnsafeCall tgt rs as - resume = mkUnsafeCall (ForeignTarget resumeThread - (ForeignConvention CCallConv [AddrHint] [AddrHint])) - [new_base] [CmmReg (CmmLocal id)] <**> - -- Assign the result to BaseReg: we - -- might now have a different Capability! - mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <**> - caller_load <**> - loadThreadState load_tso load_stack - -- We have to save the return value on the stack because its next use - -- may appear in a different procedure due to procpoint splitting... - saveRetVals = foldl (<**>) emptyAGraph $ map (M.mkMiddle . spill) rs - spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r) - regSlot r@(LocalReg _ _) = CmmRegOff (CmmGlobal Sp) (sp_off - offset) - where offset = w + expectJust "lowerForeign" (Map.lookup (RegSlot r) areaMap) - sp_off = wORD_SIZE + expectJust "lowerForeign" (Map.lookup (CallArea area) areaMap) - area = if succ == entry then Old else Young succ - w = widthInBytes $ typeWidth $ localRegType r - -- Note: The successor must be a procpoint, and we have already split, - -- so we use a jump, not a branch. - succLbl = CmmLit (CmmLabel (infoTblLbl succ)) - jump = CmmCall { cml_target = succLbl, cml_cont = Nothing - , cml_args = widthInBytes wordWidth ,cml_ret_args = 0 - , cml_ret_off = updfr_off} - graph' <- liftUniq $ labelAGraph bid $ catAGraphs (map M.mkMiddle m) <**> - suspend <**> midCall <**> - resume <**> saveRetVals <**> M.mkLast jump - return $ blocks `mapUnion` toBlockMap graph' -lowerSafeForeignCall _ _ _ _ _ _ = panic "lowerSafeForeignCall was passed something else" - diff --git a/compiler/cmm/CmmCommonBlockElim.hs b/compiler/cmm/CmmCommonBlockElim.hs index abbfd01156..4df7304acf 100644 --- a/compiler/cmm/CmmCommonBlockElim.hs +++ b/compiler/cmm/CmmCommonBlockElim.hs @@ -13,22 +13,23 @@ where import BlockId import Cmm import CmmUtils +import CmmContFlowOpt import Prelude hiding (iterate, succ, unzip, zip) -import Compiler.Hoopl +import Hoopl hiding (ChangeFlag) import Data.Bits import qualified Data.List as List import Data.Word import FastString -import Control.Monad import Outputable import UniqFM -import Unique my_trace :: String -> SDoc -> a -> a my_trace = if False then pprTrace else \_ _ a -> a --- Eliminate common blocks: +-- ----------------------------------------------------------------------------- +-- Eliminate common blocks + -- If two blocks are identical except for the label on the first node, -- then we can eliminate one of the blocks. To ensure that the semantics -- of the program are preserved, we have to rewrite each predecessor of the @@ -42,59 +43,50 @@ my_trace = if False then pprTrace else \_ _ a -> a -- TODO: Use optimization fuel elimCommonBlocks :: CmmGraph -> CmmGraph -elimCommonBlocks g = - upd_graph g . snd $ iterate common_block reset hashed_blocks - (emptyUFM, mapEmpty) - where hashed_blocks = map (\b -> (hash_block b, b)) (reverse (postorderDfs g)) - reset (_, subst) = (emptyUFM, subst) +elimCommonBlocks g = replaceLabels env g + where + env = iterate hashed_blocks mapEmpty + hashed_blocks = map (\b -> (hash_block b, b)) $ postorderDfs g -- Iterate over the blocks until convergence -iterate :: (t -> a -> (Bool, t)) -> (t -> t) -> [a] -> t -> t -iterate upd reset blocks state = - case foldl upd' (False, state) blocks of - (True, state') -> iterate upd reset blocks (reset state') - (False, state') -> state' - where upd' (b, s) a = let (b', s') = upd s a in (b || b', s') -- lift to track changes +iterate :: [(HashCode,CmmBlock)] -> BlockEnv BlockId -> BlockEnv BlockId +iterate blocks subst = + case foldl common_block (False, emptyUFM, subst) blocks of + (changed, _, subst) + | changed -> iterate blocks subst + | otherwise -> subst + +type State = (ChangeFlag, UniqFM [CmmBlock], BlockEnv BlockId) + +type ChangeFlag = Bool +type HashCode = Int -- Try to find a block that is equal (or ``common'') to b. -type BidMap = BlockEnv BlockId -type State = (UniqFM [CmmBlock], BidMap) -common_block :: (Outputable h, Uniquable h) => State -> (h, CmmBlock) -> (Bool, State) -common_block (bmap, subst) (hash, b) = +common_block :: State -> (HashCode, CmmBlock) -> State +common_block (old_change, bmap, subst) (hash, b) = case lookupUFM bmap hash of Just bs -> case (List.find (eqBlockBodyWith (eqBid subst) b) bs, mapLookup bid subst) of (Just b', Nothing) -> addSubst b' (Just b', Just b'') | entryLabel b' /= b'' -> addSubst b' - _ -> (False, (addToUFM bmap hash (b : bs), subst)) - Nothing -> (False, (addToUFM bmap hash [b], subst)) + | otherwise -> (old_change, bmap, subst) + _ -> (old_change, addToUFM bmap hash (b : bs), subst) + Nothing -> (old_change, addToUFM bmap hash [b], subst) where bid = entryLabel b - addSubst b' = my_trace "found new common block" (ppr (entryLabel b')) $ - (True, (bmap, mapInsert bid (entryLabel b') subst)) - --- Given the map ``subst'' from BlockId -> BlockId, we rewrite the graph. -upd_graph :: CmmGraph -> BidMap -> CmmGraph -upd_graph g subst = mapGraphNodes (id, middle, last) g - where middle = mapExpDeep exp - last l = last' (mapExpDeep exp l) - last' :: CmmNode O C -> CmmNode O C - last' (CmmBranch bid) = CmmBranch $ sub bid - last' (CmmCondBranch p t f) = cond p (sub t) (sub f) - last' (CmmCall t (Just bid) a r o) = CmmCall t (Just $ sub bid) a r o - last' l@(CmmCall _ Nothing _ _ _) = l - last' (CmmForeignCall t r a bid u i) = CmmForeignCall t r a (sub bid) u i - last' (CmmSwitch e bs) = CmmSwitch e $ map (liftM sub) bs - cond p t f = if t == f then CmmBranch t else CmmCondBranch p t f - exp (CmmStackSlot (CallArea (Young id)) off) = - CmmStackSlot (CallArea (Young (sub id))) off - exp (CmmLit (CmmBlock id)) = CmmLit (CmmBlock (sub id)) - exp e = e - sub = lookupBid subst + addSubst b' = my_trace "found new common block" (ppr bid <> char '=' <> ppr (entryLabel b')) $ + (True, bmap, mapInsert bid (entryLabel b') subst) + + +-- ----------------------------------------------------------------------------- +-- Hashing and equality on blocks + +-- Below here is mostly boilerplate: hashing blocks ignoring labels, +-- and comparing blocks modulo a label mapping. -- To speed up comparisons, we hash each basic block modulo labels. -- The hashing is a bit arbitrary (the numbers are completely arbitrary), -- but it should be fast and good enough. -hash_block :: CmmBlock -> Int +hash_block :: CmmBlock -> HashCode hash_block block = fromIntegral (foldBlockNodesB3 (hash_fst, hash_mid, hash_lst) block (0 :: Word32) .&. (0x7fffffff :: Word32)) -- UniqFM doesn't like negative Ints @@ -103,11 +95,11 @@ hash_block block = hash_lst m h = hash_node m + h `shiftL` 1 hash_node :: CmmNode O x -> Word32 - hash_node (CmmComment (FastString u _ _ _ _)) = cvt u + hash_node (CmmComment (FastString u _ _ _ _)) = 0 -- don't care hash_node (CmmAssign r e) = hash_reg r + hash_e e hash_node (CmmStore e e') = hash_e e + hash_e e' hash_node (CmmUnsafeForeignCall t _ as) = hash_tgt t + hash_list hash_e as - hash_node (CmmBranch _) = 23 -- would be great to hash these properly + hash_node (CmmBranch _) = 23 -- NB. ignore the label hash_node (CmmCondBranch p _ _) = hash_e p hash_node (CmmCall e _ _ _ _) = hash_e e hash_node (CmmForeignCall t _ _ _ _ _) = hash_tgt t @@ -143,18 +135,60 @@ hash_block block = -- Utilities: equality and substitution on the graph. -- Given a map ``subst'' from BlockID -> BlockID, we define equality. -eqBid :: BidMap -> BlockId -> BlockId -> Bool +eqBid :: BlockEnv BlockId -> BlockId -> BlockId -> Bool eqBid subst bid bid' = lookupBid subst bid == lookupBid subst bid' -lookupBid :: BidMap -> BlockId -> BlockId +lookupBid :: BlockEnv BlockId -> BlockId -> BlockId lookupBid subst bid = case mapLookup bid subst of Just bid -> lookupBid subst bid Nothing -> bid --- Equality on the body of a block, modulo a function mapping block IDs to block IDs. +-- Middle nodes and expressions can contain BlockIds, in particular in +-- CmmStackSlot and CmmBlock, so we have to use a special equality for +-- these. +-- +eqMiddleWith :: (BlockId -> BlockId -> Bool) + -> CmmNode O O -> CmmNode O O -> Bool +eqMiddleWith eqBid (CmmComment _) (CmmComment _) = True +eqMiddleWith eqBid (CmmAssign r1 e1) (CmmAssign r2 e2) + = r1 == r2 && eqExprWith eqBid e1 e2 +eqMiddleWith eqBid (CmmStore l1 r1) (CmmStore l2 r2) + = eqExprWith eqBid l1 l2 && eqExprWith eqBid r1 r2 +eqMiddleWith eqBid (CmmUnsafeForeignCall t1 r1 a1) + (CmmUnsafeForeignCall t2 r2 a2) + = t1 == t2 && r1 == r2 && and (zipWith (eqExprWith eqBid) a1 a2) +eqMiddleWith _ _ _ = False + +eqExprWith :: (BlockId -> BlockId -> Bool) + -> CmmExpr -> CmmExpr -> Bool +eqExprWith eqBid = eq + where + CmmLit l1 `eq` CmmLit l2 = eqLit l1 l2 + CmmLoad e1 _ `eq` CmmLoad e2 _ = e1 `eq` e2 + CmmReg r1 `eq` CmmReg r2 = r1==r2 + CmmRegOff r1 i1 `eq` CmmRegOff r2 i2 = r1==r2 && i1==i2 + CmmMachOp op1 es1 `eq` CmmMachOp op2 es2 = op1==op2 && es1 `eqs` es2 + CmmStackSlot a1 i1 `eq` CmmStackSlot a2 i2 = eqArea a1 a2 && i1==i2 + _e1 `eq` _e2 = False + + xs `eqs` ys = and (zipWith eq xs ys) + + eqLit (CmmBlock id1) (CmmBlock id2) = eqBid id1 id2 + eqLit l1 l2 = l1 == l2 + + eqArea Old Old = True + eqArea (Young id1) (Young id2) = eqBid id1 id2 + eqArea _ _ = False + +-- Equality on the body of a block, modulo a function mapping block +-- IDs to block IDs. eqBlockBodyWith :: (BlockId -> BlockId -> Bool) -> CmmBlock -> CmmBlock -> Bool -eqBlockBodyWith eqBid block block' = middles == middles' && eqLastWith eqBid last last' - where (_, middles , JustC last :: MaybeC C (CmmNode O C)) = blockToNodeList block - (_, middles', JustC last' :: MaybeC C (CmmNode O C)) = blockToNodeList block' +eqBlockBodyWith eqBid block block' + = and (zipWith (eqMiddleWith eqBid) (blockToList m) (blockToList m')) && + eqLastWith eqBid l l' + where (_,m,l) = blockSplit block + (_,m',l') = blockSplit block' + + eqLastWith :: (BlockId -> BlockId -> Bool) -> CmmNode O C -> CmmNode O C -> Bool eqLastWith eqBid (CmmBranch bid1) (CmmBranch bid2) = eqBid bid1 bid2 diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs index 73ce57e93f..36e7b8ec62 100644 --- a/compiler/cmm/CmmContFlowOpt.hs +++ b/compiler/cmm/CmmContFlowOpt.hs @@ -2,19 +2,19 @@ {-# OPTIONS_GHC -fno-warn-warnings-deprecations -fno-warn-incomplete-patterns #-} module CmmContFlowOpt - ( runCmmContFlowOpts - , removeUnreachableBlocks, replaceBranches + ( cmmCfgOpts + , cmmCfgOptsProc + , removeUnreachableBlocks + , replaceLabels ) where import BlockId import Cmm import CmmUtils -import Digraph import Maybes -import Outputable -import Compiler.Hoopl +import Hoopl import Control.Monad import Prelude hiding (succ, unzip, zip) @@ -24,104 +24,158 @@ import Prelude hiding (succ, unzip, zip) -- ----------------------------------------------------------------------------- -runCmmContFlowOpts :: CmmGroup -> CmmGroup -runCmmContFlowOpts = map (optProc cmmCfgOpts) - cmmCfgOpts :: CmmGraph -> CmmGraph -cmmCfgOpts = removeUnreachableBlocks . blockConcat . branchChainElim - -- Here branchChainElim can ultimately be replaced - -- with a more exciting combination of optimisations +cmmCfgOpts = removeUnreachableBlocks . blockConcat + +cmmCfgOptsProc :: CmmDecl -> CmmDecl +cmmCfgOptsProc = optProc cmmCfgOpts optProc :: (g -> g) -> GenCmmDecl d h g -> GenCmmDecl d h g optProc opt (CmmProc info lbl g) = CmmProc info lbl (opt g) optProc _ top = top + ----------------------------------------------------------------------------- -- --- Branch Chain Elimination +-- Block concatenation -- ----------------------------------------------------------------------------- --- | Remove any basic block of the form L: goto L', and replace L with --- L' everywhere else, unless L is the successor of a call instruction --- and L' is the entry block. You don't want to set the successor of a --- function call to the entry block because there is no good way to --- store both the infotables for the call and from the callee, while --- putting the stack pointer in a consistent place. +-- This optimisation does two things: +-- - If a block finishes with an unconditional branch, then we may +-- be able to concatenate the block it points to and remove the +-- branch. We do this either if the destination block is small +-- (e.g. just another branch), or if this is the only jump to +-- this particular destination block. +-- +-- - If a block finishes in a call whose continuation block is a +-- goto, then we can shortcut the destination, making the +-- continuation block the destination of the goto. +-- +-- Both transformations are improved by working from the end of the +-- graph towards the beginning, because we may be able to perform many +-- shortcuts in one go. + + +-- We need to walk over the blocks from the end back to the +-- beginning. We are going to maintain the "current" graph +-- (BlockEnv CmmBlock) as we go, and also a mapping from BlockId +-- to BlockId, representing continuation labels that we have +-- renamed. This latter mapping is important because we might +-- shortcut a CmmCall continuation. For example: +-- +-- Sp[0] = L +-- call g returns to L +-- +-- L: goto M -- --- JD isn't quite sure when it's safe to share continuations for different --- function calls -- have to think about where the SP will be, --- so we'll table that problem for now by leaving all call successors alone. - -branchChainElim :: CmmGraph -> CmmGraph -branchChainElim g - | null lone_branch_blocks = g -- No blocks to remove - | otherwise = {- pprTrace "branchChainElim" (ppr forest) $ -} - replaceLabels (mapFromList edges) g +-- M: ... +-- +-- So when we shortcut the L block, we need to replace not only +-- the continuation of the call, but also references to L in the +-- code (e.g. the assignment Sp[0] = L). So we keep track of +-- which labels we have renamed and apply the mapping at the end +-- with replaceLabels. + +blockConcat :: CmmGraph -> CmmGraph +blockConcat g@CmmGraph { g_entry = entry_id } + = replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks where - blocks = toBlockList g - - lone_branch_blocks :: [(BlockId, BlockId)] - -- each (L,K) is a block of the form - -- L : goto K - lone_branch_blocks = mapCatMaybes isLoneBranch blocks - - call_succs = foldl add emptyBlockSet blocks - where add :: BlockSet -> CmmBlock -> BlockSet - add succs b = - case lastNode b of - (CmmCall _ (Just k) _ _ _) -> setInsert k succs - (CmmForeignCall {succ=k}) -> setInsert k succs - _ -> succs - - isLoneBranch :: CmmBlock -> Maybe (BlockId, BlockId) - isLoneBranch block - | (JustC (CmmEntry id), [], JustC (CmmBranch target)) <- blockToNodeList block - , not (setMember id call_succs) - = Just (id,target) - | otherwise - = Nothing - - -- We build a graph from lone_branch_blocks (every node has only - -- one out edge). Then we - -- - topologically sort the graph: if from A we can reach B, - -- then A occurs before B in the result list. - -- - depth-first search starting from the nodes in this list. - -- This gives us a [[node]], in which each list is a dependency - -- chain. - -- - for each list [a1,a2,...an] replace branches to ai with an. - -- - -- This approach nicely deals with cycles by ignoring them. - -- Branches in a cycle will be redirected to somewhere in the - -- cycle, but we don't really care where. A cycle should be dead code, - -- and so will be eliminated by removeUnreachableBlocks. - -- - fromNode (b,_) = b - toNode a = (a,a) - - all_block_ids :: LabelSet - all_block_ids = setFromList (map fst lone_branch_blocks) - `setUnion` - setFromList (map snd lone_branch_blocks) - - forest = dfsTopSortG $ graphFromVerticesAndAdjacency nodes lone_branch_blocks - where nodes = map toNode $ setElems $ all_block_ids - - edges = [ (fromNode y, fromNode x) - | (x:xs) <- map reverse forest, y <- xs ] + -- we might be able to shortcut the entry BlockId itself + new_entry + | Just entry_blk <- mapLookup entry_id new_blocks + , Just dest <- canShortcut entry_blk + = dest + | otherwise + = entry_id ----------------------------------------------------------------- + blocks = postorderDfs g + + (new_blocks, shortcut_map) = + foldr maybe_concat (toBlockMap g, mapEmpty) blocks + + maybe_concat :: CmmBlock + -> (BlockEnv CmmBlock, BlockEnv BlockId) + -> (BlockEnv CmmBlock, BlockEnv BlockId) + maybe_concat block unchanged@(blocks, shortcut_map) + | CmmBranch b' <- last + , Just blk' <- mapLookup b' blocks + , shouldConcatWith b' blk' + = (mapInsert bid (splice head blk') blocks, shortcut_map) + + -- calls: if we can shortcut the continuation label, then + -- we must *also* remember to substitute for the label in the + -- code, because we will push it somewhere. + | Just b' <- callContinuation_maybe last + , Just blk' <- mapLookup b' blocks + , Just dest <- canShortcut blk' + = (blocks, mapInsert b' dest shortcut_map) + -- replaceLabels will substitute dest for b' everywhere, later + + -- non-calls: see if we can shortcut any of the successors. + | Nothing <- callContinuation_maybe last + = ( mapInsert bid (blockJoinTail head shortcut_last) blocks + , shortcut_map ) + + | otherwise + = (blocks, shortcut_map) + where + (head, last) = blockSplitTail block + bid = entryLabel block + shortcut_last = mapSuccessors shortcut last + shortcut l = + case mapLookup l blocks of + Just b | Just dest <- canShortcut b -> dest + _otherwise -> l + + shouldConcatWith b block + | num_preds b == 1 = True -- only one predecessor: go for it + | okToDuplicate block = True -- short enough to duplicate + | otherwise = False + where num_preds bid = mapLookup bid backEdges `orElse` 0 + + canShortcut :: CmmBlock -> Maybe BlockId + canShortcut block + | (_, middle, CmmBranch dest) <- blockSplit block + , isEmptyBlock middle + = Just dest + | otherwise + = Nothing + + backEdges :: BlockEnv Int -- number of predecessors for each block + backEdges = mapInsertWith (+) entry_id 1 $ -- add 1 for the entry id + mapMap setSize $ predMap blocks + + splice :: Block CmmNode C O -> CmmBlock -> CmmBlock + splice head rest = head `blockAppend` snd (blockSplitHead rest) + + +callContinuation_maybe :: CmmNode O C -> Maybe BlockId +callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b +callContinuation_maybe (CmmForeignCall { succ = b }) = Just b +callContinuation_maybe _ = Nothing + +okToDuplicate :: CmmBlock -> Bool +okToDuplicate block + = case blockSplit block of (_, m, _) -> isEmptyBlock m + -- cheap and cheerful; we might expand this in the future to + -- e.g. spot blocks that represent a single instruction or two + +------------------------------------------------------------------------ +-- Map over the CmmGraph, replacing each label with its mapping in the +-- supplied BlockEnv. replaceLabels :: BlockEnv BlockId -> CmmGraph -> CmmGraph -replaceLabels env = - replace_eid . mapGraphNodes1 txnode +replaceLabels env g + | mapNull env = g + | otherwise = replace_eid $ mapGraphNodes1 txnode g where replace_eid g = g {g_entry = lookup (g_entry g)} lookup id = mapLookup id env `orElse` id txnode :: CmmNode e x -> CmmNode e x txnode (CmmBranch bid) = CmmBranch (lookup bid) - txnode (CmmCondBranch p t f) = CmmCondBranch (exp p) (lookup t) (lookup f) + txnode (CmmCondBranch p t f) = mkCmmCondBranch (exp p) (lookup t) (lookup f) txnode (CmmSwitch e arms) = CmmSwitch (exp e) (map (liftM lookup) arms) txnode (CmmCall t k a res r) = CmmCall (exp t) (liftM lookup k) a res r txnode fc@CmmForeignCall{} = fc{ args = map exp (args fc) @@ -130,90 +184,25 @@ replaceLabels env = exp :: CmmExpr -> CmmExpr exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid)) - exp (CmmStackSlot (CallArea (Young id)) i) = CmmStackSlot (CallArea (Young (lookup id))) i + exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i exp e = e - -replaceBranches :: BlockEnv BlockId -> CmmGraph -> CmmGraph -replaceBranches env g = mapGraphNodes (id, id, last) g - where - last :: CmmNode O C -> CmmNode O C - last (CmmBranch id) = CmmBranch (lookup id) - last (CmmCondBranch e ti fi) = CmmCondBranch e (lookup ti) (lookup fi) - last (CmmSwitch e tbl) = CmmSwitch e (map (fmap lookup) tbl) - last l@(CmmCall {}) = l - last l@(CmmForeignCall {}) = l - lookup id = fmap lookup (mapLookup id env) `orElse` id - -- XXX: this is a recursive lookup, it follows chains until the lookup - -- returns Nothing, at which point we return the last BlockId +mkCmmCondBranch :: CmmExpr -> Label -> Label -> CmmNode O C +mkCmmCondBranch p t f = if t == f then CmmBranch t else CmmCondBranch p t f ---------------------------------------------------------------- -- Build a map from a block to its set of predecessors. Very useful. + predMap :: [CmmBlock] -> BlockEnv BlockSet predMap blocks = foldr add_preds mapEmpty blocks -- find the back edges where add_preds block env = foldl (add (entryLabel block)) env (successors block) add bid env b' = mapInsert b' (setInsert bid (mapLookup b' env `orElse` setEmpty)) env ------------------------------------------------------------------------------ --- --- Block concatenation --- ------------------------------------------------------------------------------ - --- If a block B branches to a label L, L is not the entry block, --- and L has no other predecessors, --- then we can splice the block starting with L onto the end of B. --- Order matters, so we work bottom up (reverse postorder DFS). --- This optimization can be inhibited by unreachable blocks, but --- the reverse postorder DFS returns only reachable blocks. --- --- To ensure correctness, we have to make sure that the BlockId of the block --- we are about to eliminate is not named in another instruction. --- --- Note: This optimization does _not_ subsume branch chain elimination. - -blockConcat :: CmmGraph -> CmmGraph -blockConcat g@(CmmGraph {g_entry=eid}) = - replaceLabels concatMap $ ofBlockMap (g_entry g) blocks' - where - blocks = postorderDfs g - - (blocks', concatMap) = - foldr maybe_concat (toBlockMap g, mapEmpty) $ blocks - - maybe_concat :: CmmBlock -> (LabelMap CmmBlock, LabelMap Label) -> (LabelMap CmmBlock, LabelMap Label) - maybe_concat b unchanged@(blocks', concatMap) = - let bid = entryLabel b - in case blockToNodeList b of - (JustC h, m, JustC (CmmBranch b')) -> - if canConcatWith b' then - (mapInsert bid (splice blocks' h m b') blocks', - mapInsert b' bid concatMap) - else unchanged - _ -> unchanged - - num_preds bid = liftM setSize (mapLookup bid backEdges) `orElse` 0 - - canConcatWith b' = b' /= eid && num_preds b' == 1 - - backEdges = predMap blocks - - splice :: forall map n e x. - IsMap map => - map (Block n e x) -> n C O -> [n O O] -> KeyOf map -> Block n C x - splice blocks' h m bid' = - case mapLookup bid' blocks' of - Nothing -> panic "unknown successor block" - Just block | (_, m', l') <- blockToNodeList block - -> blockOfNodeList (JustC h, (m ++ m'), l') - ----------------------------------------------------------------------------- -- -- Removing unreachable blocks --- ------------------------------------------------------------------------------ removeUnreachableBlocks :: CmmGraph -> CmmGraph removeUnreachableBlocks g diff --git a/compiler/cmm/CmmCvt.hs b/compiler/cmm/CmmCvt.hs index 80c6079aac..e72eee041c 100644 --- a/compiler/cmm/CmmCvt.hs +++ b/compiler/cmm/CmmCvt.hs @@ -12,29 +12,25 @@ import CmmUtils import qualified OldCmm as Old import OldPprCmm () -import Compiler.Hoopl hiding ((<*>), mkLabel, mkBranch) +import Hoopl hiding ((<*>), mkLabel, mkBranch) import Data.Maybe import Maybes import Outputable cmmOfZgraph :: CmmGroup -> Old.CmmGroup cmmOfZgraph tops = map mapTop tops - where mapTop (CmmProc h l g) = CmmProc (Old.CmmInfo Nothing Nothing (info_tbl h)) l (ofZgraph g) + where mapTop (CmmProc h l g) = CmmProc (info_tbl h) l (ofZgraph g) mapTop (CmmData s ds) = CmmData s ds data ValueDirection = Arguments | Results -add_hints :: Convention -> ValueDirection -> [a] -> [Old.CmmHinted a] +add_hints :: ForeignTarget -> ValueDirection -> [a] -> [Old.CmmHinted a] add_hints conv vd args = zipWith Old.CmmHinted args (get_hints conv vd) -get_hints :: Convention -> ValueDirection -> [ForeignHint] -get_hints (Foreign (ForeignConvention _ hints _)) Arguments = hints -get_hints (Foreign (ForeignConvention _ _ hints)) Results = hints -get_hints _other_conv _vd = repeat NoHint - -get_conv :: ForeignTarget -> Convention -get_conv (PrimTarget _) = NativeNodeCall -- JD: SUSPICIOUS -get_conv (ForeignTarget _ fc) = Foreign fc +get_hints :: ForeignTarget -> ValueDirection -> [ForeignHint] +get_hints (ForeignTarget _ (ForeignConvention _ hints _)) Arguments = hints +get_hints (ForeignTarget _ (ForeignConvention _ _ hints)) Results = hints +get_hints (PrimTarget _) _vd = repeat NoHint cmm_target :: ForeignTarget -> Old.CmmCallTarget cmm_target (PrimTarget op) = Old.CmmPrim op Nothing @@ -89,8 +85,8 @@ ofZgraph g = Old.ListGraph $ mapMaybe convert_block $ postorderDfs g CmmUnsafeForeignCall (PrimTarget MO_Touch) _ _ -> Old.CmmNop CmmUnsafeForeignCall target ress args -> Old.CmmCall (cmm_target target) - (add_hints (get_conv target) Results ress) - (add_hints (get_conv target) Arguments args) + (add_hints target Results ress) + (add_hints target Arguments args) Old.CmmMayReturn last :: CmmNode O C -> () -> [Old.CmmStmt] diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index 6eb91e89ba..939d4b7ca9 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -14,11 +14,11 @@ module CmmExpr , GlobalReg(..), globalRegType, spReg, hpReg, spLimReg, nodeReg, node, baseReg , VGcPtr(..), vgcFlag -- Temporary! , DefinerOfLocalRegs, UserOfLocalRegs, foldRegsDefd, foldRegsUsed, filterRegsUsed - , DefinerOfSlots, UserOfSlots, foldSlotsDefd, foldSlotsUsed , RegSet, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet - , plusRegSet, minusRegSet, timesRegSet - , regUsedIn, regSlot - , Area(..), AreaId(..), SubArea, SubAreaSet, AreaMap, isStackSlotOf + , plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet + , regSetToList + , regUsedIn + , Area(..) , module CmmMachOp , module CmmType ) @@ -31,9 +31,10 @@ import CmmMachOp import BlockId import CLabel import Unique -import UniqSet import Data.Map (Map) +import Data.Set (Set) +import qualified Data.Set as Set ----------------------------------------------------------------------------- -- CmmExpr @@ -42,11 +43,12 @@ import Data.Map (Map) data CmmExpr = CmmLit CmmLit -- Literal - | CmmLoad CmmExpr CmmType -- Read memory location - | CmmReg CmmReg -- Contents of register + | CmmLoad !CmmExpr !CmmType -- Read memory location + | CmmReg !CmmReg -- Contents of register | CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.) - | CmmStackSlot Area Int -- addressing expression of a stack slot - | CmmRegOff CmmReg Int + | CmmStackSlot Area {-# UNPACK #-} !Int + -- addressing expression of a stack slot + | CmmRegOff !CmmReg Int -- CmmRegOff reg i -- ** is shorthand only, meaning ** -- CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)] @@ -62,20 +64,16 @@ instance Eq CmmExpr where -- Equality ignores the types _e1 == _e2 = False data CmmReg - = CmmLocal LocalReg + = CmmLocal {-# UNPACK #-} !LocalReg | CmmGlobal GlobalReg deriving( Eq, Ord ) -- | A stack area is either the stack slot where a variable is spilled -- or the stack space where function arguments and results are passed. data Area - = RegSlot LocalReg - | CallArea AreaId - deriving (Eq, Ord) - -data AreaId = Old -- See Note [Old Area] - | Young BlockId + | Young {-# UNPACK #-} !BlockId -- Invariant: must be a continuation BlockId + -- See Note [Continuation BlockId] in CmmNode. deriving (Eq, Ord) {- Note [Old Area] @@ -94,15 +92,8 @@ necessarily at the young end of the Old area. End of note -} -type SubArea = (Area, Int, Int) -- area, offset, width -type SubAreaSet = Map Area [SubArea] - -type AreaMap = Map Area Int - -- Byte offset of the oldest byte of the Area, - -- relative to the oldest byte of the Old Area - data CmmLit - = CmmInt Integer Width + = CmmInt !Integer Width -- Interpretation: the 2's complement representation of the value -- is truncated to the specified size. This is easier than trying -- to keep the value within range, because we don't know whether @@ -120,7 +111,11 @@ data CmmLit -- It is also used inside the NCG during when generating -- position-independent code. | CmmLabelDiffOff CLabel CLabel Int -- label1 - label2 + offset - | CmmBlock BlockId -- Code label + + | CmmBlock {-# UNPACK #-} !BlockId -- Code label + -- Invariant: must be a continuation BlockId + -- See Note [Continuation BlockId] in CmmNode. + | CmmHighStackMark -- stands for the max stack space used during a procedure deriving Eq @@ -163,7 +158,7 @@ maybeInvertCmmExpr _ = Nothing ----------------------------------------------------------------------------- data LocalReg - = LocalReg !Unique CmmType + = LocalReg {-# UNPACK #-} !Unique CmmType -- ^ Parameters: -- 1. Identifier -- 2. Type @@ -189,22 +184,35 @@ localRegType (LocalReg _ rep) = rep ----------------------------------------------------------------------------- -- | Sets of local registers -type RegSet = UniqSet LocalReg + +-- These are used for dataflow facts, and a common operation is taking +-- the union of two RegSets and then asking whether the union is the +-- same as one of the inputs. UniqSet isn't good here, because +-- sizeUniqSet is O(n) whereas Set.size is O(1), so we use ordinary +-- Sets. + +type RegSet = Set LocalReg emptyRegSet :: RegSet +nullRegSet :: RegSet -> Bool elemRegSet :: LocalReg -> RegSet -> Bool extendRegSet :: RegSet -> LocalReg -> RegSet deleteFromRegSet :: RegSet -> LocalReg -> RegSet mkRegSet :: [LocalReg] -> RegSet minusRegSet, plusRegSet, timesRegSet :: RegSet -> RegSet -> RegSet - -emptyRegSet = emptyUniqSet -elemRegSet = elementOfUniqSet -extendRegSet = addOneToUniqSet -deleteFromRegSet = delOneFromUniqSet -mkRegSet = mkUniqSet -minusRegSet = minusUniqSet -plusRegSet = unionUniqSets -timesRegSet = intersectUniqSets +sizeRegSet :: RegSet -> Int +regSetToList :: RegSet -> [LocalReg] + +emptyRegSet = Set.empty +nullRegSet = Set.null +elemRegSet = Set.member +extendRegSet = flip Set.insert +deleteFromRegSet = flip Set.delete +mkRegSet = Set.fromList +minusRegSet = Set.difference +plusRegSet = Set.union +timesRegSet = Set.intersection +sizeRegSet = Set.size +regSetToList = Set.toList class UserOfLocalRegs a where foldRegsUsed :: (b -> LocalReg -> b) -> b -> a -> b @@ -236,7 +244,7 @@ instance DefinerOfLocalRegs LocalReg where foldRegsDefd f z r = f z r instance UserOfLocalRegs RegSet where - foldRegsUsed f = foldUniqSet (flip f) + foldRegsUsed f = Set.fold (flip f) instance UserOfLocalRegs CmmExpr where foldRegsUsed f z e = expr z e @@ -271,49 +279,6 @@ reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es _ `regUsedIn` CmmStackSlot _ _ = False ----------------------------------------------------------------------------- --- Stack slots ------------------------------------------------------------------------------ - -isStackSlotOf :: CmmExpr -> LocalReg -> Bool -isStackSlotOf (CmmStackSlot (RegSlot r) _) r' = r == r' -isStackSlotOf _ _ = False - -regSlot :: LocalReg -> CmmExpr -regSlot r = CmmStackSlot (RegSlot r) (widthInBytes $ typeWidth $ localRegType r) - ------------------------------------------------------------------------------ --- Stack slot use information for expressions and other types [_$_] ------------------------------------------------------------------------------ - --- Fold over the area, the offset into the area, and the width of the subarea. -class UserOfSlots a where - foldSlotsUsed :: (b -> SubArea -> b) -> b -> a -> b - -class DefinerOfSlots a where - foldSlotsDefd :: (b -> SubArea -> b) -> b -> a -> b - -instance UserOfSlots CmmExpr where - foldSlotsUsed f z e = expr z e - where expr z (CmmLit _) = z - expr z (CmmLoad (CmmStackSlot a i) ty) = f z (a, i, widthInBytes $ typeWidth ty) - expr z (CmmLoad addr _) = foldSlotsUsed f z addr - expr z (CmmReg _) = z - expr z (CmmMachOp _ exprs) = foldSlotsUsed f z exprs - expr z (CmmRegOff _ _) = z - expr z (CmmStackSlot _ _) = z - -instance UserOfSlots a => UserOfSlots [a] where - foldSlotsUsed _ set [] = set - foldSlotsUsed f set (x:xs) = foldSlotsUsed f (foldSlotsUsed f set x) xs - -instance DefinerOfSlots a => DefinerOfSlots [a] where - foldSlotsDefd _ set [] = set - foldSlotsDefd f set (x:xs) = foldSlotsDefd f (foldSlotsDefd f set x) xs - -instance DefinerOfSlots SubArea where - foldSlotsDefd f z a = f z a - ------------------------------------------------------------------------------ -- Global STG registers ----------------------------------------------------------------------------- diff --git a/compiler/cmm/CmmInfo.hs b/compiler/cmm/CmmInfo.hs index fe0c104d1c..a171faa057 100644 --- a/compiler/cmm/CmmInfo.hs +++ b/compiler/cmm/CmmInfo.hs @@ -19,6 +19,8 @@ import CmmUtils import CLabel import SMRep import Bitmap +import Stream (Stream) +import qualified Stream import Maybes import Constants @@ -40,10 +42,16 @@ mkEmptyContInfoTable info_lbl , cit_prof = NoProfilingInfo , cit_srt = NoC_SRT } -cmmToRawCmm :: Platform -> [Old.CmmGroup] -> IO [Old.RawCmmGroup] +cmmToRawCmm :: Platform -> Stream IO Old.CmmGroup () + -> IO (Stream IO Old.RawCmmGroup ()) cmmToRawCmm platform cmms = do { uniqs <- mkSplitUniqSupply 'i' - ; return (initUs_ uniqs (mapM (concatMapM (mkInfoTable platform)) cmms)) } + ; let do_one uniqs cmm = do + case initUs uniqs $ concatMapM (mkInfoTable platform) cmm of + (b,uniqs') -> return (uniqs',b) + -- NB. strictness fixes a space leak. DO NOT REMOVE. + ; return (Stream.mapAccumL do_one uniqs cmms >> return ()) + } -- Make a concrete info table, represented as a list of CmmStatic -- (it can't be simply a list of Word, because the SRT field is @@ -82,7 +90,7 @@ mkInfoTable :: Platform -> CmmDecl -> UniqSM [RawCmmDecl] mkInfoTable _ (CmmData sec dat) = return [CmmData sec dat] -mkInfoTable platform (CmmProc (CmmInfo _ _ info) entry_label blocks) +mkInfoTable platform (CmmProc info entry_label blocks) | CmmNonInfoTable <- info -- Code without an info table. Easy. = return [CmmProc Nothing entry_label blocks] @@ -91,7 +99,8 @@ mkInfoTable platform (CmmProc (CmmInfo _ _ info) entry_label blocks) ; return (top_decls ++ mkInfoTableAndCode info_lbl info_cts entry_label blocks) } - | otherwise = panic "mkInfoTable" -- Patern match overlap check not clever enough + | otherwise = panic "mkInfoTable" + -- Patern match overlap check not clever enough ----------------------------------------------------- type InfoTableContents = ( [CmmLit] -- The standard part diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs new file mode 100644 index 0000000000..660506e7dc --- /dev/null +++ b/compiler/cmm/CmmLayoutStack.hs @@ -0,0 +1,1045 @@ +{-# LANGUAGE RecordWildCards, GADTs #-} +module CmmLayoutStack ( + cmmLayoutStack, setInfoTableStackMap, cmmSink + ) where + +import StgCmmUtils ( callerSaveVolatileRegs ) -- XXX +import StgCmmForeign ( saveThreadState, loadThreadState ) -- XXX + +import Cmm +import BlockId +import CLabel +import CmmUtils +import MkGraph +import Module +import ForeignCall +import CmmLive +import CmmProcPoint +import SMRep +import Hoopl hiding ((<*>), mkLast, mkMiddle) +import OptimizationFuel +import Constants +import UniqSupply +import Maybes +import UniqFM +import Util + +import FastString +import Outputable +import Data.Map (Map) +import qualified Data.Map as Map +import qualified Data.Set as Set +import Control.Monad.Fix +import Data.Array as Array +import Data.Bits +import Data.List (nub, partition) +import Control.Monad (liftM) + +#include "HsVersions.h" + + +data StackSlot = Occupied | Empty + -- Occupied: a return address or part of an update frame + +instance Outputable StackSlot where + ppr Occupied = ptext (sLit "XXX") + ppr Empty = ptext (sLit "---") + +-- All stack locations are expressed as positive byte offsets from the +-- "base", which is defined to be the address above the return address +-- on the stack on entry to this CmmProc. +-- +-- Lower addresses have higher StackLocs. +-- +type StackLoc = ByteOff + +{- + A StackMap describes the stack at any given point. At a continuation + it has a particular layout, like this: + + | | <- base + |-------------| + | ret0 | <- base + 8 + |-------------| + . upd frame . <- base + sm_ret_off + |-------------| + | | + . vars . + . (live/dead) . + | | <- base + sm_sp - sm_args + |-------------| + | ret1 | + . ret vals . <- base + sm_sp (<--- Sp points here) + |-------------| + +Why do we include the final return address (ret0) in our stack map? I +have absolutely no idea, but it seems to be done that way consistently +in the rest of the code generator, so I played along here. --SDM + +Note that we will be constructing an info table for the continuation +(ret1), which needs to describe the stack down to, but not including, +the update frame (or ret0, if there is no update frame). +-} + +data StackMap = StackMap + { sm_sp :: StackLoc + -- ^ the offset of Sp relative to the base on entry + -- to this block. + , sm_args :: ByteOff + -- ^ the number of bytes of arguments in the area for this block + -- Defn: the offset of young(L) relative to the base is given by + -- (sm_sp - sm_args) of the StackMap for block L. + , sm_ret_off :: ByteOff + -- ^ Number of words of stack that we do not describe with an info + -- table, because it contains an update frame. + , sm_regs :: UniqFM (LocalReg,StackLoc) + -- ^ regs on the stack + } + +instance Outputable StackMap where + ppr StackMap{..} = + text "Sp = " <> int sm_sp $$ + text "sm_args = " <> int sm_args $$ + text "sm_ret_off = " <> int sm_ret_off $$ + text "sm_regs = " <> ppr (eltsUFM sm_regs) + + +cmmLayoutStack :: ProcPointSet -> ByteOff -> CmmGraph + -> FuelUniqSM (CmmGraph, BlockEnv StackMap) +cmmLayoutStack procpoints entry_args + graph0@(CmmGraph { g_entry = entry }) + = do + pprTrace "cmmLayoutStack" (ppr entry_args) $ return () + (graph, liveness) <- removeDeadAssignments graph0 + pprTrace "liveness" (ppr liveness) $ return () + let blocks = postorderDfs graph + + (final_stackmaps, final_high_sp, new_blocks) <- liftUniq $ + mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) -> + layout procpoints liveness entry entry_args + rec_stackmaps rec_high_sp blocks + + new_blocks' <- liftUniq $ mapM lowerSafeForeignCall new_blocks + + pprTrace ("Sp HWM") (ppr final_high_sp) $ + return (ofBlockList entry new_blocks', final_stackmaps) + + + +layout :: BlockSet -- proc points + -> BlockEnv CmmLive -- liveness + -> BlockId -- entry + -> ByteOff -- stack args on entry + + -> BlockEnv StackMap -- [final] stack maps + -> ByteOff -- [final] Sp high water mark + + -> [CmmBlock] -- [in] blocks + + -> UniqSM + ( BlockEnv StackMap -- [out] stack maps + , ByteOff -- [out] Sp high water mark + , [CmmBlock] -- [out] new blocks + ) + +layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks + = go blocks init_stackmap entry_args [] + where + (updfr, cont_info) = collectContInfo blocks + + init_stackmap = mapSingleton entry StackMap{ sm_sp = entry_args + , sm_args = entry_args + , sm_ret_off = updfr + , sm_regs = emptyUFM + } + + go [] acc_stackmaps acc_hwm acc_blocks + = return (acc_stackmaps, acc_hwm, acc_blocks) + + go (b0 : bs) acc_stackmaps acc_hwm acc_blocks + = do + let (entry0@(CmmEntry entry_lbl), middle0, last0) = blockSplit b0 + + let stack0@StackMap { sm_sp = sp0 } + = mapFindWithDefault + (pprPanic "no stack map for" (ppr entry_lbl)) + entry_lbl acc_stackmaps + + pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return () + + -- (a) Update the stack map to include the effects of + -- assignments in this block + let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0 + + -- (b) Insert assignments to reload all the live variables if this + -- block is a proc point + let middle1 = if entry_lbl `setMember` procpoints + then foldr blockCons middle0 (insertReloads stack0) + else middle0 + + -- (c) Look at the last node and if we are making a call or + -- jumping to a proc point, we must save the live + -- variables, adjust Sp, and construct the StackMaps for + -- each of the successor blocks. See handleLastNode for + -- details. + (middle2, sp_off, last1, fixup_blocks, out) + <- handleLastNode procpoints liveness cont_info + acc_stackmaps stack1 middle0 last0 + + pprTrace "layout(out)" (ppr out) $ return () + + -- (d) Manifest Sp: run over the nodes in the block and replace + -- CmmStackSlot with CmmLoad from Sp with a concrete offset. + -- + -- our block: + -- middle1 -- the original middle nodes + -- middle2 -- live variable saves from handleLastNode + -- Sp = Sp + sp_off -- Sp adjustment goes here + -- last1 -- the last node + -- + let middle_pre = blockToList $ foldl blockSnoc middle1 middle2 + + sp_high = final_hwm - entry_args + -- The stack check value is adjusted by the Sp offset on + -- entry to the proc, which is entry_args. We are + -- assuming that we only do a stack check at the + -- beginning of a proc, and we don't modify Sp before the + -- check. + + final_blocks = manifestSp final_stackmaps stack0 sp0 sp_high entry0 + middle_pre sp_off last1 fixup_blocks + + acc_stackmaps' = mapUnion acc_stackmaps out + + hwm' = maximum (acc_hwm : (sp0 - sp_off) : map sm_sp (mapElems out)) + + go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks) + + +-- ----------------------------------------------------------------------------- + +-- This doesn't seem right somehow. We need to find out whether this +-- proc will push some update frame material at some point, so that we +-- can avoid using that area of the stack for spilling. The +-- updfr_space field of the CmmProc *should* tell us, but it doesn't +-- (I think maybe it gets filled in later when we do proc-point +-- splitting). +-- +-- So we'll just take the max of all the cml_ret_offs. This could be +-- unnecessarily pessimistic, but probably not in the code we +-- generate. + +collectContInfo :: [CmmBlock] -> (ByteOff, BlockEnv ByteOff) +collectContInfo blocks + = (maximum ret_offs, mapFromList (catMaybes mb_argss)) + where + (mb_argss, ret_offs) = mapAndUnzip get_cont blocks + + get_cont b = + case lastNode b of + CmmCall { cml_cont = Just l, .. } + -> (Just (l, cml_ret_args), cml_ret_off) + CmmForeignCall { .. } + -> (Just (succ, 0), updfr) -- ?? + _other -> (Nothing, 0) + + +-- ----------------------------------------------------------------------------- +-- Updating the StackMap from middle nodes + +-- Look for loads from stack slots, and update the StackMap. This is +-- purelyu for optimisation reasons, so that we can avoid saving a +-- variable back to a different stack slot if it is already on the +-- stack. +-- +-- This happens a lot: for example when function arguments are passed +-- on the stack and need to be immediately saved across a call, we +-- want to just leave them where they are on the stack. +-- +procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap +procMiddle stackmaps node sm + = case node of + CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _) + -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) } + where loc = getStackLoc area off stackmaps + CmmAssign (CmmLocal r) _other + -> sm { sm_regs = delFromUFM (sm_regs sm) r } + _other + -> sm + +getStackLoc :: Area -> ByteOff -> BlockEnv StackMap -> StackLoc +getStackLoc Old n _ = n +getStackLoc (Young l) n stackmaps = + case mapLookup l stackmaps of + Nothing -> pprPanic "getStackLoc" (ppr l) + Just sm -> sm_sp sm - sm_args sm + n + + +-- ----------------------------------------------------------------------------- +-- Handling stack allocation for a last node + +-- We take a single last node and turn it into: +-- +-- C1 (some statements) +-- Sp = Sp + N +-- C2 (some more statements) +-- call f() -- the actual last node +-- +-- plus possibly some more blocks (we may have to add some fixup code +-- between the last node and the continuation). +-- +-- C1: is the code for saving the variables across this last node onto +-- the stack, if the continuation is a call or jumps to a proc point. +-- +-- C2: if the last node is a safe foreign call, we have to inject some +-- extra code that goes *after* the Sp adjustment. + +handleLastNode + :: ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff + -> BlockEnv StackMap -> StackMap + -> Block CmmNode O O + -> CmmNode O C + -> UniqSM + ( [CmmNode O O] -- nodes to go *before* the Sp adjustment + , ByteOff -- amount to adjust Sp + , CmmNode O C -- new last node + , [CmmBlock] -- new blocks + , BlockEnv StackMap -- stackmaps for the continuations + ) + +handleLastNode procpoints liveness cont_info stackmaps + stack0@StackMap { sm_sp = sp0 } middle last + = case last of + -- At each return / tail call, + -- adjust Sp to point to the last argument pushed, which + -- is cml_args, after popping any other junk from the stack. + CmmCall{ cml_cont = Nothing, .. } -> do + let sp_off = sp0 - cml_args + return ([], sp_off, last, [], mapEmpty) + + -- At each CmmCall with a continuation: + CmmCall{ cml_cont = Just cont_lbl, .. } -> + return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off + + CmmForeignCall{ succ = cont_lbl, .. } -> do + return $ lastCall cont_lbl wORD_SIZE wORD_SIZE (sm_ret_off stack0) + -- one word each for args and results: the return address + + CmmBranch{..} -> handleProcPoints + CmmCondBranch{..} -> handleProcPoints + CmmSwitch{..} -> handleProcPoints + + where + -- Calls and ForeignCalls are handled the same way: + lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff + -> ( [CmmNode O O] + , ByteOff + , CmmNode O C + , [CmmBlock] + , BlockEnv StackMap + ) + lastCall lbl cml_args cml_ret_args cml_ret_off + = ( assignments + , spOffsetForCall sp0 cont_stack cml_args + , last + , [] -- no new blocks + , mapSingleton lbl cont_stack ) + where + (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off + + + prepareStack lbl cml_ret_args cml_ret_off + | Just cont_stack <- mapLookup lbl stackmaps + -- If we have already seen this continuation before, then + -- we just have to make the stack look the same: + = (fixupStack stack0 cont_stack, cont_stack) + -- Otherwise, we have to allocate the stack frame + | otherwise + = (save_assignments, new_cont_stack) + where + (new_cont_stack, save_assignments) + = setupStackFrame lbl liveness cml_ret_off cml_ret_args stack0 + + + -- proc point, we have to set up the stack to match what the proc + -- point is expecting. + -- + handleProcPoints :: UniqSM ( [CmmNode O O] + , ByteOff + , CmmNode O C + , [CmmBlock] + , BlockEnv StackMap ) + + handleProcPoints + -- Note [diamond proc point] + | Just l <- futureContinuation middle + , (nub $ filter (`setMember` procpoints) $ successors last) == [l] + = do + let cont_args = mapFindWithDefault 0 l cont_info + (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0) + out = mapFromList [ (l', cont_stack) + | l' <- successors last ] + return ( assigs + , spOffsetForCall sp0 cont_stack wORD_SIZE + , last + , [] + , out) + + | otherwise = do + pps <- mapM handleProcPoint (successors last) + let lbl_map :: LabelMap Label + lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ] + fix_lbl l = mapLookup l lbl_map `orElse` l + return ( [] + , 0 + , mapSuccessors fix_lbl last + , concat [ blk | (_,_,_,blk) <- pps ] + , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] ) + + -- For each proc point that is a successor of this block + -- (a) if the proc point already has a stackmap, we need to + -- shuffle the current stack to make it look the same. + -- We have to insert a new block to make this happen. + -- (b) otherwise, call "allocate live stack0" to make the + -- stack map for the proc point + handleProcPoint :: BlockId + -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock]) + handleProcPoint l + | not (l `setMember` procpoints) = return (l, l, stack0, []) + | otherwise = do + tmp_lbl <- liftM mkBlockId $ getUniqueM + let + (stack2, assigs) = + case mapLookup l stackmaps of + Just pp_sm -> (pp_sm, fixupStack stack0 pp_sm) + Nothing -> + pprTrace "first visit to proc point" + (ppr l <+> ppr stack1) $ + (stack1, assigs) + where + cont_args = mapFindWithDefault 0 l cont_info + (stack1, assigs) = + setupStackFrame l liveness (sm_ret_off stack0) + cont_args stack0 + + sp_off = sp0 - sm_sp stack2 + + block = blockJoin (CmmEntry tmp_lbl) + (maybeAddSpAdj sp_off (blockFromList assigs)) + (CmmBranch l) + -- + return (l, tmp_lbl, stack2, [block]) + + + +-- Sp is currently pointing to current_sp, +-- we want it to point to +-- (sm_sp cont_stack - sm_args cont_stack + args) +-- so the difference is +-- sp0 - (sm_sp cont_stack - sm_args cont_stack + args) +spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff +spOffsetForCall current_sp cont_stack args + = current_sp - (sm_sp cont_stack - sm_args cont_stack + args) + + +-- | create a sequence of assignments to establish the new StackMap, +-- given the old StackMap. +fixupStack :: StackMap -> StackMap -> [CmmNode O O] +fixupStack old_stack new_stack = concatMap move new_locs + where + old_map :: Map LocalReg ByteOff + old_map = Map.fromList (stackSlotRegs old_stack) + new_locs = stackSlotRegs new_stack + + move (r,n) + | Just m <- Map.lookup r old_map, n == m = [] + | otherwise = [CmmStore (CmmStackSlot Old n) + (CmmReg (CmmLocal r))] + + + +setupStackFrame + :: BlockId -- label of continuation + -> BlockEnv CmmLive -- liveness + -> ByteOff -- updfr + -> ByteOff -- bytes of return values on stack + -> StackMap -- current StackMap + -> (StackMap, [CmmNode O O]) + +setupStackFrame lbl liveness updfr_off ret_args stack0 + = (cont_stack, assignments) + where + -- get the set of LocalRegs live in the continuation + live = mapFindWithDefault Set.empty lbl liveness + + -- the stack from the base to updfr_off is off-limits. + -- our new stack frame contains: + -- * saved live variables + -- * the return address [young(C) + 8] + -- * the args for the call, + -- which are replaced by the return values at the return + -- point. + + -- everything up to updfr_off is off-limits + -- stack1 contains updfr_off, plus everything we need to save + (stack1, assignments) = allocate updfr_off live stack0 + + -- And the Sp at the continuation is: + -- sm_sp stack1 + ret_args + cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args + , sm_args = ret_args + , sm_ret_off = updfr_off + } + + +-- ----------------------------------------------------------------------------- +-- Note [diamond proc point] +-- +-- This special case looks for the pattern we get from a typical +-- tagged case expression: +-- +-- Sp[young(L1)] = L1 +-- if (R1 & 7) != 0 goto L1 else goto L2 +-- L2: +-- call [R1] returns to L1 +-- L1: live: {y} +-- x = R1 +-- +-- If we let the generic case handle this, we get +-- +-- Sp[-16] = L1 +-- if (R1 & 7) != 0 goto L1a else goto L2 +-- L2: +-- Sp[-8] = y +-- Sp = Sp - 16 +-- call [R1] returns to L1 +-- L1a: +-- Sp[-8] = y +-- Sp = Sp - 16 +-- goto L1 +-- L1: +-- x = R1 +-- +-- The code for saving the live vars is duplicated in each branch, and +-- furthermore there is an extra jump in the fast path (assuming L1 is +-- a proc point, which it probably is if there is a heap check). +-- +-- So to fix this we want to set up the stack frame before the +-- conditional jump. How do we know when to do this, and when it is +-- safe? The basic idea is, when we see the assignment +-- +-- Sp[young(L)] = L +-- +-- we know that +-- * we are definitely heading for L +-- * there can be no more reads from another stack area, because young(L) +-- overlaps with it. +-- +-- We don't necessarily know that everything live at L is live now +-- (some might be assigned between here and the jump to L). So we +-- simplify and only do the optimisation when we see +-- +-- (1) a block containing an assignment of a return address L +-- (2) ending in a branch where one (and only) continuation goes to L, +-- and no other continuations go to proc points. +-- +-- then we allocate the stack frame for L at the end of the block, +-- before the branch. +-- +-- We could generalise (2), but that would make it a bit more +-- complicated to handle, and this currently catches the common case. + +futureContinuation :: Block CmmNode O O -> Maybe BlockId +futureContinuation middle = foldBlockNodesB f middle Nothing + where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId + f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _ + = Just l + f _ r = r + +-- ----------------------------------------------------------------------------- +-- Saving live registers + +-- | Given a set of live registers and a StackMap, save all the registers +-- on the stack and return the new StackMap and the assignments to do +-- the saving. +-- +allocate :: ByteOff -> RegSet -> StackMap -> (StackMap, [CmmNode O O]) +allocate ret_off live stackmap@StackMap{ sm_sp = sp0 + , sm_regs = regs0 } + = + pprTrace "allocate" (ppr live $$ ppr stackmap) $ + + -- we only have to save regs that are not already in a slot + let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live) + regs1 = filterUFM (\(r,_) -> elemRegSet r live) regs0 + in + + -- make a map of the stack + let stack = reverse $ Array.elems $ + accumArray (\_ x -> x) Empty (1, toWords (max sp0 ret_off)) $ + ret_words ++ live_words + where ret_words = + [ (x, Occupied) + | x <- [ 1 .. toWords ret_off] ] + live_words = + [ (toWords x, Occupied) + | (r,off) <- eltsUFM regs1, + let w = localRegBytes r, + x <- [ off, off-wORD_SIZE .. off - w + 1] ] + in + + -- Pass over the stack: find slots to save all the new live variables, + -- choosing the oldest slots first (hence a foldr). + let + save slot ([], stack, n, assigs, regs) -- no more regs to save + = ([], slot:stack, n `plusW` 1, assigs, regs) + save slot (to_save, stack, n, assigs, regs) + = case slot of + Occupied -> (to_save, Occupied:stack, n `plusW` 1, assigs, regs) + Empty + | Just (stack', r, to_save') <- + select_save to_save (slot:stack) + -> let assig = CmmStore (CmmStackSlot Old n') + (CmmReg (CmmLocal r)) + n' = n `plusW` 1 + in + (to_save', stack', n', assig : assigs, (r,(r,n')):regs) + + | otherwise + -> (to_save, slot:stack, n `plusW` 1, assigs, regs) + + -- we should do better here: right now we'll fit the smallest first, + -- but it would make more sense to fit the biggest first. + select_save :: [LocalReg] -> [StackSlot] + -> Maybe ([StackSlot], LocalReg, [LocalReg]) + select_save regs stack = go regs [] + where go [] _no_fit = Nothing + go (r:rs) no_fit + | Just rest <- dropEmpty words stack + = Just (replicate words Occupied ++ rest, r, rs++no_fit) + | otherwise + = go rs (r:no_fit) + where words = localRegWords r + + -- fill in empty slots as much as possible + (still_to_save, save_stack, n, save_assigs, save_regs) + = foldr save (to_save, [], 0, [], []) stack + + -- push any remaining live vars on the stack + (push_sp, push_assigs, push_regs) + = foldr push (n, [], []) still_to_save + where + push r (n, assigs, regs) + = (n', assig : assigs, (r,(r,n')) : regs) + where + n' = n + localRegBytes r + assig = CmmStore (CmmStackSlot Old n') + (CmmReg (CmmLocal r)) + + trim_sp + | not (null push_regs) = push_sp + | otherwise + = n `plusW` (- length (takeWhile isEmpty save_stack)) + + final_regs = regs1 `addListToUFM` push_regs + `addListToUFM` save_regs + + in + -- XXX should be an assert + if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else + + if (trim_sp .&. (wORD_SIZE - 1)) /= 0 then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else + + ( stackmap { sm_regs = final_regs , sm_sp = trim_sp } + , push_assigs ++ save_assigs ) + + +-- ----------------------------------------------------------------------------- +-- Manifesting Sp + +-- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp. The +-- block looks like this: +-- +-- middle_pre -- the middle nodes +-- Sp = Sp + sp_off -- Sp adjustment goes here +-- last -- the last node +-- +-- And we have some extra blocks too (that don't contain Sp adjustments) +-- +-- The adjustment for middle_pre will be different from that for +-- middle_post, because the Sp adjustment intervenes. +-- +manifestSp + :: BlockEnv StackMap -- StackMaps for other blocks + -> StackMap -- StackMap for this block + -> ByteOff -- Sp on entry to the block + -> ByteOff -- SpHigh + -> CmmNode C O -- first node + -> [CmmNode O O] -- middle + -> ByteOff -- sp_off + -> CmmNode O C -- last node + -> [CmmBlock] -- new blocks + -> [CmmBlock] -- final blocks with Sp manifest + +manifestSp stackmaps stack0 sp0 sp_high + first middle_pre sp_off last fixup_blocks + = final_block : fixup_blocks' + where + area_off = getAreaOff stackmaps + + adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x + adj_pre_sp = mapExpDeep (areaToSp sp0 sp_high area_off) + adj_post_sp = mapExpDeep (areaToSp (sp0 - sp_off) sp_high area_off) + + final_middle = maybeAddSpAdj sp_off $ + blockFromList $ + map adj_pre_sp $ + elimStackStores stack0 stackmaps area_off $ + middle_pre + + final_last = optStackCheck (adj_post_sp last) + + final_block = blockJoin first final_middle final_last + + fixup_blocks' = map (blockMapNodes3 (id, adj_post_sp, id)) fixup_blocks + + +getAreaOff :: BlockEnv StackMap -> (Area -> StackLoc) +getAreaOff _ Old = 0 +getAreaOff stackmaps (Young l) = + case mapLookup l stackmaps of + Just sm -> sm_sp sm - sm_args sm + Nothing -> pprPanic "getAreaOff" (ppr l) + + +maybeAddSpAdj :: ByteOff -> Block CmmNode O O -> Block CmmNode O O +maybeAddSpAdj 0 block = block +maybeAddSpAdj sp_off block + = block `blockSnoc` CmmAssign spReg (cmmOffset (CmmReg spReg) sp_off) + + +{- +Sp(L) is the Sp offset on entry to block L relative to the base of the +OLD area. + +SpArgs(L) is the size of the young area for L, i.e. the number of +arguments. + + - in block L, each reference to [old + N] turns into + [Sp + Sp(L) - N] + + - in block L, each reference to [young(L') + N] turns into + [Sp + Sp(L) - Sp(L') + SpArgs(L') - N] + + - be careful with the last node of each block: Sp has already been adjusted + to be Sp + Sp(L) - Sp(L') +-} + +areaToSp :: ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr +areaToSp sp_old _sp_hwm area_off (CmmStackSlot area n) = + cmmOffset (CmmReg spReg) (sp_old - area_off area - n) +areaToSp _ sp_hwm _ (CmmLit CmmHighStackMark) = CmmLit (mkIntCLit sp_hwm) +areaToSp _ _ _ (CmmMachOp (MO_U_Lt _) -- Note [null stack check] + [CmmMachOp (MO_Sub _) + [ CmmReg (CmmGlobal Sp) + , CmmLit (CmmInt 0 _)], + CmmReg (CmmGlobal SpLim)]) = CmmLit (CmmInt 0 wordWidth) +areaToSp _ _ _ other = other + +-- ----------------------------------------------------------------------------- +-- Note [null stack check] +-- +-- If the high-water Sp is zero, then we end up with +-- +-- if (Sp - 0 < SpLim) then .. else .. +-- +-- and possibly some dead code for the failure case. Optimising this +-- away depends on knowing that SpLim <= Sp, so it is really the job +-- of the stack layout algorithm, hence we do it now. This is also +-- convenient because control-flow optimisation later will drop the +-- dead code. + +optStackCheck :: CmmNode O C -> CmmNode O C +optStackCheck n = -- Note [null stack check] + case n of + CmmCondBranch (CmmLit (CmmInt 0 _)) _true false -> CmmBranch false + other -> other + + +-- ----------------------------------------------------------------------------- + +-- | Eliminate stores of the form +-- +-- Sp[area+n] = r +-- +-- when we know that r is already in the same slot as Sp[area+n]. We +-- could do this in a later optimisation pass, but that would involve +-- a separate analysis and we already have the information to hand +-- here. It helps clean up some extra stack stores in common cases. +-- +-- Note that we may have to modify the StackMap as we walk through the +-- code using procMiddle, since an assignment to a variable in the +-- StackMap will invalidate its mapping there. +-- +elimStackStores :: StackMap + -> BlockEnv StackMap + -> (Area -> ByteOff) + -> [CmmNode O O] + -> [CmmNode O O] +elimStackStores stackmap stackmaps area_off nodes + = go stackmap nodes + where + go _stackmap [] = [] + go stackmap (n:ns) + = case n of + CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r)) + | Just (_,off) <- lookupUFM (sm_regs stackmap) r + , area_off area + m == off + -> pprTrace "eliminated a node!" (ppr r) $ go stackmap ns + _otherwise + -> n : go (procMiddle stackmaps n stackmap) ns + + +-- ----------------------------------------------------------------------------- +-- Update info tables to include stack liveness + + +setInfoTableStackMap :: BlockEnv StackMap -> CmmDecl -> CmmDecl +setInfoTableStackMap stackmaps + (CmmProc top_info@TopInfo{..} l g@CmmGraph{g_entry = eid}) + = CmmProc top_info{ info_tbl = fix_info info_tbl } l g + where + fix_info info_tbl@CmmInfoTable{ cit_rep = StackRep _ } = + info_tbl { cit_rep = StackRep (get_liveness eid) } + fix_info other = other + + get_liveness :: BlockId -> Liveness + get_liveness lbl + = case mapLookup lbl stackmaps of + Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl) + Just sm -> stackMapToLiveness sm + +setInfoTableStackMap _ d = d + + +stackMapToLiveness :: StackMap -> Liveness +stackMapToLiveness StackMap{..} = + reverse $ Array.elems $ + accumArray (\_ x -> x) True (toWords sm_ret_off + 1, + toWords (sm_sp - sm_args)) live_words + where + live_words = [ (toWords off, False) + | (r,off) <- eltsUFM sm_regs, isGcPtrType (localRegType r) ] + + +-- ----------------------------------------------------------------------------- +-- Lowering safe foreign calls + +{- +Note [lower safe foreign calls] + +We start with + + Sp[young(L1)] = L1 + ,----------------------- + | r1 = foo(x,y,z) returns to L1 + '----------------------- + L1: + R1 = r1 -- copyIn, inserted by mkSafeCall + ... + +the stack layout algorithm will arrange to save and reload everything +live across the call. Our job now is to expand the call so we get + + Sp[young(L1)] = L1 + ,----------------------- + | SAVE_THREAD_STATE() + | token = suspendThread(BaseReg, interruptible) + | r = foo(x,y,z) + | BaseReg = resumeThread(token) + | LOAD_THREAD_STATE() + | R1 = r -- copyOut + | jump L1 + '----------------------- + L1: + r = R1 -- copyIn, inserted by mkSafeCall + ... + +Note the copyOut, which saves the results in the places that L1 is +expecting them (see Note {safe foreign call convention]). +-} + +lowerSafeForeignCall :: CmmBlock -> UniqSM CmmBlock +lowerSafeForeignCall block + | (entry, middle, CmmForeignCall { .. }) <- blockSplit block + = do + -- Both 'id' and 'new_base' are KindNonPtr because they're + -- RTS-only objects and are not subject to garbage collection + id <- newTemp bWord + new_base <- newTemp (cmmRegType (CmmGlobal BaseReg)) + let (caller_save, caller_load) = callerSaveVolatileRegs + load_tso <- newTemp gcWord + load_stack <- newTemp gcWord + let suspend = saveThreadState <*> + caller_save <*> + mkMiddle (callSuspendThread id intrbl) + midCall = mkUnsafeCall tgt res args + resume = mkMiddle (callResumeThread new_base id) <*> + -- Assign the result to BaseReg: we + -- might now have a different Capability! + mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*> + caller_load <*> + loadThreadState load_tso load_stack + -- Note: The successor must be a procpoint, and we have already split, + -- so we use a jump, not a branch. + succLbl = CmmLit (CmmLabel (infoTblLbl succ)) + + (ret_args, copyout) = copyOutOflow NativeReturn Jump (Young succ) + (map (CmmReg . CmmLocal) res) + updfr (0, []) + + jump = CmmCall { cml_target = succLbl + , cml_cont = Just succ + , cml_args = widthInBytes wordWidth + , cml_ret_args = ret_args + , cml_ret_off = updfr } + + graph' <- lgraphOfAGraph $ suspend <*> + midCall <*> + resume <*> + copyout <*> + mkLast jump + + case toBlockList graph' of + [one] -> let (_, middle', last) = blockSplit one + in return (blockJoin entry (middle `blockAppend` middle') last) + _ -> panic "lowerSafeForeignCall0" + + -- Block doesn't end in a safe foreign call: + | otherwise = return block + + +foreignLbl :: FastString -> CmmExpr +foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId name)) + +newTemp :: CmmType -> UniqSM LocalReg +newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep) + +callSuspendThread :: LocalReg -> Bool -> CmmNode O O +callSuspendThread id intrbl = + CmmUnsafeForeignCall + (ForeignTarget (foreignLbl (fsLit "suspendThread")) + (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint])) + [id] [CmmReg (CmmGlobal BaseReg), CmmLit (mkIntCLit (fromEnum intrbl))] + +callResumeThread :: LocalReg -> LocalReg -> CmmNode O O +callResumeThread new_base id = + CmmUnsafeForeignCall + (ForeignTarget (foreignLbl (fsLit "resumeThread")) + (ForeignConvention CCallConv [AddrHint] [AddrHint])) + [new_base] [CmmReg (CmmLocal id)] + +-- ----------------------------------------------------------------------------- + +plusW :: ByteOff -> WordOff -> ByteOff +plusW b w = b + w * wORD_SIZE + +dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot] +dropEmpty 0 ss = Just ss +dropEmpty n (Empty : ss) = dropEmpty (n-1) ss +dropEmpty _ _ = Nothing + +isEmpty :: StackSlot -> Bool +isEmpty Empty = True +isEmpty _ = False + +localRegBytes :: LocalReg -> ByteOff +localRegBytes r = roundUpToWords (widthInBytes (typeWidth (localRegType r))) + +localRegWords :: LocalReg -> WordOff +localRegWords = toWords . localRegBytes + +toWords :: ByteOff -> WordOff +toWords x = x `quot` wORD_SIZE + + +insertReloads :: StackMap -> [CmmNode O O] +insertReloads stackmap = + [ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot Old sp) + (localRegType r)) + | (r,sp) <- stackSlotRegs stackmap + ] + + +stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)] +stackSlotRegs sm = eltsUFM (sm_regs sm) + +-- ----------------------------------------------------------------------------- + +-- If we do this *before* stack layout, we might be able to avoid +-- saving some things across calls/procpoints. +-- +-- *but*, that will invalidate the liveness analysis, and we'll have +-- to re-do it. + +cmmSink :: CmmGraph -> FuelUniqSM CmmGraph +cmmSink graph = do + let liveness = cmmLiveness graph + return $ cmmSink' liveness graph + +cmmSink' :: BlockEnv CmmLive -> CmmGraph -> CmmGraph +cmmSink' liveness graph + = ofBlockList (g_entry graph) $ sink mapEmpty $ postorderDfs graph + where + + sink :: BlockEnv [(LocalReg, CmmExpr)] -> [CmmBlock] -> [CmmBlock] + sink _ [] = [] + sink sunk (b:bs) = + pprTrace "sink" (ppr l) $ + blockJoin first final_middle last : sink sunk' bs + where + l = entryLabel b + (first, middle, last) = blockSplit b + (middle', assigs) = walk (blockToList middle) emptyBlock + (mapFindWithDefault [] l sunk) + + (dropped_last, assigs') = partition (`conflictsWithLast` last) assigs + + final_middle = foldl blockSnoc middle' (toNodes dropped_last) + + sunk' = mapUnion sunk $ + mapFromList [ (l, filt assigs' (getLive l)) + | l <- successors last ] + where + getLive l = mapFindWithDefault Set.empty l liveness + filt as live = [ (r,e) | (r,e) <- as, r `Set.member` live ] + + +walk :: [CmmNode O O] -> Block CmmNode O O -> [(LocalReg, CmmExpr)] + -> (Block CmmNode O O, [(LocalReg, CmmExpr)]) + +walk [] acc as = (acc, as) +walk (n:ns) acc as + | Just a <- collect_it = walk ns acc (a:as) + | otherwise = walk ns (foldr (flip blockSnoc) acc (n:drop_nodes)) as' + where + collect_it = case n of + CmmAssign (CmmLocal r) e@(CmmReg (CmmGlobal _)) -> Just (r,e) +-- CmmAssign (CmmLocal r) e@(CmmLoad addr _) | +-- foldRegsUsed (\b r -> False) True addr -> Just (r,e) + _ -> Nothing + + drop_nodes = toNodes dropped + (dropped, as') = partition should_drop as + where should_drop a = a `conflicts` n + +toNodes as = [ CmmAssign (CmmLocal r) rhs | (r,rhs) <- as ] + +-- We only sink "r = G" assignments right now, so conflicts is very simple: +(r, rhs) `conflicts` CmmAssign reg _ | reg `regUsedIn` rhs = True +--(r, CmmLoad _ _) `conflicts` CmmStore _ _ = True +(r, _) `conflicts` node + = foldRegsUsed (\b r' -> r == r' || b) False node + +(r, _) `conflictsWithLast` node + = foldRegsUsed (\b r' -> r == r' || b) False node diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs index 01ebac6254..cd0558616e 100644 --- a/compiler/cmm/CmmLint.hs +++ b/compiler/cmm/CmmLint.hs @@ -1,67 +1,71 @@ ----------------------------------------------------------------------------- -- --- (c) The University of Glasgow 2004-2006 +-- (c) The University of Glasgow 2011 -- -- CmmLint: checking the correctness of Cmm statements and expressions -- ----------------------------------------------------------------------------- - -{-# OPTIONS -fno-warn-tabs #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and --- detab the module (please do the detabbing in a separate patch). See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces --- for details - +{-# LANGUAGE GADTs #-} module CmmLint ( - cmmLint, cmmLintTop + cmmLint, cmmLintDecl, cmmLintGraph ) where +import Hoopl +import Cmm +import CmmUtils +import PprCmm () import BlockId -import OldCmm +import FastString import CLabel import Outputable -import OldPprCmm() import Constants -import FastString -import Platform import Data.Maybe +-- Things to check: +-- - invariant on CmmBlock in CmmExpr (see comment there) +-- - check for branches to blocks that don't exist +-- - check types + -- ----------------------------------------------------------------------------- -- Exported entry points: cmmLint :: (Outputable d, Outputable h) - => Platform -> GenCmmGroup d h (ListGraph CmmStmt) -> Maybe SDoc -cmmLint platform tops = runCmmLint platform (mapM_ (lintCmmDecl platform)) tops + => GenCmmGroup d h (ListGraph CmmStmt) -> Maybe SDoc +cmmLint tops = runCmmLint (mapM_ lintCmmDecl) tops -cmmLintTop :: (Outputable d, Outputable h) - => Platform -> GenCmmDecl d h (ListGraph CmmStmt) -> Maybe SDoc -cmmLintTop platform top = runCmmLint platform (lintCmmDecl platform) top +cmmLintGraph :: CmmGraph -> Maybe SDoc +cmmLintGraph g = runCmmLint lintCmmGraph g -runCmmLint :: Outputable a - => Platform -> (a -> CmmLint b) -> a -> Maybe SDoc -runCmmLint _ l p = +runCmmLint :: Outputable a => (a -> CmmLint b) -> a -> Maybe SDoc +runCmmLint l p = case unCL (l p) of - Left err -> Just (vcat [ptext $ sLit ("Cmm lint error:"), - nest 2 err, - ptext $ sLit ("Program was:"), - nest 2 (ppr p)]) - Right _ -> Nothing - -lintCmmDecl :: Platform -> (GenCmmDecl h i (ListGraph CmmStmt)) -> CmmLint () -lintCmmDecl platform (CmmProc _ lbl (ListGraph blocks)) - = addLintInfo (text "in proc " <> pprCLabel platform lbl) $ - let labels = foldl (\s b -> setInsert (blockId b) s) setEmpty blocks - in mapM_ (lintCmmBlock platform labels) blocks - -lintCmmDecl _ (CmmData {}) + Left err -> Just (vcat [ptext $ sLit ("Cmm lint error:"), + nest 2 err, + ptext $ sLit ("Program was:"), + nest 2 (ppr p)]) + Right _ -> Nothing + +lintCmmDecl :: GenCmmDecl h i CmmGraph -> CmmLint () +lintCmmDecl (CmmProc _ lbl g) + = addLintInfo (text "in proc " <> ppr lbl) $ lintCmmGraph g +lintCmmDecl (CmmData {}) = return () -lintCmmBlock :: Platform -> BlockSet -> GenBasicBlock CmmStmt -> CmmLint () -lintCmmBlock platform labels (BasicBlock id stmts) - = addLintInfo (text "in basic block " <> ppr id) $ - mapM_ (lintCmmStmt platform labels) stmts + +lintCmmGraph :: CmmGraph -> CmmLint () +lintCmmGraph g = mapM_ (lintCmmBlock labels) blocks + where + blocks = toBlockList g + labels = setFromList (map entryLabel blocks) + + +lintCmmBlock :: BlockSet -> CmmBlock -> CmmLint () +lintCmmBlock labels block + = addLintInfo (\_ -> text "in basic block " <> ppr (entryLabel block)) $ do + let (_, middle, last) = blockSplit block + mapM_ lintCmmMiddle (blockToList middle) + lintCmmLast labels last -- ----------------------------------------------------------------------------- -- lintCmmExpr @@ -69,24 +73,24 @@ lintCmmBlock platform labels (BasicBlock id stmts) -- Checks whether a CmmExpr is "type-correct", and check for obvious-looking -- byte/word mismatches. -lintCmmExpr :: Platform -> CmmExpr -> CmmLint CmmType -lintCmmExpr platform (CmmLoad expr rep) = do - _ <- lintCmmExpr platform expr +lintCmmExpr :: CmmExpr -> CmmLint CmmType +lintCmmExpr (CmmLoad expr rep) = do + _ <- lintCmmExpr expr -- Disabled, if we have the inlining phase before the lint phase, -- we can have funny offsets due to pointer tagging. -- EZY -- when (widthInBytes (typeWidth rep) >= wORD_SIZE) $ -- cmmCheckWordAddress expr return rep -lintCmmExpr platform expr@(CmmMachOp op args) = do - tys <- mapM (lintCmmExpr platform) args +lintCmmExpr expr@(CmmMachOp op args) = do + tys <- mapM lintCmmExpr args if map (typeWidth . cmmExprType) args == machOpArgReps op - then cmmCheckMachOp op args tys - else cmmLintMachOpErr expr (map cmmExprType args) (machOpArgReps op) -lintCmmExpr platform (CmmRegOff reg offset) - = lintCmmExpr platform (CmmMachOp (MO_Add rep) - [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)]) + then cmmCheckMachOp op args tys + else cmmLintMachOpErr expr (map cmmExprType args) (machOpArgReps op) +lintCmmExpr (CmmRegOff reg offset) + = lintCmmExpr (CmmMachOp (MO_Add rep) + [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)]) where rep = typeWidth (cmmRegType reg) -lintCmmExpr _ expr = +lintCmmExpr expr = return (cmmExprType expr) -- Check for some common byte/word mismatches (eg. Sp + 1) @@ -119,43 +123,61 @@ notNodeReg :: CmmExpr -> Bool notNodeReg (CmmReg reg) | reg == nodeReg = False notNodeReg _ = True -lintCmmStmt :: Platform -> BlockSet -> CmmStmt -> CmmLint () -lintCmmStmt platform labels = lint - where lint (CmmNop) = return () - lint (CmmComment {}) = return () - lint stmt@(CmmAssign reg expr) = do - erep <- lintCmmExpr platform expr - let reg_ty = cmmRegType reg +lintCmmMiddle :: CmmNode O O -> CmmLint () +lintCmmMiddle node = case node of + CmmComment _ -> return () + + CmmAssign reg expr -> do + erep <- lintCmmExpr expr + let reg_ty = cmmRegType reg if (erep `cmmEqType_ignoring_ptrhood` reg_ty) then return () - else cmmLintAssignErr stmt erep reg_ty - lint (CmmStore l r) = do - _ <- lintCmmExpr platform l - _ <- lintCmmExpr platform r + else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty + + CmmStore l r -> do + _ <- lintCmmExpr l + _ <- lintCmmExpr r return () - lint (CmmCall target _res args _) = - do lintTarget platform labels target - mapM_ (lintCmmExpr platform . hintlessCmm) args - lint (CmmCondBranch e id) = checkTarget id >> lintCmmExpr platform e >> checkCond e - lint (CmmSwitch e branches) = do + + CmmUnsafeForeignCall target _formals actuals -> do + lintTarget target + mapM_ lintCmmExpr actuals + + +lintCmmLast :: BlockSet -> CmmNode O C -> CmmLint () +lintCmmLast labels node = case node of + CmmBranch id -> checkTarget id + + CmmCondBranch e t f -> do + mapM_ checkTarget [t,f] + _ <- lintCmmExpr e + checkCond e + + CmmSwitch e branches -> do mapM_ checkTarget $ catMaybes branches - erep <- lintCmmExpr platform e + erep <- lintCmmExpr e if (erep `cmmEqType_ignoring_ptrhood` bWord) then return () - else cmmLintErr (text "switch scrutinee is not a word: " <> ppr e <> - text " :: " <> ppr erep) - lint (CmmJump e _) = lintCmmExpr platform e >> return () - lint (CmmReturn) = return () - lint (CmmBranch id) = checkTarget id - checkTarget id = if setMember id labels then return () - else cmmLintErr (text "Branch to nonexistent id" <+> ppr id) - -lintTarget :: Platform -> BlockSet -> CmmCallTarget -> CmmLint () -lintTarget platform _ (CmmCallee e _) = do _ <- lintCmmExpr platform e - return () -lintTarget _ _ (CmmPrim _ Nothing) = return () -lintTarget platform labels (CmmPrim _ (Just stmts)) - = mapM_ (lintCmmStmt platform labels) stmts + else cmmLintErr (text "switch scrutinee is not a word: " <> + ppr e <> text " :: " <> ppr erep) + + CmmCall { cml_target = target, cml_cont = cont } -> do + _ <- lintCmmExpr target + maybe (return ()) checkTarget cont + + CmmForeignCall tgt _ args succ _ _ -> do + lintTarget tgt + mapM_ lintCmmExpr args + checkTarget succ + where + checkTarget id + | setMember id labels = return () + | otherwise = cmmLintErr (\_ -> text "Branch to nonexistent id" <+> ppr id) + + +lintTarget :: ForeignTarget -> CmmLint () +lintTarget (ForeignTarget e _) = lintCmmExpr e >> return () +lintTarget (PrimTarget {}) = return () checkCond :: CmmExpr -> CmmLint () @@ -163,7 +185,7 @@ checkCond (CmmMachOp mop _) | isComparisonMachOp mop = return () checkCond (CmmLit (CmmInt x t)) | x == 0 || x == 1, t == wordWidth = return () -- constant values checkCond expr = cmmLintErr (hang (text "expression is not a conditional:") 2 - (ppr expr)) + (ppr expr)) -- ----------------------------------------------------------------------------- -- CmmLint monad @@ -173,37 +195,36 @@ checkCond expr newtype CmmLint a = CmmLint { unCL :: Either SDoc a } instance Monad CmmLint where - CmmLint m >>= k = CmmLint $ case m of - Left e -> Left e - Right a -> unCL (k a) - return a = CmmLint (Right a) + CmmLint m >>= k = CmmLint $ \p -> case m p of + Left e -> Left e + Right a -> unCL (k a) p + return a = CmmLint (\_ -> Right a) cmmLintErr :: SDoc -> CmmLint a -cmmLintErr msg = CmmLint (Left msg) +cmmLintErr msg = CmmLint (\p -> Left (msg p)) addLintInfo :: SDoc -> CmmLint a -> CmmLint a -addLintInfo info thing = CmmLint $ - case unCL thing of - Left err -> Left (hang info 2 err) - Right a -> Right a +addLintInfo info thing = CmmLint $ \p -> + case unCL thing p of + Left err -> Left (hang (info p) 2 err) + Right a -> Right a cmmLintMachOpErr :: CmmExpr -> [CmmType] -> [Width] -> CmmLint a cmmLintMachOpErr expr argsRep opExpectsRep - = cmmLintErr (text "in MachOp application: " $$ - nest 2 (ppr expr) $$ - (text "op is expecting: " <+> ppr opExpectsRep) $$ - (text "arguments provide: " <+> ppr argsRep)) + = cmmLintErr (text "in MachOp application: " $$ + nest 2 (ppr expr) $$ + (text "op is expecting: " <+> ppr opExpectsRep) $$ + (text "arguments provide: " <+> ppr argsRep)) -cmmLintAssignErr :: CmmStmt -> CmmType -> CmmType -> CmmLint a +cmmLintAssignErr :: CmmNode e x -> CmmType -> CmmType -> CmmLint a cmmLintAssignErr stmt e_ty r_ty - = cmmLintErr (text "in assignment: " $$ - nest 2 (vcat [ppr stmt, - text "Reg ty:" <+> ppr r_ty, - text "Rhs ty:" <+> ppr e_ty])) - - + = cmmLintErr (text "in assignment: " $$ + nest 2 (vcat [ppr stmt, + text "Reg ty:" <+> ppr r_ty, + text "Rhs ty:" <+> ppr e_ty])) + cmmLintDubiousWordOffset :: CmmExpr -> CmmLint a cmmLintDubiousWordOffset expr = cmmLintErr (text "offset is not a multiple of words: " $$ - nest 2 (ppr expr)) + nest 2 (ppr expr)) diff --git a/compiler/cmm/CmmLive.hs b/compiler/cmm/CmmLive.hs index 9a5bb2d5ae..ac9c38b448 100644 --- a/compiler/cmm/CmmLive.hs +++ b/compiler/cmm/CmmLive.hs @@ -18,10 +18,9 @@ import Control.Monad import OptimizationFuel import PprCmmExpr () -import Compiler.Hoopl +import Hoopl import Maybes import Outputable -import UniqSet ----------------------------------------------------------------------------- -- Calculating what variables are live on entry to a basic block @@ -33,8 +32,10 @@ type CmmLive = RegSet -- | The dataflow lattice liveLattice :: DataflowLattice CmmLive liveLattice = DataflowLattice "live LocalReg's" emptyRegSet add - where add _ (OldFact old) (NewFact new) = case unionUniqSets old new of - join -> (changeIf $ sizeUniqSet join > sizeUniqSet old, join) + where add _ (OldFact old) (NewFact new) = + (changeIf $ sizeRegSet join > sizeRegSet old, join) + where !join = plusRegSet old new + -- | A mapping from block labels to the variables live on entry type BlockEntryLiveness = BlockEnv CmmLive @@ -43,16 +44,17 @@ type BlockEntryLiveness = BlockEnv CmmLive -- | Calculated liveness info for a CmmGraph ----------------------------------------------------------------------------- -cmmLiveness :: CmmGraph -> FuelUniqSM BlockEntryLiveness +cmmLiveness :: CmmGraph -> BlockEntryLiveness cmmLiveness graph = - liftM check $ liftM snd $ dataflowPassBwd graph [] $ analBwd liveLattice xferLive + check $ dataflowAnalBwd graph [] $ analBwd liveLattice xferLive where entry = g_entry graph - check facts = noLiveOnEntry entry (expectJust "check" $ mapLookup entry facts) facts + check facts = noLiveOnEntry entry + (expectJust "check" $ mapLookup entry facts) facts -- | On entry to the procedure, there had better not be any LocalReg's live-in. noLiveOnEntry :: BlockId -> CmmLive -> a -> a noLiveOnEntry bid in_fact x = - if isEmptyUniqSet in_fact then x + if nullRegSet in_fact then x else pprPanic "LocalReg's live-in to graph" (ppr bid <+> ppr in_fact) -- | The transfer equations use the traditional 'gen' and 'kill' @@ -60,42 +62,42 @@ noLiveOnEntry bid in_fact x = gen :: UserOfLocalRegs a => a -> RegSet -> RegSet gen a live = foldRegsUsed extendRegSet live a kill :: DefinerOfLocalRegs a => a -> RegSet -> RegSet -kill a live = foldRegsDefd delOneFromUniqSet live a +kill a live = foldRegsDefd deleteFromRegSet live a -gen_kill :: (DefinerOfLocalRegs a, UserOfLocalRegs a) => a -> CmmLive -> CmmLive +gen_kill :: (DefinerOfLocalRegs a, UserOfLocalRegs a) + => a -> CmmLive -> CmmLive gen_kill a = gen a . kill a -- | The transfer function --- EZY: Bits of this analysis are duplicated in CmmSpillReload, though --- it's not really easy to efficiently reuse all of this. Keep in mind --- if you need to update this analysis. xferLive :: BwdTransfer CmmNode CmmLive xferLive = mkBTransfer3 fst mid lst where fst _ f = f mid :: CmmNode O O -> CmmLive -> CmmLive mid n f = gen_kill n f lst :: CmmNode O C -> FactBase CmmLive -> CmmLive - -- slightly inefficient: kill is unnecessary for emptyRegSet - lst n f = gen_kill n - $ case n of CmmCall{} -> emptyRegSet - CmmForeignCall{} -> emptyRegSet - _ -> joinOutFacts liveLattice n f + lst n f = gen_kill n $ joinOutFacts liveLattice n f ----------------------------------------------------------------------------- -- Removing assignments to dead variables ----------------------------------------------------------------------------- -removeDeadAssignments :: CmmGraph -> FuelUniqSM CmmGraph +removeDeadAssignments :: CmmGraph -> FuelUniqSM (CmmGraph, BlockEnv CmmLive) removeDeadAssignments g = - liftM fst $ dataflowPassBwd g [] $ analRewBwd liveLattice xferLive rewrites - where rewrites = deepBwdRw3 nothing middle nothing - -- Beware: deepBwdRw with one polymorphic function seems more reasonable here, - -- but GHC panics while compiling, see bug #4045. + dataflowPassBwd g [] $ analRewBwd liveLattice xferLive rewrites + where rewrites = mkBRewrite3 nothing middle nothing + -- SDM: no need for deepBwdRw here, we only rewrite to empty + -- Beware: deepBwdRw with one polymorphic function seems more + -- reasonable here, but GHC panics while compiling, see bug + -- #4045. middle :: CmmNode O O -> Fact O CmmLive -> CmmReplGraph O O - middle (CmmAssign (CmmLocal reg') _) live | not (reg' `elemRegSet` live) = return $ Just emptyGraph + middle (CmmAssign (CmmLocal reg') _) live + | not (reg' `elemRegSet` live) + = return $ Just emptyGraph -- XXX maybe this should be somewhere else... - middle (CmmAssign lhs (CmmReg rhs)) _ | lhs == rhs = return $ Just emptyGraph - middle (CmmStore lhs (CmmLoad rhs _)) _ | lhs == rhs = return $ Just emptyGraph + middle (CmmAssign lhs (CmmReg rhs)) _ | lhs == rhs + = return $ Just emptyGraph + middle (CmmStore lhs (CmmLoad rhs _)) _ | lhs == rhs + = return $ Just emptyGraph middle _ _ = return Nothing nothing :: CmmNode e x -> Fact x CmmLive -> CmmReplGraph e x diff --git a/compiler/cmm/CmmNode.hs b/compiler/cmm/CmmNode.hs index 4844af9d9a..cd46794580 100644 --- a/compiler/cmm/CmmNode.hs +++ b/compiler/cmm/CmmNode.hs @@ -18,7 +18,7 @@ module CmmNode ( CmmNode(..), ForeignHint(..), CmmFormal, CmmActual, UpdFrameOffset, Convention(..), ForeignConvention(..), ForeignTarget(..), mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf, - mapExpM, mapExpDeepM, wrapRecExpM + mapExpM, mapExpDeepM, wrapRecExpM, mapSuccessors ) where import CmmExpr @@ -35,15 +35,17 @@ import Prelude hiding (succ) ------------------------ -- CmmNode +#define ULabel {-# UNPACK #-} !Label + data CmmNode e x where - CmmEntry :: Label -> CmmNode C O + CmmEntry :: ULabel -> CmmNode C O CmmComment :: FastString -> CmmNode O O - CmmAssign :: CmmReg -> CmmExpr -> CmmNode O O + CmmAssign :: !CmmReg -> !CmmExpr -> CmmNode O O -- Assign to register - CmmStore :: CmmExpr -> CmmExpr -> CmmNode O O + CmmStore :: !CmmExpr -> !CmmExpr -> CmmNode O O -- Assign to memory location. Size is -- given by cmmExprType of the rhs. @@ -60,11 +62,12 @@ data CmmNode e x where -- bug for what can be put in arguments, see -- Note [Register Parameter Passing] - CmmBranch :: Label -> CmmNode O C -- Goto another block in the same procedure + CmmBranch :: ULabel -> CmmNode O C + -- Goto another block in the same procedure CmmCondBranch :: { -- conditional branch cml_pred :: CmmExpr, - cml_true, cml_false :: Label + cml_true, cml_false :: ULabel } -> CmmNode O C CmmSwitch :: CmmExpr -> [Maybe Label] -> CmmNode O C -- Table branch @@ -78,6 +81,11 @@ data CmmNode e x where cml_cont :: Maybe Label, -- Label of continuation (Nothing for return or tail call) + -- + -- Note [Continuation BlockId]: these BlockIds are called + -- Continuation BlockIds, and are the only BlockIds that can + -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or + -- (CmmStackSlot (Young b) _). -- ToDO: add this: -- cml_args_regs :: [GlobalReg], @@ -117,7 +125,7 @@ data CmmNode e x where tgt :: ForeignTarget, -- call target and convention res :: [CmmFormal], -- zero or more results args :: [CmmActual], -- zero or more arguments; see Note [Register parameter passing] - succ :: Label, -- Label of continuation + succ :: ULabel, -- Label of continuation updfr :: UpdFrameOffset, -- where the update frame is (for building infotable) intrbl:: Bool -- whether or not the call is interruptible } -> CmmNode O C @@ -218,14 +226,6 @@ data Convention | GC -- Entry to the garbage collector: uses the node reg! | PrimOpCall -- Calling prim ops | PrimOpReturn -- Returning from prim ops - | Foreign -- Foreign call/return - ForeignConvention - | Private - -- Used for control transfers within a (pre-CPS) procedure All - -- jump sites known, never pushed on the stack (hence no SRT) - -- You can choose whatever calling convention you please - -- (provided you make sure all the call sites agree)! - -- This data type eventually to be extended to record the convention. deriving( Eq ) data ForeignConvention @@ -283,37 +283,6 @@ instance DefinerOfLocalRegs (CmmNode e x) where fold f z n = foldRegsDefd f z n -instance UserOfSlots (CmmNode e x) where - foldSlotsUsed f z n = case n of - CmmAssign _ expr -> fold f z expr - CmmStore addr rval -> fold f (fold f z addr) rval - CmmUnsafeForeignCall _ _ args -> fold f z args - CmmCondBranch expr _ _ -> fold f z expr - CmmSwitch expr _ -> fold f z expr - CmmCall {cml_target=tgt} -> fold f z tgt - CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args - _ -> z - where fold :: forall a b. - UserOfSlots a => - (b -> SubArea -> b) -> b -> a -> b - fold f z n = foldSlotsUsed f z n - -instance UserOfSlots ForeignTarget where - foldSlotsUsed f z (ForeignTarget e _) = foldSlotsUsed f z e - foldSlotsUsed _f z (PrimTarget _) = z - -instance DefinerOfSlots (CmmNode e x) where - foldSlotsDefd f z n = case n of - CmmStore (CmmStackSlot a i) expr -> f z (a, i, widthInBytes $ typeWidth $ cmmExprType expr) - CmmForeignCall {res=res} -> fold f z $ map foreign_call_slot res - _ -> z - where - fold :: forall a b. - DefinerOfSlots a => - (b -> SubArea -> b) -> b -> a -> b - fold f z n = foldSlotsDefd f z n - foreign_call_slot r = case widthInBytes $ typeWidth $ localRegType r of w -> (RegSlot r, w, w) - ----------------------------------- -- mapping Expr in CmmNode @@ -416,4 +385,20 @@ foldExp f (CmmCall {cml_target=tgt}) z = f tgt z foldExp f (CmmForeignCall {tgt=tgt, args=args}) z = foldr f (foldExpForeignTarget f tgt z) args foldExpDeep :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z -foldExpDeep f = foldExp $ wrapRecExpf f +foldExpDeep f = foldExp go + where -- go :: CmmExpr -> z -> z + go e@(CmmMachOp _ es) z = gos es $! f e z + go e@(CmmLoad addr _) z = go addr $! f e z + go e z = f e z + + gos [] z = z + gos (e:es) z = gos es $! f e z + +-- ----------------------------------------------------------------------------- + +mapSuccessors :: (Label -> Label) -> CmmNode O C -> CmmNode O C +mapSuccessors f (CmmBranch bid) = CmmBranch (f bid) +mapSuccessors f (CmmCondBranch p y n) = CmmCondBranch p (f y) (f n) +mapSuccessors f (CmmSwitch e arms) = CmmSwitch e (map (fmap f) arms) +mapSuccessors f n = n + diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs index 8cc18fc1ca..7c7ed393d9 100644 --- a/compiler/cmm/CmmOpt.hs +++ b/compiler/cmm/CmmOpt.hs @@ -145,7 +145,7 @@ To inline _smi: -} countUses :: UserOfLocalRegs a => a -> UniqFM Int -countUses a = foldRegsUsed (\m r -> addToUFM m r (count m r + 1)) emptyUFM a +countUses a = foldRegsUsed (\m r -> addToUFM_C (+) m r 1) emptyUFM a where count m r = lookupWithDefaultUFM m (0::Int) r cmmMiniInline :: DynFlags -> [CmmBasicBlock] -> [CmmBasicBlock] @@ -157,25 +157,16 @@ cmmMiniInlineStmts :: DynFlags -> UniqFM Int -> [CmmStmt] -> [CmmStmt] cmmMiniInlineStmts _ _ [] = [] cmmMiniInlineStmts dflags uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts) -- not used: just discard this assignment - | Nothing <- lookupUFM uses u - = cmmMiniInlineStmts dflags uses stmts + | 0 <- lookupWithDefaultUFM uses 0 u + = cmmMiniInlineStmts uses stmts - -- used (literal): try to inline at all the use sites - | Just n <- lookupUFM uses u, isLit expr - = - ncgDebugTrace ("nativeGen: inlining " ++ showSDoc dflags (pprStmt stmt)) $ - case lookForInlineLit u expr stmts of - (m, stmts') - | n == m -> cmmMiniInlineStmts dflags (delFromUFM uses u) stmts' - | otherwise -> - stmt : cmmMiniInlineStmts dflags (adjustUFM (\x -> x - m) uses u) stmts' - - -- used (foldable to literal): try to inline at all the use sites + -- used (foldable to small thing): try to inline at all the use sites | Just n <- lookupUFM uses u, - e@(CmmLit _) <- wrapRecExp foldExp expr + e <- wrapRecExp foldExp expr, + isTiny e = - ncgDebugTrace ("nativeGen: inlining " ++ showSDoc dflags (pprStmt stmt)) $ - case lookForInlineLit u e stmts of + ncgDebugTrace ("nativeGen: inlining " ++ showSDoc (pprStmt stmt)) $ + case lookForInlineMany u e stmts of (m, stmts') | n == m -> cmmMiniInlineStmts dflags (delFromUFM uses u) stmts' | otherwise -> @@ -188,6 +179,11 @@ cmmMiniInlineStmts dflags uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) ncgDebugTrace ("nativeGen: inlining " ++ showSDoc dflags (pprStmt stmt)) $ cmmMiniInlineStmts dflags uses stmts' where + isTiny (CmmLit _) = True + isTiny (CmmReg (CmmGlobal _)) = True + -- not CmmLocal: that might invalidate the usage analysis results + isTiny _ = False + platform = targetPlatform dflags foldExp (CmmMachOp op args) = cmmMachOpFold platform op args foldExp e = e @@ -201,26 +197,28 @@ cmmMiniInlineStmts platform uses (stmt:stmts) -- register, and a list of statements. Inlines the expression at all -- use sites of the register. Returns the number of substituations -- made and the, possibly modified, list of statements. -lookForInlineLit :: Unique -> CmmExpr -> [CmmStmt] -> (Int, [CmmStmt]) -lookForInlineLit _ _ [] = (0, []) -lookForInlineLit u expr stmts@(stmt : rest) - | Just n <- lookupUFM (countUses stmt) u - = case lookForInlineLit u expr rest of - (m, stmts) -> let z = n + m - in z `seq` (z, inlineStmt u expr stmt : stmts) - - | ok_to_skip - = case lookForInlineLit u expr rest of +lookForInlineMany :: Unique -> CmmExpr -> [CmmStmt] -> (Int, [CmmStmt]) +lookForInlineMany u expr stmts = lookForInlineMany' u expr regset stmts + where regset = foldRegsUsed extendRegSet emptyRegSet expr + +lookForInlineMany' :: Unique -> CmmExpr -> RegSet -> [CmmStmt] -> (Int, [CmmStmt]) +lookForInlineMany' _ _ _ [] = (0, []) +lookForInlineMany' u expr regset stmts@(stmt : rest) + | Just n <- lookupUFM (countUses stmt) u, okToInline expr stmt + = let stmt' = inlineStmt u expr stmt in + if okToSkip stmt' u expr regset + then case lookForInlineMany' u expr regset rest of + (m, stmts) -> let z = n + m + in z `seq` (z, stmt' : stmts) + else (n, stmt' : rest) + + | okToSkip stmt u expr regset + = case lookForInlineMany' u expr regset rest of (n, stmts) -> (n, stmt : stmts) | otherwise = (0, stmts) - where - -- We skip over assignments to registers, unless the register - -- being assigned to is the one we're inlining. - ok_to_skip = case stmt of - CmmAssign (CmmLocal (LocalReg u' _)) _ | u' == u -> False - _other -> True + lookForInline :: Unique -> CmmExpr -> [CmmStmt] -> Maybe [CmmStmt] lookForInline u expr stmts = lookForInline' u expr regset stmts @@ -229,10 +227,10 @@ lookForInline u expr stmts = lookForInline' u expr regset stmts lookForInline' :: Unique -> CmmExpr -> RegSet -> [CmmStmt] -> Maybe [CmmStmt] lookForInline' _ _ _ [] = panic "lookForInline' []" lookForInline' u expr regset (stmt : rest) - | Just 1 <- lookupUFM (countUses stmt) u, ok_to_inline + | Just 1 <- lookupUFM (countUses stmt) u, okToInline expr stmt = Just (inlineStmt u expr stmt : rest) - | ok_to_skip + | okToSkip stmt u expr regset = case lookForInline' u expr regset rest of Nothing -> Nothing Just stmts -> Just (stmt:stmts) @@ -240,31 +238,36 @@ lookForInline' u expr regset (stmt : rest) | otherwise = Nothing - where - -- we don't inline into CmmCall if the expression refers to global - -- registers. This is a HACK to avoid global registers clashing with - -- C argument-passing registers, really the back-end ought to be able - -- to handle it properly, but currently neither PprC nor the NCG can - -- do it. See also CgForeignCall:load_args_into_temps. - ok_to_inline = case stmt of - CmmCall{} -> hasNoGlobalRegs expr - _ -> True - - -- Expressions aren't side-effecting. Temporaries may or may not - -- be single-assignment depending on the source (the old code - -- generator creates single-assignment code, but hand-written Cmm - -- and Cmm from the new code generator is not single-assignment.) - -- So we do an extra check to make sure that the register being - -- changed is not one we were relying on. I don't know how much of a - -- performance hit this is (we have to create a regset for every - -- instruction.) -- EZY - ok_to_skip = case stmt of - CmmNop -> True - CmmComment{} -> True - CmmAssign (CmmLocal r@(LocalReg u' _)) _rhs | u' /= u && not (r `elemRegSet` regset) -> True - CmmAssign g@(CmmGlobal _) _rhs -> not (g `regUsedIn` expr) - _other -> False +-- we don't inline into CmmCall if the expression refers to global +-- registers. This is a HACK to avoid global registers clashing with +-- C argument-passing registers, really the back-end ought to be able +-- to handle it properly, but currently neither PprC nor the NCG can +-- do it. See also CgForeignCall:load_args_into_temps. +okToInline :: CmmExpr -> CmmStmt -> Bool +okToInline expr CmmCall{} = hasNoGlobalRegs expr +okToInline _ _ = True + +-- Expressions aren't side-effecting. Temporaries may or may not +-- be single-assignment depending on the source (the old code +-- generator creates single-assignment code, but hand-written Cmm +-- and Cmm from the new code generator is not single-assignment.) +-- So we do an extra check to make sure that the register being +-- changed is not one we were relying on. I don't know how much of a +-- performance hit this is (we have to create a regset for every +-- instruction.) -- EZY +okToSkip stmt u expr regset + = case stmt of + CmmNop -> True + CmmComment{} -> True + CmmAssign (CmmLocal r@(LocalReg u' _)) _rhs | u' /= u && not (r `elemRegSet` regset) -> True + CmmAssign g@(CmmGlobal _) _rhs -> not (g `regUsedIn` expr) + CmmStore _ _ -> not_a_load expr + _other -> False + where + not_a_load (CmmMachOp _ args) = all not_a_load args + not_a_load (CmmLoad _ _) = False + not_a_load _ = True inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e) diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y index 075ed22ea9..f46d49e022 100644 --- a/compiler/cmm/CmmParse.y +++ b/compiler/cmm/CmmParse.y @@ -230,35 +230,31 @@ lits :: { [ExtFCode CmmExpr] } cmmproc :: { ExtCode } -- TODO: add real SRT/info tables to parsed Cmm - : info maybe_formals_without_hints maybe_gc_block maybe_frame '{' body '}' - { do ((entry_ret_label, info, live, formals, gc_block, frame), stmts) <- + : info maybe_formals_without_hints '{' body '}' + { do ((entry_ret_label, info, live, formals), stmts) <- getCgStmtsEC' $ loopDecls $ do { (entry_ret_label, info, live) <- $1; formals <- sequence $2; - gc_block <- $3; - frame <- $4; - $6; - return (entry_ret_label, info, live, formals, gc_block, frame) } + $4; + return (entry_ret_label, info, live, formals) } blks <- code (cgStmtsToBlocks stmts) - code (emitInfoTableAndCode entry_ret_label (CmmInfo gc_block frame info) formals blks) } + code (emitInfoTableAndCode entry_ret_label info formals blks) } | info maybe_formals_without_hints ';' { do (entry_ret_label, info, live) <- $1; formals <- sequence $2; - code (emitInfoTableAndCode entry_ret_label (CmmInfo Nothing Nothing info) formals []) } + code (emitInfoTableAndCode entry_ret_label info formals []) } - | NAME maybe_formals_without_hints maybe_gc_block maybe_frame '{' body '}' + | NAME maybe_formals_without_hints '{' body '}' {% withThisPackage $ \pkg -> do newFunctionName $1 pkg - ((formals, gc_block, frame), stmts) <- + (formals, stmts) <- getCgStmtsEC' $ loopDecls $ do { formals <- sequence $2; - gc_block <- $3; - frame <- $4; - $6; - return (formals, gc_block, frame) } + $4; + return formals } blks <- code (cgStmtsToBlocks stmts) - code (emitProc (CmmInfo gc_block frame CmmNonInfoTable) (mkCmmCodeLabel pkg $1) formals blks) } + code (emitProc CmmNonInfoTable (mkCmmCodeLabel pkg $1) formals blks) } info :: { ExtFCode (CLabel, CmmInfoTable, [Maybe LocalReg]) } : 'INFO_TABLE' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ')' @@ -599,18 +595,7 @@ formals_without_hints :: { [ExtFCode LocalReg] } formal_without_hint :: { ExtFCode LocalReg } : type NAME { newLocal $1 $2 } -maybe_frame :: { ExtFCode (Maybe UpdateFrame) } - : {- empty -} { return Nothing } - | 'jump' expr '(' exprs0 ')' { do { target <- $2; - args <- sequence $4; - return $ Just (UpdateFrame target args) } } - -maybe_gc_block :: { ExtFCode (Maybe BlockId) } - : {- empty -} { return Nothing } - | 'goto' NAME - { do l <- lookupLabel $2; return (Just l) } - -type :: { CmmType } +type :: { CmmType } : 'bits8' { b8 } | typenot8 { $1 } @@ -1073,7 +1058,8 @@ parseCmmFile dflags filename = do let msg = mkPlainErrMsg dflags span err return ((emptyBag, unitBag msg), Nothing) POk pst code -> do - cmm <- initC dflags no_module (getCmm (unEC code initEnv [] >> return ())) + st <- initC + let (cmm,_) = runC dflags no_module st (getCmm (unEC code initEnv [] >> return ())) let ms = getMessages pst if (errorsFound dflags ms) then return (ms, Nothing) diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs index 7361bbf385..adc27ab1ff 100644 --- a/compiler/cmm/CmmPipeline.hs +++ b/compiler/cmm/CmmPipeline.hs @@ -11,15 +11,17 @@ module CmmPipeline ( import CLabel import Cmm +import CmmLint import CmmLive import CmmBuildInfoTables import CmmCommonBlockElim import CmmProcPoint -import CmmSpillReload import CmmRewriteAssignments -import CmmStackLayout import CmmContFlowOpt import OptimizationFuel +import CmmLayoutStack +import Hoopl +import CmmUtils import DynFlags import ErrUtils @@ -28,6 +30,8 @@ import Data.Maybe import Control.Monad import Data.Map (Map) import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set import Outputable import StaticFlags @@ -53,32 +57,28 @@ import StaticFlags -- we actually need to do the initial pass. cmmPipeline :: HscEnv -- Compilation env including -- dynamic flags: -dcmm-lint -ddump-cps-cmm - -> (TopSRT, [CmmGroup]) -- SRT table and accumulating list of compiled procs + -> TopSRT -- SRT table and accumulating list of compiled procs -> CmmGroup -- Input C-- with Procedures - -> IO (TopSRT, [CmmGroup]) -- Output CPS transformed C-- -cmmPipeline hsc_env (topSRT, rst) prog = + -> IO (TopSRT, CmmGroup) -- Output CPS transformed C-- +cmmPipeline hsc_env topSRT prog = do let dflags = hsc_dflags hsc_env -- showPass dflags "CPSZ" - let tops = runCmmContFlowOpts prog - (cafEnvs, tops) <- liftM unzip $ mapM (cpsTop hsc_env) tops + (cafEnvs, tops) <- {-# SCC "tops" #-} liftM unzip $ mapM (cpsTop hsc_env) prog -- tops :: [[(CmmDecl,CAFSet]] (one list per group) - let topCAFEnv = mkTopCAFInfo (concat cafEnvs) + let topCAFEnv = {-# SCC "topCAFEnv" #-} mkTopCAFInfo (concat cafEnvs) -- folding over the groups - (topSRT, tops) <- foldM (toTops hsc_env topCAFEnv) (topSRT, []) tops + (topSRT, tops) <- {-# SCC "toTops" #-} foldM (toTops hsc_env topCAFEnv) (topSRT, []) tops let cmms :: CmmGroup cmms = reverse (concat tops) dumpIfSet_dyn dflags Opt_D_dump_cps_cmm "Post CPS Cmm" (ppr cmms) - -- SRT is not affected by control flow optimization pass - let prog' = runCmmContFlowOpts cmms - - return (topSRT, prog' : rst) + return (topSRT, cmms) {- [Note global fuel] ~~~~~~~~~~~~~~~~~~~~~ @@ -92,75 +92,63 @@ global to one compiler session. -- -ddump-cmmz cpsTop :: HscEnv -> CmmDecl -> IO ([(CLabel, CAFSet)], [(CAFSet, CmmDecl)]) -cpsTop _ p@(CmmData {}) = return ([], [(Map.empty, p)]) +cpsTop _ p@(CmmData {}) = return ([], [(Set.empty, p)]) cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) l g) = do - -- Why bother doing these early: dualLivenessWithInsertion, - -- insertLateReloads, rewriteAssignments? + ----------- Control-flow optimisations --------------- + g <- {-# SCC "cmmCfgOpts(1)" #-} return $ cmmCfgOpts g + dump Opt_D_dump_cmmz_cfg "Post control-flow optimsations" g ----------- Eliminate common blocks ------------------- - g <- return $ elimCommonBlocks g + g <- {-# SCC "elimCommonBlocks" #-} return $ elimCommonBlocks g dump Opt_D_dump_cmmz_cbe "Post common block elimination" g - -- Any work storing block Labels must be performed _after_ elimCommonBlocks + -- Any work storing block Labels must be performed _after_ + -- elimCommonBlocks ----------- Proc points ------------------- - let callPPs = callProcPoints g - procPoints <- run $ minimalProcPointSet (targetPlatform dflags) callPPs g - g <- run $ addProcPointProtocols callPPs procPoints g - dump Opt_D_dump_cmmz_proc "Post Proc Points Added" g - - ----------- Spills and reloads ------------------- - g <- run $ dualLivenessWithInsertion procPoints g - dump Opt_D_dump_cmmz_spills "Post spills and reloads" g - - ----------- Sink and inline assignments ------------------- - g <- runOptimization $ rewriteAssignments platform g - dump Opt_D_dump_cmmz_rewrite "Post rewrite assignments" g - - ----------- Eliminate dead assignments ------------------- - g <- runOptimization $ removeDeadAssignments g - dump Opt_D_dump_cmmz_dead "Post remove dead assignments" g - - ----------- Zero dead stack slots (Debug only) --------------- - -- Debugging: stubbing slots on death can cause crashes early - g <- if opt_StubDeadValues - then run $ stubSlotsOnDeath g - else return g - dump Opt_D_dump_cmmz_stub "Post stub dead stack slots" g - - --------------- Stack layout ---------------- - slotEnv <- run $ liveSlotAnal g - let spEntryMap = getSpEntryMap entry_off g - mbpprTrace "live slot analysis results: " (ppr slotEnv) $ return () - let areaMap = layout procPoints spEntryMap slotEnv entry_off g - mbpprTrace "areaMap" (ppr areaMap) $ return () - - ------------ Manifest the stack pointer -------- - g <- run $ manifestSP spEntryMap areaMap entry_off g - dump Opt_D_dump_cmmz_sp "Post manifestSP" g - -- UGH... manifestSP can require updates to the procPointMap. - -- We can probably do something quicker here for the update... + let callPPs = {-# SCC "callProcPoints" #-} callProcPoints g + procPoints <- {-# SCC "minimalProcPointSet" #-} run $ + minimalProcPointSet (targetPlatform dflags) callPPs g + + ----------- Layout the stack and manifest Sp --------------- + -- (also does: removeDeadAssignments, and lowerSafeForeignCalls) + (g, stackmaps) <- {-# SCC "layoutStack" #-} + run $ cmmLayoutStack procPoints entry_off g + dump Opt_D_dump_cmmz_sp "Layout Stack" g + + g <- {-# SCC "sink" #-} run $ cmmSink g + dump Opt_D_dump_cmmz_rewrite "Sink assignments" g + +-- ----------- Sink and inline assignments ------------------- +-- g <- {-# SCC "rewriteAssignments" #-} runOptimization $ +-- rewriteAssignments platform g +-- dump Opt_D_dump_cmmz_rewrite "Post rewrite assignments" g ------------- Split into separate procedures ------------ - procPointMap <- run $ procPointAnalysis procPoints g - dump Opt_D_dump_cmmz_procmap "procpoint map" procPointMap - gs <- run $ splitAtProcPoints l callPPs procPoints procPointMap - (CmmProc h l g) - mapM_ (dump Opt_D_dump_cmmz_split "Post splitting") gs - - ------------- More CAFs and foreign calls ------------ - cafEnv <- run $ cafAnal g - let localCAFs = catMaybes $ map (localCAFInfo cafEnv) gs + procPointMap <- {-# SCC "procPointAnalysis" #-} run $ + procPointAnalysis procPoints g + dumpWith dflags Opt_D_dump_cmmz_procmap "procpoint map" procPointMap + gs <- {-# SCC "splitAtProcPoints" #-} run $ + splitAtProcPoints l callPPs procPoints procPointMap (CmmProc h l g) + dumps Opt_D_dump_cmmz_split "Post splitting" gs + + ------------- More CAFs ------------------------------ + let cafEnv = {-# SCC "cafAnal" #-} cafAnal platform g + let localCAFs = {-# SCC "localCAFs" #-} catMaybes $ map (localCAFInfo platform cafEnv) gs mbpprTrace "localCAFs" (ppr localCAFs) $ return () - gs <- run $ mapM (lowerSafeForeignCalls areaMap) gs - mapM_ (dump Opt_D_dump_cmmz_lower "Post lowerSafeForeignCalls") gs - -- NO MORE GRAPH TRANSFORMATION AFTER HERE -- JUST MAKING INFOTABLES - gs <- return $ map (setInfoTableStackMap slotEnv areaMap) gs - mapM_ (dump Opt_D_dump_cmmz_info "after setInfoTableStackMap") gs - gs <- return $ map (bundleCAFs cafEnv) gs - mapM_ (dump Opt_D_dump_cmmz_cafs "after bundleCAFs") gs + gs <- {-# SCC "setInfoTableStackMap" #-} + return $ map (setInfoTableStackMap stackmaps) gs + dumps Opt_D_dump_cmmz_info "after setInfoTableStackMap" gs + + ----------- Control-flow optimisations --------------- + gs <- {-# SCC "cmmCfgOpts(2)" #-} return $ map cmmCfgOptsProc gs + dumps Opt_D_dump_cmmz_cfg "Post control-flow optimsations" gs + + gs <- {-# SCC "bundleCAFs" #-} return $ map (bundleCAFs cafEnv) gs + dumps Opt_D_dump_cmmz_cafs "after bundleCAFs" gs + return (localCAFs, gs) -- gs :: [ (CAFSet, CmmDecl) ] @@ -168,21 +156,40 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) where dflags = hsc_dflags hsc_env platform = targetPlatform dflags - mbpprTrace x y z = if dopt Opt_D_dump_cmmz dflags then pprTrace x y z else z - dump f = dumpWith ppr f - dumpWith pprFun f txt g = do - -- ToDo: No easy way of say "dump all the cmmz, *and* split - -- them into files." Also, -ddump-cmmz doesn't play nicely - -- with -ddump-to-file, since the headers get omitted. - dumpIfSet_dyn dflags f txt (pprFun g) - when (not (dopt f dflags)) $ - dumpIfSet_dyn dflags Opt_D_dump_cmmz txt (pprFun g) + mbpprTrace x y z | dopt Opt_D_dump_cmmz dflags = pprTrace x y z + | otherwise = z + dump = dumpGraph dflags + + dumps flag name + = mapM_ (dumpWith dflags flag name) + -- Runs a required transformation/analysis run = runInfiniteFuelIO (hsc_OptFuel hsc_env) -- Runs an optional transformation/analysis (and should -- thus be subject to optimization fuel) runOptimization = runFuelIO (hsc_OptFuel hsc_env) + +dumpGraph :: DynFlags -> DynFlag -> String -> CmmGraph -> IO () +dumpGraph dflags flag name g = do + when (dopt Opt_DoCmmLinting dflags) $ do_lint g + dumpWith dflags flag name g + where + do_lint g = case cmmLintGraph (targetPlatform dflags) g of + Just err -> do { printDump err + ; ghcExit dflags 1 + } + Nothing -> return () + +dumpWith :: Outputable a => DynFlags -> DynFlag -> String -> a -> IO () +dumpWith dflags flag txt g = do + -- ToDo: No easy way of say "dump all the cmmz, *and* split + -- them into files." Also, -ddump-cmmz doesn't play nicely + -- with -ddump-to-file, since the headers get omitted. + dumpIfSet_dyn dflags flag txt (ppr g) + when (not (dopt flag dflags)) $ + dumpIfSet_dyn dflags Opt_D_dump_cmmz txt (ppr g) + -- This probably belongs in CmmBuildInfoTables? -- We're just finishing the job here: once we know what CAFs are defined -- in non-static closures, we can build the SRTs. diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs index f50d850b3a..8dda51b9b7 100644 --- a/compiler/cmm/CmmProcPoint.hs +++ b/compiler/cmm/CmmProcPoint.hs @@ -4,7 +4,7 @@ module CmmProcPoint ( ProcPointSet, Status(..) , callProcPoints, minimalProcPointSet - , addProcPointProtocols, splitAtProcPoints, procPointAnalysis + , splitAtProcPoints, procPointAnalysis ) where @@ -28,7 +28,7 @@ import Platform import UniqSet import UniqSupply -import Compiler.Hoopl +import Hoopl import qualified Data.Map as Map @@ -103,34 +103,50 @@ instance Outputable Status where (hsep $ punctuate comma $ map ppr $ setElems ps) ppr ProcPoint = text "<procpt>" -lattice :: DataflowLattice Status -lattice = DataflowLattice "direct proc-point reachability" unreached add_to - where unreached = ReachedBy setEmpty - add_to _ (OldFact ProcPoint) _ = (NoChange, ProcPoint) - add_to _ _ (NewFact ProcPoint) = (SomeChange, ProcPoint) -- because of previous case - add_to _ (OldFact (ReachedBy p)) (NewFact (ReachedBy p')) = - let union = setUnion p' p - in if setSize union > setSize p then (SomeChange, ReachedBy union) - else (NoChange, ReachedBy p) -------------------------------------------------- +-- Proc point analysis + +procPointAnalysis :: ProcPointSet -> CmmGraph -> FuelUniqSM (BlockEnv Status) +-- Once you know what the proc-points are, figure out +-- what proc-points each block is reachable from +procPointAnalysis procPoints g = + -- pprTrace "procPointAnalysis" (ppr procPoints) $ + dataflowAnalFwdBlocks g initProcPoints $ analFwd lattice forward + where initProcPoints = [(id, ProcPoint) | id <- setElems procPoints] + -- transfer equations forward :: FwdTransfer CmmNode Status -forward = mkFTransfer3 first middle ((mkFactBase lattice . ) . last) - where first :: CmmNode C O -> Status -> Status - first (CmmEntry id) ProcPoint = ReachedBy $ setSingleton id - first _ x = x +forward = mkFTransfer3 first middle last + where + first :: CmmNode C O -> Status -> Status + first (CmmEntry id) ProcPoint = ReachedBy $ setSingleton id + first _ x = x - middle _ x = x + middle _ x = x - last :: CmmNode O C -> Status -> [(Label, Status)] - last (CmmCall {cml_cont = Just k}) _ = [(k, ProcPoint)] - last (CmmForeignCall {succ = k}) _ = [(k, ProcPoint)] - last l x = map (\id -> (id, x)) (successors l) + last :: CmmNode O C -> Status -> FactBase Status + last l x = mkFactBase lattice $ map (\id -> (id, x)) (successors l) --- It is worth distinguishing two sets of proc points: --- those that are induced by calls in the original graph --- and those that are introduced because they're reachable from multiple proc points. +lattice :: DataflowLattice Status +lattice = DataflowLattice "direct proc-point reachability" unreached add_to + where unreached = ReachedBy setEmpty + add_to _ (OldFact ProcPoint) _ = (NoChange, ProcPoint) + add_to _ _ (NewFact ProcPoint) = (SomeChange, ProcPoint) + -- because of previous case + add_to _ (OldFact (ReachedBy p)) (NewFact (ReachedBy p')) + | setSize union > setSize p = (SomeChange, ReachedBy union) + | otherwise = (NoChange, ReachedBy p) + where + union = setUnion p' p + +---------------------------------------------------------------------- + +-- It is worth distinguishing two sets of proc points: those that are +-- induced by calls in the original graph and those that are +-- introduced because they're reachable from multiple proc points. +-- +-- Extract the set of Continuation BlockIds, see Note [Continuation BlockIds]. callProcPoints :: CmmGraph -> ProcPointSet callProcPoints g = foldGraphBlocks add (setSingleton (g_entry g)) g where add :: CmmBlock -> BlockSet -> BlockSet @@ -139,21 +155,17 @@ callProcPoints g = foldGraphBlocks add (setSingleton (g_entry g)) g CmmForeignCall {succ=k} -> setInsert k set _ -> set -minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph -> FuelUniqSM ProcPointSet +minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph + -> FuelUniqSM ProcPointSet -- Given the set of successors of calls (which must be proc-points) -- figure out the minimal set of necessary proc-points -minimalProcPointSet platform callProcPoints g = extendPPSet platform g (postorderDfs g) callProcPoints - -procPointAnalysis :: ProcPointSet -> CmmGraph -> FuelUniqSM (BlockEnv Status) --- Once you know what the proc-points are, figure out --- what proc-points each block is reachable from -procPointAnalysis procPoints g = - liftM snd $ dataflowPassFwd g initProcPoints $ analFwd lattice forward - where initProcPoints = [(id, ProcPoint) | id <- setElems procPoints] +minimalProcPointSet platform callProcPoints g + = extendPPSet platform g (postorderDfs g) callProcPoints extendPPSet :: Platform -> CmmGraph -> [CmmBlock] -> ProcPointSet -> FuelUniqSM ProcPointSet extendPPSet platform g blocks procPoints = do env <- procPointAnalysis procPoints g + -- pprTrace "extensPPSet" (ppr env) $ return () let add block pps = let id = entryLabel block in case mapLookup id env of Just ProcPoint -> setInsert id pps @@ -179,183 +191,13 @@ extendPPSet platform g blocks procPoints = pps -> extendPPSet g blocks (foldl extendBlockSet procPoints' pps) -} - case newPoint of Just id -> - if setMember id procPoints' then panic "added old proc pt" - else extendPPSet platform g blocks (setInsert id procPoints') - Nothing -> return procPoints' - - ------------------------------------------------------------------------- --- Computing Proc-Point Protocols -- ------------------------------------------------------------------------- + case newPoint of + Just id -> + if setMember id procPoints' + then panic "added old proc pt" + else extendPPSet platform g blocks (setInsert id procPoints') + Nothing -> return procPoints' -{- - -There is one major trick, discovered by Michael Adams, which is that -we want to choose protocols in a way that enables us to optimize away -some continuations. The optimization is very much like branch-chain -elimination, except that it involves passing results as well as -control. The idea is that if a call's continuation k does nothing but -CopyIn its results and then goto proc point P, the call's continuation -may be changed to P, *provided* P's protocol is identical to the -protocol for the CopyIn. We choose protocols to make this so. - -Here's an explanatory example; we begin with the source code (lines -separate basic blocks): - - ..1..; - x, y = g(); - goto P; - ------- - P: ..2..; - -Zipperization converts this code as follows: - - ..1..; - call g() returns to k; - ------- - k: CopyIn(x, y); - goto P; - ------- - P: ..2..; - -What we'd like to do is assign P the same CopyIn protocol as k, so we -can eliminate k: - - ..1..; - call g() returns to P; - ------- - P: CopyIn(x, y); ..2..; - -Of course, P may be the target of more than one continuation, and -different continuations may have different protocols. Michael Adams -implemented a voting mechanism, but he thinks a simple greedy -algorithm would be just as good, so that's what we do. - --} - -data Protocol = Protocol Convention [CmmFormal] Area - deriving Eq -instance Outputable Protocol where - ppr (Protocol c fs a) = text "Protocol" <+> ppr c <+> ppr fs <+> ppr a - --- | Function 'optimize_calls' chooses protocols only for those proc --- points that are relevant to the optimization explained above. --- The others are assigned by 'add_unassigned', which is not yet clever. - -addProcPointProtocols :: ProcPointSet -> ProcPointSet -> CmmGraph -> FuelUniqSM CmmGraph -addProcPointProtocols callPPs procPoints g = - do liveness <- cmmLiveness g - (protos, g') <- optimize_calls liveness g - blocks'' <- add_CopyOuts protos procPoints g' - return $ ofBlockMap (g_entry g) blocks'' - where optimize_calls liveness g = -- see Note [Separate Adams optimization] - do let (protos, blocks') = - foldGraphBlocks maybe_add_call (mapEmpty, mapEmpty) g - protos' = add_unassigned liveness procPoints protos - let g' = ofBlockMap (g_entry g) (add_CopyIns callPPs protos' blocks') - return (protos', removeUnreachableBlocks g') - maybe_add_call :: CmmBlock -> (BlockEnv Protocol, BlockEnv CmmBlock) - -> (BlockEnv Protocol, BlockEnv CmmBlock) - -- ^ If the block is a call whose continuation goes to a proc point - -- whose protocol either matches the continuation's or is not yet set, - -- redirect the call (cf 'newblock') and set the protocol if necessary - maybe_add_call block (protos, blocks) = - case lastNode block of - CmmCall tgt (Just k) args res s - | Just proto <- mapLookup k protos, - Just pee <- branchesToProcPoint k - -> let newblock = replaceLastNode block (CmmCall tgt (Just pee) - args res s) - changed_blocks = insertBlock newblock blocks - unchanged_blocks = insertBlock block blocks - in case mapLookup pee protos of - Nothing -> (mapInsert pee proto protos, changed_blocks) - Just proto' -> - if proto == proto' then (protos, changed_blocks) - else (protos, unchanged_blocks) - _ -> (protos, insertBlock block blocks) - - branchesToProcPoint :: BlockId -> Maybe BlockId - -- ^ Tells whether the named block is just a branch to a proc point - branchesToProcPoint id = - let block = mapLookup id (toBlockMap g) `orElse` - panic "branch out of graph" - in case blockToNodeList block of - (_, [], JustC (CmmBranch pee)) | setMember pee procPoints -> Just pee - _ -> Nothing - --- | For now, following a suggestion by Ben Lippmeier, we pass all --- live variables as arguments, hoping that a clever register --- allocator might help. - -add_unassigned :: BlockEnv CmmLive -> ProcPointSet -> BlockEnv Protocol -> - BlockEnv Protocol -add_unassigned = pass_live_vars_as_args - -pass_live_vars_as_args :: BlockEnv CmmLive -> ProcPointSet -> - BlockEnv Protocol -> BlockEnv Protocol -pass_live_vars_as_args _liveness procPoints protos = protos' - where protos' = setFold addLiveVars protos procPoints - addLiveVars :: BlockId -> BlockEnv Protocol -> BlockEnv Protocol - addLiveVars id protos = - case mapLookup id protos of - Just _ -> protos - Nothing -> let live = emptyRegSet - --lookupBlockEnv _liveness id `orElse` - --panic ("no liveness at block " ++ show id) - formals = uniqSetToList live - prot = Protocol Private formals $ CallArea $ Young id - in mapInsert id prot protos - - --- | Add copy-in instructions to each proc point that did not arise from a call --- instruction. (Proc-points that arise from calls already have their copy-in instructions.) - -add_CopyIns :: ProcPointSet -> BlockEnv Protocol -> BlockEnv CmmBlock -> BlockEnv CmmBlock -add_CopyIns callPPs protos blocks = mapFold maybe_insert_CopyIns mapEmpty blocks - where maybe_insert_CopyIns block blocks - | not $ setMember bid callPPs - , Just (Protocol c fs _area) <- mapLookup bid protos - = let nodes = copyInSlot c fs - (h, m, l) = blockToNodeList block - in insertBlock (blockOfNodeList (h, nodes ++ m, l)) blocks - | otherwise = insertBlock block blocks - where bid = entryLabel block - - --- | Add a CopyOut node before each procpoint. --- If the predecessor is a call, then the copy outs should already be done by the callee. --- Note: If we need to add copy-out instructions, they may require stack space, --- so we accumulate a map from the successors to the necessary stack space, --- then update the successors after we have finished inserting the copy-outs. - -add_CopyOuts :: BlockEnv Protocol -> ProcPointSet -> CmmGraph -> - FuelUniqSM (BlockEnv CmmBlock) -add_CopyOuts protos procPoints g = foldGraphBlocks mb_copy_out (return mapEmpty) g - where mb_copy_out :: CmmBlock -> FuelUniqSM (BlockEnv CmmBlock) -> - FuelUniqSM (BlockEnv CmmBlock) - mb_copy_out b z | entryLabel b == g_entry g = skip b z - mb_copy_out b z = - case lastNode b of - CmmCall {} -> skip b z -- copy out done by callee - CmmForeignCall {} -> skip b z -- copy out done by callee - _ -> copy_out b z - copy_out b z = foldr trySucc init (successors b) >>= finish - where init = (\bmap -> (b, bmap)) `liftM` z - trySucc succId z = - if setMember succId procPoints then - case mapLookup succId protos of - Nothing -> z - Just (Protocol c fs _area) -> insert z succId $ copyOutSlot c fs - else z - insert z succId m = - do (b, bmap) <- z - (b, bs) <- insertBetween b m succId - -- pprTrace "insert for succ" (ppr succId <> ppr m) $ do - return $ (b, foldl (flip insertBlock) bmap bs) - finish (b, bmap) = return $ insertBlock b bmap - skip b bs = insertBlock b `liftM` bs -- At this point, we have found a set of procpoints, each of which should be -- the entry point of a procedure. @@ -384,15 +226,18 @@ splitAtProcPoints entry_label callPPs procPoints procMap [] -> graphEnv [id] -> add graphEnv id bid b _ -> panic "Each block should be reachable from only one ProcPoint" - Nothing -> pprPanic "block not reached by a proc point?" (ppr bid) + Nothing -> graphEnv where bid = entryLabel b add graphEnv procId bid b = mapInsert procId graph' graphEnv where graph = mapLookup procId graphEnv `orElse` mapEmpty graph' = mapInsert bid b graph + graphEnv <- return $ foldGraphBlocks addBlock emptyBlockMap g + -- Build a map from proc point BlockId to pairs of: -- * Labels for their new procedures - -- * Labels for the info tables of their new procedures (only if the proc point is a callPP) + -- * Labels for the info tables of their new procedures (only if + -- the proc point is a callPP) -- Due to common blockification, we may overestimate the set of procpoints. let add_label map pp = Map.insert pp lbls map where lbls | pp == entry = (entry_label, Just entry_info_lbl) @@ -401,30 +246,15 @@ splitAtProcPoints entry_label callPPs procPoints procMap entry_info_lbl = cit_lbl info_tbl procLabels = foldl add_label Map.empty (filter (flip mapMember (toBlockMap g)) (setElems procPoints)) - -- For each procpoint, we need to know the SP offset on entry. - -- If the procpoint is: - -- - continuation of a call, the SP offset is in the call - -- - otherwise, 0 (and left out of the spEntryMap) - let add_sp_off :: CmmBlock -> BlockEnv CmmStackInfo -> BlockEnv CmmStackInfo - add_sp_off b env = - case lastNode b of - CmmCall {cml_cont = Just succ, cml_ret_args = off, cml_ret_off = updfr_off} -> - mapInsert succ (StackInfo { arg_space = off, updfr_space = Just updfr_off}) env - CmmForeignCall {succ = succ, updfr = updfr_off} -> - mapInsert succ (StackInfo { arg_space = wORD_SIZE, updfr_space = Just updfr_off}) env - _ -> env - spEntryMap = foldGraphBlocks add_sp_off (mapInsert entry stack_info emptyBlockMap) g - getStackInfo id = mapLookup id spEntryMap `orElse` StackInfo {arg_space = 0, updfr_space = Nothing} -- In each new graph, add blocks jumping off to the new procedures, -- and replace branches to procpoints with branches to the jump-off blocks let add_jump_block (env, bs) (pp, l) = do bid <- liftM mkBlockId getUniqueM - let b = blockOfNodeList (JustC (CmmEntry bid), [], JustC jump) - StackInfo {arg_space = argSpace, updfr_space = off} = getStackInfo pp - jump = CmmCall (CmmLit (CmmLabel l)) Nothing argSpace 0 - (off `orElse` 0) -- Jump's shouldn't need the offset... + let b = blockJoin (CmmEntry bid) emptyBlock jump + jump = CmmCall (CmmLit (CmmLabel l)) Nothing 0 0 0 return (mapInsert pp bid env, b : bs) - add_jumps (newGraphEnv) (ppId, blockEnv) = + + add_jumps newGraphEnv (ppId, blockEnv) = do let needed_jumps = -- find which procpoints we currently branch to mapFold add_if_branch_to_pp [] blockEnv add_if_branch_to_pp :: CmmBlock -> [(BlockId, CLabel)] -> [(BlockId, CLabel)] @@ -441,17 +271,16 @@ splitAtProcPoints entry_label callPPs procPoints procMap foldM add_jump_block (mapEmpty, []) needed_jumps -- update the entry block let b = expectJust "block in env" $ mapLookup ppId blockEnv - off = getStackInfo ppId blockEnv' = mapInsert ppId b blockEnv -- replace branches to procpoints with branches to jumps blockEnv'' = toBlockMap $ replaceBranches jumpEnv $ ofBlockMap ppId blockEnv' -- add the jump blocks to the graph blockEnv''' = foldl (flip insertBlock) blockEnv'' jumpBlocks - let g' = (off, ofBlockMap ppId blockEnv''') + let g' = ofBlockMap ppId blockEnv''' -- pprTrace "g' pre jumps" (ppr g') $ do return (mapInsert ppId g' newGraphEnv) graphEnv <- foldM add_jumps emptyBlockMap $ mapToList graphEnv - let to_proc (bid, (stack_info, g)) = case expectJust "pp label" $ Map.lookup bid procLabels of + let to_proc (bid, g) = case expectJust "pp label" $ Map.lookup bid procLabels of (lbl, Just info_lbl) | bid == entry -> CmmProc (TopInfo {info_tbl=info_tbl, stack_info=stack_info}) @@ -462,15 +291,22 @@ splitAtProcPoints entry_label callPPs procPoints procMap (lbl, Nothing) -> CmmProc (TopInfo {info_tbl=CmmNonInfoTable, stack_info=stack_info}) lbl (replacePPIds g) - -- References to procpoint IDs can now be replaced with the infotable's label - replacePPIds g = mapGraphNodes (id, mapExp repl, mapExp repl) g + where + stack_info = panic "No StackInfo" + + -- References to procpoint IDs can now be replaced with the + -- infotable's label + replacePPIds g = {-# SCC "replacePPIds" #-} + mapGraphNodes (id, mapExp repl, mapExp repl) g where repl e@(CmmLit (CmmBlock bid)) = case Map.lookup bid procLabels of Just (_, Just info_lbl) -> CmmLit (CmmLabel info_lbl) _ -> e repl e = e - -- The C back end expects to see return continuations before the call sites. - -- Here, we sort them in reverse order -- it gets reversed later. + + -- The C back end expects to see return continuations before the + -- call sites. Here, we sort them in reverse order -- it gets + -- reversed later. let (_, block_order) = foldl add_block_num (0::Int, emptyBlockMap) (postorderDfs g) add_block_num (i, map) block = (i+1, mapInsert (entryLabel block) i map) sort_fn (bid, _) (bid', _) = @@ -482,6 +318,27 @@ splitAtProcPoints entry_label callPPs procPoints procMap procs splitAtProcPoints _ _ _ _ t@(CmmData _ _) = return [t] + +-- Only called from CmmProcPoint.splitAtProcPoints. NB. does a +-- recursive lookup, see comment below. +replaceBranches :: BlockEnv BlockId -> CmmGraph -> CmmGraph +replaceBranches env cmmg + = {-# SCC "replaceBranches" #-} + ofBlockMap (g_entry cmmg) $ mapMap f $ toBlockMap cmmg + where + f block = replaceLastNode block $ last (lastNode block) + + last :: CmmNode O C -> CmmNode O C + last (CmmBranch id) = CmmBranch (lookup id) + last (CmmCondBranch e ti fi) = CmmCondBranch e (lookup ti) (lookup fi) + last (CmmSwitch e tbl) = CmmSwitch e (map (fmap lookup) tbl) + last l@(CmmCall {}) = l + last l@(CmmForeignCall {}) = l + lookup id = fmap lookup (mapLookup id env) `orElse` id + -- XXX: this is a recursive lookup, it follows chains + -- until the lookup returns Nothing, at which point we + -- return the last BlockId + ---------------------------------------------------------------- {- diff --git a/compiler/cmm/CmmRewriteAssignments.hs b/compiler/cmm/CmmRewriteAssignments.hs index ecf3f7e0c3..2c33b7b5ac 100644 --- a/compiler/cmm/CmmRewriteAssignments.hs +++ b/compiler/cmm/CmmRewriteAssignments.hs @@ -27,7 +27,7 @@ import UniqFM import Unique import BlockId -import Compiler.Hoopl hiding (Unique) +import Hoopl import Data.Maybe import Prelude hiding (succ, zip) @@ -404,8 +404,8 @@ clobbers (CmmReg (CmmGlobal Hp), _) (_, _) = False -- ToDo: Also catch MachOp case clobbers (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (u, CmmLoad (ss'@CmmStackSlot{}) _) | getUnique r == u, ss == ss' = False -- No-op on the stack slot (XXX: Do we need this special case?) -clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr - where f (CmmLoad (CmmStackSlot (CallArea a') o') t) +clobbers (CmmStackSlot a o, rhs) (_, expr) = f expr + where f (CmmLoad (CmmStackSlot a' o') t) = (a, o, widthInBytes (cmmExprWidth rhs)) `overlaps` (a', o', widthInBytes (typeWidth t)) f (CmmLoad e _) = containsStackSlot e f (CmmMachOp _ es) = or (map f es) @@ -416,9 +416,6 @@ clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es) containsStackSlot (CmmStackSlot{}) = True containsStackSlot _ = False -clobbers (CmmStackSlot (RegSlot l) _, _) (_, expr) = f expr - where f (CmmLoad (CmmStackSlot (RegSlot l') _) _) = l == l' - f _ = False clobbers _ (_, e) = f e where f (CmmLoad (CmmStackSlot _ _) _) = False f (CmmLoad{}) = True -- conservative @@ -432,7 +429,7 @@ clobbers _ (_, e) = f e -- [ I32 ] -- [ F64 ] -- s' -w'- o' -type CallSubArea = (AreaId, Int, Int) -- area, offset, width +type CallSubArea = (Area, Int, Int) -- area, offset, width overlaps :: CallSubArea -> CallSubArea -> Bool overlaps (a, _, _) (a', _, _) | a /= a' = False overlaps (_, o, w) (_, o', w') = @@ -457,7 +454,7 @@ invalidateVolatile :: BlockId -> AssignmentMap -> AssignmentMap invalidateVolatile k m = mapUFM p m where p (AlwaysInline e) = if exp e then AlwaysInline e else NeverOptimize where exp CmmLit{} = True - exp (CmmLoad (CmmStackSlot (CallArea (Young k')) _) _) + exp (CmmLoad (CmmStackSlot (Young k') _) _) | k' == k = False exp (CmmLoad (CmmStackSlot _ _) _) = True exp (CmmMachOp _ es) = and (map exp es) @@ -596,10 +593,6 @@ assignmentRewrite = mkFRewrite3 first middle last where rep = typeWidth (localRegType r) _ -> old -- See Note [Soundness of store rewriting] - inlineExp assign old@(CmmLoad (CmmStackSlot (RegSlot r) _) _) - = case lookupUFM assign r of - Just (AlwaysInline x) -> x - _ -> old inlineExp _ old = old inlinable :: CmmNode e x -> Bool diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs deleted file mode 100644 index 9e762fe48a..0000000000 --- a/compiler/cmm/CmmSpillReload.hs +++ /dev/null @@ -1,166 +0,0 @@ -{-# LANGUAGE GADTs, NoMonoLocalBinds, FlexibleContexts #-} --- Norman likes local bindings --- If this module lives on I'd like to get rid of this flag in due course - -{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-} - --- TODO: Get rid of this flag: -{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} - -module CmmSpillReload - ( dualLivenessWithInsertion - ) -where - -import BlockId -import Cmm -import CmmUtils -import CmmLive -import OptimizationFuel - -import Control.Monad -import Outputable hiding (empty) -import qualified Outputable as PP -import UniqSet - -import Compiler.Hoopl hiding (Unique) -import Data.Maybe -import Prelude hiding (succ, zip) - -{- Note [Overview of spill/reload] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The point of this module is to insert spills and reloads to establish -the invariant that at a call or any proc point with an established -protocol all live variables not expected in registers are sitting on the -stack. We use a backward dual liveness analysis (both traditional -register liveness as well as register slot liveness on the stack) to -insert spills and reloads. It should be followed by a forward -transformation to sink reloads as deeply as possible, so as to reduce -register pressure: this transformation is performed by -CmmRewriteAssignments. - -A variable can be expected to be live in a register, live on the -stack, or both. This analysis ensures that spills and reloads are -inserted as needed to make sure that every live variable needed -after a call is available on the stack. Spills are placed immediately -after their reaching definitions, but reloads are placed immediately -after a return from a call (the entry point.) - -Note that we offer no guarantees about the consistency of the value -in memory and the value in the register, except that they are -equal across calls/procpoints. If the variable is changed, this -mapping breaks: but as the original value of the register may still -be useful in a different context, the memory location is not updated. --} - -data DualLive = DualLive { on_stack :: RegSet, in_regs :: RegSet } - -changeStack, changeRegs :: (RegSet -> RegSet) -> DualLive -> DualLive -changeStack f live = live { on_stack = f (on_stack live) } -changeRegs f live = live { in_regs = f (in_regs live) } - -dualLiveLattice :: DataflowLattice DualLive -dualLiveLattice = DataflowLattice "variables live in registers and on stack" empty add - where empty = DualLive emptyRegSet emptyRegSet - add _ (OldFact old) (NewFact new) = (changeIf $ change1 || change2, DualLive stack regs) - where (change1, stack) = add1 (on_stack old) (on_stack new) - (change2, regs) = add1 (in_regs old) (in_regs new) - add1 old new = if sizeUniqSet join > sizeUniqSet old then (True, join) else (False, old) - where join = unionUniqSets old new - -dualLivenessWithInsertion :: BlockSet -> CmmGraph -> FuelUniqSM CmmGraph -dualLivenessWithInsertion procPoints g = - liftM fst $ dataflowPassBwd g [] $ analRewBwd dualLiveLattice - (dualLiveTransfers (g_entry g) procPoints) - (insertSpillsAndReloads g procPoints) - --- Note [Live registers on entry to procpoints] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- Remember that the transfer function is only ever run on the rewritten --- version of a graph, and the rewrite function for spills and reloads --- enforces the invariant that no local registers are live on entry to --- a procpoint. Accordingly, we check for this invariant here. An old --- version of this code incorrectly claimed that any live registers were --- live on the stack before entering the function: this is wrong, but --- didn't cause bugs because it never actually was invoked. - -dualLiveTransfers :: BlockId -> BlockSet -> (BwdTransfer CmmNode DualLive) -dualLiveTransfers entry procPoints = mkBTransfer3 first middle last - where first :: CmmNode C O -> DualLive -> DualLive - first (CmmEntry id) live -- See Note [Live registers on entry to procpoints] - | id == entry || setMember id procPoints = noLiveOnEntry id (in_regs live) live - | otherwise = live - - middle :: CmmNode O O -> DualLive -> DualLive - middle m = changeStack updSlots - . changeRegs updRegs - where -- Reuse middle of liveness analysis from CmmLive - updRegs = case getBTransfer3 xferLive of (_, middle, _) -> middle m - - updSlots live = foldSlotsUsed reload (foldSlotsDefd spill live m) m - spill live s@(RegSlot r, _, _) = check s $ deleteFromRegSet live r - spill live _ = live - reload live s@(RegSlot r, _, _) = check s $ extendRegSet live r - reload live _ = live - -- Ensure the assignment refers to the entirety of the - -- register slot (and not just a slice). - check (RegSlot (LocalReg _ ty), o, w) x - | o == w && w == widthInBytes (typeWidth ty) = x - check _ _ = panic "dualLiveTransfers: slices unsupported" - - -- Register analysis is identical to liveness analysis from CmmLive. - last :: CmmNode O C -> FactBase DualLive -> DualLive - last l fb = changeRegs (gen_kill l) $ case l of - CmmCall {cml_cont=Nothing} -> empty - CmmCall {cml_cont=Just k} -> keep_stack_only k - CmmForeignCall {succ=k} -> keep_stack_only k - _ -> joinOutFacts dualLiveLattice l fb - where empty = fact_bot dualLiveLattice - lkp k = fromMaybe empty (lookupFact k fb) - keep_stack_only k = DualLive (on_stack (lkp k)) emptyRegSet - -insertSpillsAndReloads :: CmmGraph -> BlockSet -> CmmBwdRewrite DualLive -insertSpillsAndReloads graph procPoints = deepBwdRw3 first middle nothing - -- Beware: deepBwdRw with one polymorphic function seems more reasonable here, - -- but GHC miscompiles it, see bug #4044. - where first :: CmmNode C O -> Fact O DualLive -> CmmReplGraph C O - first e@(CmmEntry id) live = return $ - if id /= (g_entry graph) && setMember id procPoints then - case map reload (uniqSetToList (in_regs live)) of - [] -> Nothing - is -> Just $ mkFirst e <*> mkMiddles is - else Nothing - -- EZY: There was some dead code for handling the case where - -- we were not splitting procedures. Check Git history if - -- you're interested (circa e26ea0f41). - - middle :: CmmNode O O -> Fact O DualLive -> CmmReplGraph O O - -- Don't add spills next to reloads. - middle (CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot (RegSlot reg') _) _)) _ | reg == reg' = return Nothing - -- Spill if register is live on stack. - middle m@(CmmAssign (CmmLocal reg) _) live - | reg `elemRegSet` on_stack live = return (Just (mkMiddles [m, spill reg])) - middle _ _ = return Nothing - - nothing _ _ = return Nothing - -spill, reload :: LocalReg -> CmmNode O O -spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r) -reload r = CmmAssign (CmmLocal r) (CmmLoad (regSlot r) $ localRegType r) - ---------------------- --- prettyprinting - -ppr_regs :: String -> RegSet -> SDoc -ppr_regs s regs = text s <+> commafy (map ppr $ uniqSetToList regs) - where commafy xs = hsep $ punctuate comma xs - -instance Outputable DualLive where - ppr (DualLive {in_regs = regs, on_stack = stack}) = - if isEmptyUniqSet regs && isEmptyUniqSet stack then - text "<nothing-live>" - else - nest 2 $ fsep [if isEmptyUniqSet regs then PP.empty - else (ppr_regs "live in regs =" regs), - if isEmptyUniqSet stack then PP.empty - else (ppr_regs "live on stack =" stack)] diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs index 0756c87583..d831a8aba5 100644 --- a/compiler/cmm/CmmUtils.hs +++ b/compiler/cmm/CmmUtils.hs @@ -60,13 +60,14 @@ module CmmUtils( -- * Operations that probably don't belong here modifyGraph, - lastNode, replaceLastNode, insertBetween, + lastNode, replaceLastNode, ofBlockMap, toBlockMap, insertBlock, ofBlockList, toBlockList, bodyToBlockList, foldGraphBlocks, mapGraphNodes, postorderDfs, mapGraphNodes1, analFwd, analBwd, analRewFwd, analRewBwd, - dataflowPassFwd, dataflowPassBwd + dataflowPassFwd, dataflowPassBwd, dataflowAnalFwd, dataflowAnalBwd, + dataflowAnalFwdBlocks ) where #include "HsVersions.h" @@ -89,7 +90,7 @@ import Data.Word import Data.Maybe import Data.Bits import Control.Monad -import Compiler.Hoopl hiding ( Unique ) +import Hoopl --------------------------------------------------- -- @@ -402,13 +403,13 @@ mkLiveness (reg:regs) modifyGraph :: (Graph n C C -> Graph n' C C) -> GenCmmGraph n -> GenCmmGraph n' modifyGraph f g = CmmGraph {g_entry=g_entry g, g_graph=f (g_graph g)} -toBlockMap :: CmmGraph -> LabelMap CmmBlock +toBlockMap :: CmmGraph -> BlockEnv CmmBlock toBlockMap (CmmGraph {g_graph=GMany NothingO body NothingO}) = body -ofBlockMap :: BlockId -> LabelMap CmmBlock -> CmmGraph +ofBlockMap :: BlockId -> BlockEnv CmmBlock -> CmmGraph ofBlockMap entry bodyMap = CmmGraph {g_entry=entry, g_graph=GMany NothingO bodyMap NothingO} -insertBlock :: CmmBlock -> LabelMap CmmBlock -> LabelMap CmmBlock +insertBlock :: CmmBlock -> BlockEnv CmmBlock -> BlockEnv CmmBlock insertBlock block map = ASSERT (isNothing $ mapLookup id map) mapInsert id block map @@ -418,7 +419,8 @@ toBlockList :: CmmGraph -> [CmmBlock] toBlockList g = mapElems $ toBlockMap g ofBlockList :: BlockId -> [CmmBlock] -> CmmGraph -ofBlockList entry blocks = CmmGraph {g_entry=entry, g_graph=GMany NothingO body NothingO} +ofBlockList entry blocks = CmmGraph { g_entry = entry + , g_graph = GMany NothingO body NothingO } where body = foldr addBlock emptyBody blocks bodyToBlockList :: Body CmmNode -> [CmmBlock] @@ -439,87 +441,67 @@ foldGraphBlocks :: (CmmBlock -> a -> a) -> a -> CmmGraph -> a foldGraphBlocks k z g = mapFold k z $ toBlockMap g postorderDfs :: CmmGraph -> [CmmBlock] -postorderDfs g = postorder_dfs_from (toBlockMap g) (g_entry g) - -------------------------------------------------- --- Manipulating CmmBlocks - -lastNode :: CmmBlock -> CmmNode O C -lastNode block = foldBlockNodesF3 (nothing, nothing, const) block () - where nothing :: a -> b -> () - nothing _ _ = () - -replaceLastNode :: Block CmmNode e C -> CmmNode O C -> Block CmmNode e C -replaceLastNode block last = blockOfNodeList (first, middle, JustC last) - where (first, middle, _) = blockToNodeList block - ----------------------------------------------------------------------- ------ Splicing between blocks --- Given a middle node, a block, and a successor BlockId, --- we can insert the middle node between the block and the successor. --- We return the updated block and a list of new blocks that must be added --- to the graph. --- The semantics is a bit tricky. We consider cases on the last node: --- o For a branch, we can just insert before the branch, --- but sometimes the optimizer does better if we actually insert --- a fresh basic block, enabling some common blockification. --- o For a conditional branch, switch statement, or call, we must insert --- a new basic block. --- o For a jump or return, this operation is impossible. - -insertBetween :: MonadUnique m => CmmBlock -> [CmmNode O O] -> BlockId -> m (CmmBlock, [CmmBlock]) -insertBetween b ms succId = insert $ lastNode b - where insert :: MonadUnique m => CmmNode O C -> m (CmmBlock, [CmmBlock]) - insert (CmmBranch bid) = - if bid == succId then - do (bid', bs) <- newBlocks - return (replaceLastNode b (CmmBranch bid'), bs) - else panic "tried invalid block insertBetween" - insert (CmmCondBranch c t f) = - do (t', tbs) <- if t == succId then newBlocks else return $ (t, []) - (f', fbs) <- if f == succId then newBlocks else return $ (f, []) - return (replaceLastNode b (CmmCondBranch c t' f'), tbs ++ fbs) - insert (CmmSwitch e ks) = - do (ids, bs) <- mapAndUnzipM mbNewBlocks ks - return (replaceLastNode b (CmmSwitch e ids), join bs) - insert (CmmCall {}) = - panic "unimp: insertBetween after a call -- probably not a good idea" - insert (CmmForeignCall {}) = - panic "unimp: insertBetween after a foreign call -- probably not a good idea" - - newBlocks :: MonadUnique m => m (BlockId, [CmmBlock]) - newBlocks = do id <- liftM mkBlockId $ getUniqueM - return $ (id, [blockOfNodeList (JustC (CmmEntry id), ms, JustC (CmmBranch succId))]) - mbNewBlocks :: MonadUnique m => Maybe BlockId -> m (Maybe BlockId, [CmmBlock]) - mbNewBlocks (Just k) = if k == succId then liftM fstJust newBlocks - else return (Just k, []) - mbNewBlocks Nothing = return (Nothing, []) - fstJust (id, bs) = (Just id, bs) +postorderDfs g = {-# SCC "postorderDfs" #-} postorder_dfs_from (toBlockMap g) (g_entry g) ------------------------------------------------- -- Running dataflow analysis and/or rewrites -- Constructing forward and backward analysis-only pass -analFwd :: Monad m => DataflowLattice f -> FwdTransfer n f -> FwdPass m n f -analBwd :: Monad m => DataflowLattice f -> BwdTransfer n f -> BwdPass m n f +analFwd :: DataflowLattice f -> FwdTransfer n f -> FwdPass FuelUniqSM n f +analBwd :: DataflowLattice f -> BwdTransfer n f -> BwdPass FuelUniqSM n f analFwd lat xfer = analRewFwd lat xfer noFwdRewrite analBwd lat xfer = analRewBwd lat xfer noBwdRewrite -- Constructing forward and backward analysis + rewrite pass -analRewFwd :: Monad m => DataflowLattice f -> FwdTransfer n f -> FwdRewrite m n f -> FwdPass m n f -analRewBwd :: Monad m => DataflowLattice f -> BwdTransfer n f -> BwdRewrite m n f -> BwdPass m n f +analRewFwd :: DataflowLattice f -> FwdTransfer n f + -> FwdRewrite FuelUniqSM n f + -> FwdPass FuelUniqSM n f + +analRewBwd :: DataflowLattice f + -> BwdTransfer n f + -> BwdRewrite FuelUniqSM n f + -> BwdPass FuelUniqSM n f analRewFwd lat xfer rew = FwdPass {fp_lattice = lat, fp_transfer = xfer, fp_rewrite = rew} analRewBwd lat xfer rew = BwdPass {bp_lattice = lat, bp_transfer = xfer, bp_rewrite = rew} -- Running forward and backward dataflow analysis + optional rewrite -dataflowPassFwd :: NonLocal n => GenCmmGraph n -> [(BlockId, f)] -> FwdPass FuelUniqSM n f -> FuelUniqSM (GenCmmGraph n, BlockEnv f) +dataflowPassFwd :: NonLocal n => + GenCmmGraph n -> [(BlockId, f)] + -> FwdPass FuelUniqSM n f + -> FuelUniqSM (GenCmmGraph n, BlockEnv f) dataflowPassFwd (CmmGraph {g_entry=entry, g_graph=graph}) facts fwd = do (graph, facts, NothingO) <- analyzeAndRewriteFwd fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts) return (CmmGraph {g_entry=entry, g_graph=graph}, facts) -dataflowPassBwd :: NonLocal n => GenCmmGraph n -> [(BlockId, f)] -> BwdPass FuelUniqSM n f -> FuelUniqSM (GenCmmGraph n, BlockEnv f) +dataflowAnalFwd :: NonLocal n => + GenCmmGraph n -> [(BlockId, f)] + -> FwdPass FuelUniqSM n f + -> BlockEnv f +dataflowAnalFwd (CmmGraph {g_entry=entry, g_graph=graph}) facts fwd = + analyzeFwd fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts) + +dataflowAnalFwdBlocks :: NonLocal n => + GenCmmGraph n -> [(BlockId, f)] + -> FwdPass FuelUniqSM n f + -> FuelUniqSM (BlockEnv f) +dataflowAnalFwdBlocks (CmmGraph {g_entry=entry, g_graph=graph}) facts fwd = do +-- (graph, facts, NothingO) <- analyzeAndRewriteFwd fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts) +-- return facts + return (analyzeFwdBlocks fwd (JustC [entry]) graph (mkFactBase (fp_lattice fwd) facts)) + +dataflowAnalBwd :: NonLocal n => + GenCmmGraph n -> [(BlockId, f)] + -> BwdPass FuelUniqSM n f + -> BlockEnv f +dataflowAnalBwd (CmmGraph {g_entry=entry, g_graph=graph}) facts bwd = + analyzeBwd bwd (JustC [entry]) graph (mkFactBase (bp_lattice bwd) facts) + +dataflowPassBwd :: NonLocal n => + GenCmmGraph n -> [(BlockId, f)] + -> BwdPass FuelUniqSM n f + -> FuelUniqSM (GenCmmGraph n, BlockEnv f) dataflowPassBwd (CmmGraph {g_entry=entry, g_graph=graph}) facts bwd = do (graph, facts, NothingO) <- analyzeAndRewriteBwd bwd (JustC [entry]) graph (mkFactBase (bp_lattice bwd) facts) return (CmmGraph {g_entry=entry, g_graph=graph}, facts) diff --git a/compiler/cmm/Hoopl.hs b/compiler/cmm/Hoopl.hs new file mode 100644 index 0000000000..404482e047 --- /dev/null +++ b/compiler/cmm/Hoopl.hs @@ -0,0 +1,124 @@ +module Hoopl ( + module Compiler.Hoopl, + module Hoopl.Dataflow, + deepBwdRw3, deepBwdRw, + thenFwdRw + ) where + +import Compiler.Hoopl hiding + ( Unique, + FwdTransfer(..), FwdRewrite(..), FwdPass(..), + BwdTransfer(..), BwdRewrite(..), BwdPass(..), + noFwdRewrite, noBwdRewrite, +-- analyzeAndRewriteFwd, analyzeAndRewriteBwd, + mkFactBase, Fact, + mkBRewrite, mkBRewrite3, mkBTransfer, mkBTransfer3, + mkFRewrite, mkFRewrite3, mkFTransfer, mkFTransfer3, + deepFwdRw, deepFwdRw3, thenFwdRw, iterFwdRw, + deepBwdRw, deepBwdRw3, thenBwdRw, iterBwdRw + ) + +import Hoopl.Dataflow +import OptimizationFuel +import Control.Monad + +deepFwdRw3 :: (n C O -> f -> FuelUniqSM (Maybe (Graph n C O))) + -> (n O O -> f -> FuelUniqSM (Maybe (Graph n O O))) + -> (n O C -> f -> FuelUniqSM (Maybe (Graph n O C))) + -> (FwdRewrite FuelUniqSM n f) +deepFwdRw :: (forall e x . n e x -> f -> FuelUniqSM (Maybe (Graph n e x))) -> FwdRewrite FuelUniqSM n f +deepFwdRw3 f m l = iterFwdRw $ mkFRewrite3 f m l +deepFwdRw f = deepFwdRw3 f f f + +-- N.B. rw3, rw3', and rw3a are triples of functions. +-- But rw and rw' are single functions. +thenFwdRw :: forall n f. + FwdRewrite FuelUniqSM n f + -> FwdRewrite FuelUniqSM n f + -> FwdRewrite FuelUniqSM n f +thenFwdRw rw3 rw3' = wrapFR2 thenrw rw3 rw3' + where + thenrw :: forall e x t t1. + (t -> t1 -> FuelUniqSM (Maybe (Graph n e x, FwdRewrite FuelUniqSM n f))) + -> (t -> t1 -> FuelUniqSM (Maybe (Graph n e x, FwdRewrite FuelUniqSM n f))) + -> t + -> t1 + -> FuelUniqSM (Maybe (Graph n e x, FwdRewrite FuelUniqSM n f)) + thenrw rw rw' n f = rw n f >>= fwdRes + where fwdRes Nothing = rw' n f + fwdRes (Just gr) = return $ Just $ fadd_rw rw3' gr + +iterFwdRw :: forall m n f. + FwdRewrite FuelUniqSM n f + -> FwdRewrite FuelUniqSM n f +iterFwdRw rw3 = wrapFR iter rw3 + where iter :: forall a e x t. + (t -> a -> FuelUniqSM (Maybe (Graph n e x, FwdRewrite FuelUniqSM n f))) + -> t + -> a + -> FuelUniqSM (Maybe (Graph n e x, FwdRewrite FuelUniqSM n f)) + iter rw n = (liftM $ liftM $ fadd_rw (iterFwdRw rw3)) . rw n + +-- | Function inspired by 'rew' in the paper +_frewrite_cps :: ((Graph n e x, FwdRewrite FuelUniqSM n f) -> FuelUniqSM a) + -> FuelUniqSM a + -> (forall e x . n e x -> f -> FuelUniqSM (Maybe (Graph n e x, FwdRewrite FuelUniqSM n f))) + -> n e x + -> f + -> FuelUniqSM a +_frewrite_cps j n rw node f = + do mg <- rw node f + case mg of Nothing -> n + Just gr -> j gr + + + +-- | Function inspired by 'add' in the paper +fadd_rw :: FwdRewrite FuelUniqSM n f + -> (Graph n e x, FwdRewrite FuelUniqSM n f) + -> (Graph n e x, FwdRewrite FuelUniqSM n f) +fadd_rw rw2 (g, rw1) = (g, rw1 `thenFwdRw` rw2) + + + +deepBwdRw3 :: + (n C O -> f -> FuelUniqSM (Maybe (Graph n C O))) + -> (n O O -> f -> FuelUniqSM (Maybe (Graph n O O))) + -> (n O C -> FactBase f -> FuelUniqSM (Maybe (Graph n O C))) + -> (BwdRewrite FuelUniqSM n f) +deepBwdRw :: (forall e x . n e x -> Fact x f -> FuelUniqSM (Maybe (Graph n e x))) + -> BwdRewrite FuelUniqSM n f +deepBwdRw3 f m l = iterBwdRw $ mkBRewrite3 f m l +deepBwdRw f = deepBwdRw3 f f f + + +thenBwdRw :: forall n f. BwdRewrite FuelUniqSM n f -> BwdRewrite FuelUniqSM n f -> BwdRewrite FuelUniqSM n f +thenBwdRw rw1 rw2 = wrapBR2 f rw1 rw2 + where f :: forall t t1 t2 e x. + t + -> (t1 -> t2 -> FuelUniqSM (Maybe (Graph n e x, BwdRewrite FuelUniqSM n f))) + -> (t1 -> t2 -> FuelUniqSM (Maybe (Graph n e x, BwdRewrite FuelUniqSM n f))) + -> t1 + -> t2 + -> FuelUniqSM (Maybe (Graph n e x, BwdRewrite FuelUniqSM n f)) + f _ rw1 rw2' n f = do + res1 <- rw1 n f + case res1 of + Nothing -> rw2' n f + Just gr -> return $ Just $ badd_rw rw2 gr + +iterBwdRw :: forall n f. BwdRewrite FuelUniqSM n f -> BwdRewrite FuelUniqSM n f +iterBwdRw rw = wrapBR f rw + where f :: forall t e x t1 t2. + t + -> (t1 -> t2 -> FuelUniqSM (Maybe (Graph n e x, BwdRewrite FuelUniqSM n f))) + -> t1 + -> t2 + -> FuelUniqSM (Maybe (Graph n e x, BwdRewrite FuelUniqSM n f)) + f _ rw' n f = liftM (liftM (badd_rw (iterBwdRw rw))) (rw' n f) + +-- | Function inspired by 'add' in the paper +badd_rw :: BwdRewrite FuelUniqSM n f + -> (Graph n e x, BwdRewrite FuelUniqSM n f) + -> (Graph n e x, BwdRewrite FuelUniqSM n f) +badd_rw rw2 (g, rw1) = (g, rw1 `thenBwdRw` rw2) diff --git a/compiler/cmm/Hoopl/Dataflow.hs b/compiler/cmm/Hoopl/Dataflow.hs new file mode 100644 index 0000000000..cdab2cd2fe --- /dev/null +++ b/compiler/cmm/Hoopl/Dataflow.hs @@ -0,0 +1,890 @@ +{-# LANGUAGE RankNTypes, ScopedTypeVariables, GADTs, EmptyDataDecls, PatternGuards, TypeFamilies, MultiParamTypeClasses #-} +#if __GLASGOW_HASKELL__ >= 703 +{-# OPTIONS_GHC -fprof-auto-top #-} +#endif +#if __GLASGOW_HASKELL__ >= 701 +{-# LANGUAGE Trustworthy #-} +#endif + +module Hoopl.Dataflow + ( DataflowLattice(..), OldFact(..), NewFact(..), Fact, mkFactBase + , ChangeFlag(..) + , FwdPass(..), FwdTransfer, mkFTransfer, mkFTransfer3, getFTransfer3 + -- * Respecting Fuel + + -- $fuel + , FwdRewrite, mkFRewrite, mkFRewrite3, getFRewrite3, noFwdRewrite + , wrapFR, wrapFR2 + , BwdPass(..), BwdTransfer, mkBTransfer, mkBTransfer3, getBTransfer3 + , wrapBR, wrapBR2 + , BwdRewrite, mkBRewrite, mkBRewrite3, getBRewrite3, noBwdRewrite + , analyzeAndRewriteFwd, analyzeAndRewriteBwd + , analyzeFwd, analyzeFwdBlocks, analyzeBwd + ) +where + +import OptimizationFuel + +import Data.Maybe +import Data.Array + +import Compiler.Hoopl.Collections +import Compiler.Hoopl.Fuel +import Compiler.Hoopl.Graph hiding (Graph) -- hiding so we can redefine + -- and include definition in paper +import qualified Compiler.Hoopl.GraphUtil as U +import Compiler.Hoopl.Label +import Compiler.Hoopl.Dataflow (JoinFun) +import Compiler.Hoopl.Util + +import Compiler.Hoopl.Dataflow ( + DataflowLattice(..), OldFact(..), NewFact(..), Fact + , ChangeFlag(..), mkFactBase + , FwdPass(..), FwdRewrite(..), FwdTransfer(..), mkFRewrite, getFRewrite3, mkFTransfer, mkFTransfer3 + , wrapFR, wrapFR2 + , BwdPass(..), BwdRewrite(..), BwdTransfer(..), mkBTransfer, mkBTransfer3, getBTransfer3 + , wrapBR, wrapBR2 + , mkBRewrite, getBRewrite3 + ) + +-- import Debug.Trace + +noRewrite :: a -> b -> FuelUniqSM (Maybe c) +noRewrite _ _ = return Nothing + +noFwdRewrite :: FwdRewrite FuelUniqSM n f +noFwdRewrite = FwdRewrite3 (noRewrite, noRewrite, noRewrite) + +-- | Functions passed to 'mkFRewrite3' should not be aware of the fuel supply. +-- The result returned by 'mkFRewrite3' respects fuel. +mkFRewrite3 :: forall n f. + (n C O -> f -> FuelUniqSM (Maybe (Graph n C O))) + -> (n O O -> f -> FuelUniqSM (Maybe (Graph n O O))) + -> (n O C -> f -> FuelUniqSM (Maybe (Graph n O C))) + -> FwdRewrite FuelUniqSM n f +mkFRewrite3 f m l = FwdRewrite3 (lift f, lift m, lift l) + where lift :: forall t t1 a. (t -> t1 -> FuelUniqSM (Maybe a)) + -> t -> t1 -> FuelUniqSM (Maybe (a, FwdRewrite FuelUniqSM n f)) + {-# INLINE lift #-} + lift rw node fact = do + a <- rw node fact + case a of + Nothing -> return Nothing + Just a -> do f <- getFuel + if f == 0 + then return Nothing + else setFuel (f-1) >> return (Just (a,noFwdRewrite)) + +noBwdRewrite :: BwdRewrite FuelUniqSM n f +noBwdRewrite = BwdRewrite3 (noRewrite, noRewrite, noRewrite) + +mkBRewrite3 :: forall n f. + (n C O -> f -> FuelUniqSM (Maybe (Graph n C O))) + -> (n O O -> f -> FuelUniqSM (Maybe (Graph n O O))) + -> (n O C -> FactBase f -> FuelUniqSM (Maybe (Graph n O C))) + -> BwdRewrite FuelUniqSM n f +mkBRewrite3 f m l = BwdRewrite3 (lift f, lift m, lift l) + where lift :: forall t t1 a. (t -> t1 -> FuelUniqSM (Maybe a)) + -> t -> t1 -> FuelUniqSM (Maybe (a, BwdRewrite FuelUniqSM n f)) + {-# INLINE lift #-} + lift rw node fact = do + a <- rw node fact + case a of + Nothing -> return Nothing + Just a -> do f <- getFuel + if f == 0 + then return Nothing + else setFuel (f-1) >> return (Just (a,noBwdRewrite)) + +----------------------------------------------------------------------------- +-- Analyze and rewrite forward: the interface +----------------------------------------------------------------------------- + +-- | if the graph being analyzed is open at the entry, there must +-- be no other entry point, or all goes horribly wrong... +analyzeAndRewriteFwd + :: forall n f e x . NonLocal n => + FwdPass FuelUniqSM n f + -> MaybeC e [Label] + -> Graph n e x -> Fact e f + -> FuelUniqSM (Graph n e x, FactBase f, MaybeO x f) +analyzeAndRewriteFwd pass entries g f = + do (rg, fout) <- arfGraph pass (fmap targetLabels entries) g f + let (g', fb) = normalizeGraph rg + return (g', fb, distinguishedExitFact g' fout) + +distinguishedExitFact :: forall n e x f . Graph n e x -> Fact x f -> MaybeO x f +distinguishedExitFact g f = maybe g + where maybe :: Graph n e x -> MaybeO x f + maybe GNil = JustO f + maybe (GUnit {}) = JustO f + maybe (GMany _ _ x) = case x of NothingO -> NothingO + JustO _ -> JustO f + +---------------------------------------------------------------- +-- Forward Implementation +---------------------------------------------------------------- + +type Entries e = MaybeC e [Label] + +arfGraph :: forall n f e x . NonLocal n => + FwdPass FuelUniqSM n f -> + Entries e -> Graph n e x -> Fact e f -> FuelUniqSM (DG f n e x, Fact x f) +arfGraph pass@FwdPass { fp_lattice = lattice, + fp_transfer = transfer, + fp_rewrite = rewrite } entries g in_fact = graph g in_fact + where + {- nested type synonyms would be so lovely here + type ARF thing = forall e x . thing e x -> f -> m (DG f n e x, Fact x f) + type ARFX thing = forall e x . thing e x -> Fact e f -> m (DG f n e x, Fact x f) + -} + graph :: Graph n e x -> Fact e f -> FuelUniqSM (DG f n e x, Fact x f) + block :: forall e x . + Block n e x -> f -> FuelUniqSM (DG f n e x, Fact x f) + + body :: [Label] -> LabelMap (Block n C C) + -> Fact C f -> FuelUniqSM (DG f n C C, Fact C f) + -- Outgoing factbase is restricted to Labels *not* in + -- in the Body; the facts for Labels *in* + -- the Body are in the 'DG f n C C' + + cat :: forall e a x f1 f2 f3. + (f1 -> FuelUniqSM (DG f n e a, f2)) + -> (f2 -> FuelUniqSM (DG f n a x, f3)) + -> (f1 -> FuelUniqSM (DG f n e x, f3)) + + graph GNil f = return (dgnil, f) + graph (GUnit blk) f = block blk f + graph (GMany e bdy x) f = ((e `ebcat` bdy) `cat` exit x) f + where + ebcat :: MaybeO e (Block n O C) -> Body n -> Fact e f -> FuelUniqSM (DG f n e C, Fact C f) + exit :: MaybeO x (Block n C O) -> Fact C f -> FuelUniqSM (DG f n C x, Fact x f) + exit (JustO blk) f = arfx block blk f + exit NothingO f = return (dgnilC, f) + ebcat entry bdy f = c entries entry f + where c :: MaybeC e [Label] -> MaybeO e (Block n O C) + -> Fact e f -> FuelUniqSM (DG f n e C, Fact C f) + c NothingC (JustO entry) f = (block entry `cat` body (successors entry) bdy) f + c (JustC entries) NothingO f = body entries bdy f + c _ _ _ = error "bogus GADT pattern match failure" + + -- Lift from nodes to blocks + block BNil f = return (dgnil, f) + block (BlockCO n b) f = (node n `cat` block b) f + block (BlockCC l b n) f = (node l `cat` block b `cat` node n) f + block (BlockOC b n) f = (block b `cat` node n) f + + block (BMiddle n) f = node n f + block (BCat b1 b2) f = (block b1 `cat` block b2) f + block (BHead h n) f = (block h `cat` node n) f + block (BTail n t) f = (node n `cat` block t) f + + {-# INLINE node #-} + node :: forall e x . (ShapeLifter e x) + => n e x -> f -> FuelUniqSM (DG f n e x, Fact x f) + node n f + = do { grw <- frewrite rewrite n f + ; case grw of + Nothing -> return ( singletonDG f n + , ftransfer transfer n f ) + Just (g, rw) -> + let pass' = pass { fp_rewrite = rw } + f' = fwdEntryFact n f + in arfGraph pass' (fwdEntryLabel n) g f' } + + -- | Compose fact transformers and concatenate the resulting + -- rewritten graphs. + {-# INLINE cat #-} + cat ft1 ft2 f = do { (g1,f1) <- ft1 f + ; (g2,f2) <- ft2 f1 + ; let !g = g1 `dgSplice` g2 + ; return (g, f2) } + + arfx :: forall x . + (Block n C x -> f -> FuelUniqSM (DG f n C x, Fact x f)) + -> (Block n C x -> Fact C f -> FuelUniqSM (DG f n C x, Fact x f)) + arfx arf thing fb = + arf thing $ fromJust $ lookupFact (entryLabel thing) $ joinInFacts lattice fb + -- joinInFacts adds debugging information + + + -- Outgoing factbase is restricted to Labels *not* in + -- in the Body; the facts for Labels *in* + -- the Body are in the 'DG f n C C' + body entries blockmap init_fbase + = fixpoint Fwd lattice do_block entries blockmap init_fbase + where + lattice = fp_lattice pass + do_block :: forall x . Block n C x -> FactBase f + -> FuelUniqSM (DG f n C x, Fact x f) + do_block b fb = block b entryFact + where entryFact = getFact lattice (entryLabel b) fb + + +-- Join all the incoming facts with bottom. +-- We know the results _shouldn't change_, but the transfer +-- functions might, for example, generate some debugging traces. +joinInFacts :: DataflowLattice f -> FactBase f -> FactBase f +joinInFacts (lattice @ DataflowLattice {fact_bot = bot, fact_join = fj}) fb = + mkFactBase lattice $ map botJoin $ mapToList fb + where botJoin (l, f) = (l, snd $ fj l (OldFact bot) (NewFact f)) + +forwardBlockList :: (NonLocal n) + => [Label] -> Body n -> [Block n C C] +-- This produces a list of blocks in order suitable for forward analysis, +-- along with the list of Labels it may depend on for facts. +forwardBlockList entries blks = postorder_dfs_from blks entries + +---------------------------------------------------------------- +-- Forward Analysis only +---------------------------------------------------------------- + +-- | if the graph being analyzed is open at the entry, there must +-- be no other entry point, or all goes horribly wrong... +analyzeFwd + :: forall n f e . NonLocal n => + FwdPass FuelUniqSM n f + -> MaybeC e [Label] + -> Graph n e C -> Fact e f + -> FactBase f +analyzeFwd FwdPass { fp_lattice = lattice, + fp_transfer = FwdTransfer3 (ftr, mtr, ltr) } + entries g in_fact = graph g in_fact + where + graph :: Graph n e C -> Fact e f -> FactBase f + graph (GMany entry blockmap NothingO) + = case (entries, entry) of + (NothingC, JustO entry) -> block entry `cat` body (successors entry) + (JustC entries, NothingO) -> body entries + _ -> error "bogus GADT pattern match failure" + where + body :: [Label] -> Fact C f -> Fact C f + body entries f + = fixpointAnal Fwd lattice do_block entries blockmap f + where + do_block :: forall x . Block n C x -> FactBase f -> Fact x f + do_block b fb = block b entryFact + where entryFact = getFact lattice (entryLabel b) fb + + -- NB. eta-expand block, GHC can't do this by itself. See #5809. + block :: forall e x . Block n e x -> f -> Fact x f + block BNil f = f + block (BlockCO n b) f = (ftr n `cat` block b) f + block (BlockCC l b n) f = (ftr l `cat` (block b `cat` ltr n)) f + block (BlockOC b n) f = (block b `cat` ltr n) f + + block (BMiddle n) f = mtr n f + block (BCat b1 b2) f = (block b1 `cat` block b2) f + block (BHead h n) f = (block h `cat` mtr n) f + block (BTail n t) f = (mtr n `cat` block t) f + + {-# INLINE cat #-} + cat :: forall f1 f2 f3 . (f1 -> f2) -> (f2 -> f3) -> (f1 -> f3) + cat ft1 ft2 = \f -> ft2 $! ft1 f + +-- | if the graph being analyzed is open at the entry, there must +-- be no other entry point, or all goes horribly wrong... +analyzeFwdBlocks + :: forall n f e . NonLocal n => + FwdPass FuelUniqSM n f + -> MaybeC e [Label] + -> Graph n e C -> Fact e f + -> FactBase f +analyzeFwdBlocks FwdPass { fp_lattice = lattice, + fp_transfer = FwdTransfer3 (ftr, _, ltr) } + entries g in_fact = graph g in_fact + where + graph :: Graph n e C -> Fact e f -> FactBase f + graph (GMany entry blockmap NothingO) + = case (entries, entry) of + (NothingC, JustO entry) -> block entry `cat` body (successors entry) + (JustC entries, NothingO) -> body entries + _ -> error "bogus GADT pattern match failure" + where + body :: [Label] -> Fact C f -> Fact C f + body entries f + = fixpointAnal Fwd lattice do_block entries blockmap f + where + do_block :: forall x . Block n C x -> FactBase f -> Fact x f + do_block b fb = block b entryFact + where entryFact = getFact lattice (entryLabel b) fb + + -- NB. eta-expand block, GHC can't do this by itself. See #5809. + block :: forall e x . Block n e x -> f -> Fact x f + block BNil f = f + block (BlockCO n _) f = ftr n f + block (BlockCC l _ n) f = (ftr l `cat` ltr n) f + block (BlockOC _ n) f = ltr n f + + {-# INLINE cat #-} + cat :: forall f1 f2 f3 . (f1 -> f2) -> (f2 -> f3) -> (f1 -> f3) + cat ft1 ft2 = \f -> ft2 $! ft1 f + +---------------------------------------------------------------- +-- Backward Analysis only +---------------------------------------------------------------- + +-- | if the graph being analyzed is open at the entry, there must +-- be no other entry point, or all goes horribly wrong... +analyzeBwd + :: forall n f e . NonLocal n => + BwdPass FuelUniqSM n f + -> MaybeC e [Label] + -> Graph n e C -> Fact C f + -> FactBase f +analyzeBwd BwdPass { bp_lattice = lattice, + bp_transfer = BwdTransfer3 (ftr, mtr, ltr) } + entries g in_fact = graph g in_fact + where + graph :: Graph n e C -> Fact C f -> FactBase f + graph (GMany entry blockmap NothingO) + = case (entries, entry) of + (NothingC, JustO entry) -> body (successors entry) + (JustC entries, NothingO) -> body entries + _ -> error "bogus GADT pattern match failure" + where + body :: [Label] -> Fact C f -> Fact C f + body entries f + = fixpointAnal Bwd lattice do_block entries blockmap f + where + do_block :: forall x . Block n C x -> Fact x f -> FactBase f + do_block b fb = mapSingleton (entryLabel b) (block b fb) + + -- NB. eta-expand block, GHC can't do this by itself. See #5809. + block :: forall e x . Block n e x -> Fact x f -> f + block BNil f = f + block (BlockCO n b) f = (ftr n `cat` block b) f + block (BlockCC l b n) f = ((ftr l `cat` block b) `cat` ltr n) f + block (BlockOC b n) f = (block b `cat` ltr n) f + + block (BMiddle n) f = mtr n f + block (BCat b1 b2) f = (block b1 `cat` block b2) f + block (BHead h n) f = (block h `cat` mtr n) f + block (BTail n t) f = (mtr n `cat` block t) f + + {-# INLINE cat #-} + cat :: forall f1 f2 f3 . (f2 -> f3) -> (f1 -> f2) -> (f1 -> f3) + cat ft1 ft2 = \f -> ft1 $! ft2 f + +----------------------------------------------------------------------------- +-- Backward analysis and rewriting: the interface +----------------------------------------------------------------------------- + + +-- | if the graph being analyzed is open at the exit, I don't +-- quite understand the implications of possible other exits +analyzeAndRewriteBwd + :: NonLocal n + => BwdPass FuelUniqSM n f + -> MaybeC e [Label] -> Graph n e x -> Fact x f + -> FuelUniqSM (Graph n e x, FactBase f, MaybeO e f) +analyzeAndRewriteBwd pass entries g f = + do (rg, fout) <- arbGraph pass (fmap targetLabels entries) g f + let (g', fb) = normalizeGraph rg + return (g', fb, distinguishedEntryFact g' fout) + +distinguishedEntryFact :: forall n e x f . Graph n e x -> Fact e f -> MaybeO e f +distinguishedEntryFact g f = maybe g + where maybe :: Graph n e x -> MaybeO e f + maybe GNil = JustO f + maybe (GUnit {}) = JustO f + maybe (GMany e _ _) = case e of NothingO -> NothingO + JustO _ -> JustO f + + +----------------------------------------------------------------------------- +-- Backward implementation +----------------------------------------------------------------------------- + +arbGraph :: forall n f e x . + NonLocal n => + BwdPass FuelUniqSM n f -> + Entries e -> Graph n e x -> Fact x f -> FuelUniqSM (DG f n e x, Fact e f) +arbGraph pass@BwdPass { bp_lattice = lattice, + bp_transfer = transfer, + bp_rewrite = rewrite } entries g in_fact = graph g in_fact + where + {- nested type synonyms would be so lovely here + type ARB thing = forall e x . thing e x -> Fact x f -> m (DG f n e x, f) + type ARBX thing = forall e x . thing e x -> Fact x f -> m (DG f n e x, Fact e f) + -} + graph :: Graph n e x -> Fact x f -> FuelUniqSM (DG f n e x, Fact e f) + block :: forall e x . Block n e x -> Fact x f -> FuelUniqSM (DG f n e x, f) + body :: [Label] -> Body n -> Fact C f -> FuelUniqSM (DG f n C C, Fact C f) + node :: forall e x . (ShapeLifter e x) + => n e x -> Fact x f -> FuelUniqSM (DG f n e x, f) + cat :: forall e a x info info' info''. + (info' -> FuelUniqSM (DG f n e a, info'')) + -> (info -> FuelUniqSM (DG f n a x, info')) + -> (info -> FuelUniqSM (DG f n e x, info'')) + + graph GNil f = return (dgnil, f) + graph (GUnit blk) f = block blk f + graph (GMany e bdy x) f = ((e `ebcat` bdy) `cat` exit x) f + where + ebcat :: MaybeO e (Block n O C) -> Body n -> Fact C f -> FuelUniqSM (DG f n e C, Fact e f) + exit :: MaybeO x (Block n C O) -> Fact x f -> FuelUniqSM (DG f n C x, Fact C f) + exit (JustO blk) f = arbx block blk f + exit NothingO f = return (dgnilC, f) + ebcat entry bdy f = c entries entry f + where c :: MaybeC e [Label] -> MaybeO e (Block n O C) + -> Fact C f -> FuelUniqSM (DG f n e C, Fact e f) + c NothingC (JustO entry) f = (block entry `cat` body (successors entry) bdy) f + c (JustC entries) NothingO f = body entries bdy f + c _ _ _ = error "bogus GADT pattern match failure" + + -- Lift from nodes to blocks + block BNil f = return (dgnil, f) + block (BlockCO n b) f = (node n `cat` block b) f + block (BlockCC l b n) f = (node l `cat` block b `cat` node n) f + block (BlockOC b n) f = (block b `cat` node n) f + + block (BMiddle n) f = node n f + block (BCat b1 b2) f = (block b1 `cat` block b2) f + block (BHead h n) f = (block h `cat` node n) f + block (BTail n t) f = (node n `cat` block t) f + + {-# INLINE node #-} + node n f + = do { bwdres <- brewrite rewrite n f + ; case bwdres of + Nothing -> return (singletonDG entry_f n, entry_f) + where entry_f = btransfer transfer n f + Just (g, rw) -> + do { let pass' = pass { bp_rewrite = rw } + ; (g, f) <- arbGraph pass' (fwdEntryLabel n) g f + ; return (g, bwdEntryFact lattice n f)} } + + -- | Compose fact transformers and concatenate the resulting + -- rewritten graphs. + {-# INLINE cat #-} + cat ft1 ft2 f = do { (g2,f2) <- ft2 f + ; (g1,f1) <- ft1 f2 + ; let !g = g1 `dgSplice` g2 + ; return (g, f1) } + + arbx :: forall x . + (Block n C x -> Fact x f -> FuelUniqSM (DG f n C x, f)) + -> (Block n C x -> Fact x f -> FuelUniqSM (DG f n C x, Fact C f)) + + arbx arb thing f = do { (rg, f) <- arb thing f + ; let fb = joinInFacts (bp_lattice pass) $ + mapSingleton (entryLabel thing) f + ; return (rg, fb) } + -- joinInFacts adds debugging information + + -- Outgoing factbase is restricted to Labels *not* in + -- in the Body; the facts for Labels *in* + -- the Body are in the 'DG f n C C' + body entries blockmap init_fbase + = fixpoint Bwd (bp_lattice pass) do_block entries blockmap init_fbase + where + do_block :: forall x. Block n C x -> Fact x f -> FuelUniqSM (DG f n C x, LabelMap f) + do_block b f = do (g, f) <- block b f + return (g, mapSingleton (entryLabel b) f) + + +{- + +The forward and backward cases are not dual. In the forward case, the +entry points are known, and one simply traverses the body blocks from +those points. In the backward case, something is known about the exit +points, but this information is essentially useless, because we don't +actually have a dual graph (that is, one with edges reversed) to +compute with. (Even if we did have a dual graph, it would not avail +us---a backward analysis must include reachable blocks that don't +reach the exit, as in a procedure that loops forever and has side +effects.) + +-} + +----------------------------------------------------------------------------- +-- fixpoint +----------------------------------------------------------------------------- + +data Direction = Fwd | Bwd + +-- | fixpointing for analysis-only +-- +fixpointAnal :: forall n f. NonLocal n + => Direction + -> DataflowLattice f + -> (Block n C C -> Fact C f -> Fact C f) + -> [Label] + -> LabelMap (Block n C C) + -> Fact C f -> FactBase f + +fixpointAnal direction DataflowLattice{ fact_bot = bot, fact_join = join } + do_block entries blockmap init_fbase + = loop start init_fbase + where + blocks = sortBlocks direction entries blockmap + n = length blocks + block_arr = {-# SCC "block_arr" #-} listArray (0,n-1) blocks + start = {-# SCC "start" #-} [0..n-1] + dep_blocks = {-# SCC "dep_blocks" #-} mkDepBlocks direction blocks + + loop + :: IntHeap -- blocks still to analyse + -> FactBase f -- current factbase (increases monotonically) + -> FactBase f + + loop [] fbase = fbase + loop (ix:todo) fbase = + let + blk = block_arr ! ix + + out_facts = {-# SCC "do_block" #-} do_block blk fbase + + !(todo', fbase') = {-# SCC "mapFoldWithKey" #-} + mapFoldWithKey (updateFact join dep_blocks) + (todo,fbase) out_facts + in + -- trace ("analysing: " ++ show (entryLabel blk)) $ + -- trace ("fbase': " ++ show (mapKeys fbase')) $ return () + -- trace ("changed: " ++ show changed) $ return () + -- trace ("to analyse: " ++ show to_analyse) $ return () + + loop todo' fbase' + + +-- | fixpointing for combined analysis/rewriting +-- +fixpoint :: forall n f. NonLocal n + => Direction + -> DataflowLattice f + -> (Block n C C -> Fact C f -> FuelUniqSM (DG f n C C, Fact C f)) + -> [Label] + -> LabelMap (Block n C C) + -> (Fact C f -> FuelUniqSM (DG f n C C, Fact C f)) + +fixpoint direction DataflowLattice{ fact_bot = bot, fact_join = join } + do_block entries blockmap init_fbase + = do + -- trace ("fixpoint: " ++ show (case direction of Fwd -> True; Bwd -> False) ++ " " ++ show (mapKeys blockmap) ++ show entries ++ " " ++ show (mapKeys init_fbase)) $ return() + (fbase, newblocks) <- loop start init_fbase mapEmpty + -- trace ("fixpoint DONE: " ++ show (mapKeys fbase) ++ show (mapKeys newblocks)) $ return() + return (GMany NothingO newblocks NothingO, + mapDeleteList (mapKeys blockmap) fbase) + -- The successors of the Graph are the the Labels + -- for which we have facts and which are *not* in + -- the blocks of the graph + where + blocks = sortBlocks direction entries blockmap + n = length blocks + block_arr = {-# SCC "block_arr" #-} listArray (0,n-1) blocks + start = {-# SCC "start" #-} [0..n-1] + dep_blocks = {-# SCC "dep_blocks" #-} mkDepBlocks direction blocks + + loop + :: IntHeap + -> FactBase f -- current factbase (increases monotonically) + -> LabelMap (DBlock f n C C) -- transformed graph + -> FuelUniqSM (FactBase f, LabelMap (DBlock f n C C)) + + loop [] fbase newblocks = return (fbase, newblocks) + loop (ix:todo) fbase !newblocks = do + let blk = block_arr ! ix + + -- trace ("analysing: " ++ show (entryLabel blk)) $ return () + (rg, out_facts) <- do_block blk fbase + let !(todo', fbase') = + mapFoldWithKey (updateFact join dep_blocks) + (todo,fbase) out_facts + + -- trace ("fbase': " ++ show (mapKeys fbase')) $ return () + -- trace ("changed: " ++ show changed) $ return () + -- trace ("to analyse: " ++ show to_analyse) $ return () + + let newblocks' = case rg of + GMany _ blks _ -> mapUnion blks newblocks + + loop todo' fbase' newblocks' + + +{- Note [TxFactBase invariants] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The TxFactBase is used only during a fixpoint iteration (or "sweep"), +and accumulates facts (and the transformed code) during the fixpoint +iteration. + +* tfb_fbase increases monotonically, across all sweeps + +* At the beginning of each sweep + tfb_cha = NoChange + tfb_lbls = {} + +* During each sweep we process each block in turn. Processing a block + is done thus: + 1. Read from tfb_fbase the facts for its entry label (forward) + or successors labels (backward) + 2. Transform those facts into new facts for its successors (forward) + or entry label (backward) + 3. Augment tfb_fbase with that info + We call the labels read in step (1) the "in-labels" of the sweep + +* The field tfb_lbls is the set of in-labels of all blocks that have + been processed so far this sweep, including the block that is + currently being processed. tfb_lbls is initialised to {}. It is a + subset of the Labels of the *original* (not transformed) blocks. + +* The tfb_cha field is set to SomeChange iff we decide we need to + perform another iteration of the fixpoint loop. It is initialsed to NoChange. + + Specifically, we set tfb_cha to SomeChange in step (3) iff + (a) The fact in tfb_fbase for a block L changes + (b) L is in tfb_lbls + Reason: until a label enters the in-labels its accumuated fact in tfb_fbase + has not been read, hence cannot affect the outcome + +Note [Unreachable blocks] +~~~~~~~~~~~~~~~~~~~~~~~~~ +A block that is not in the domain of tfb_fbase is "currently unreachable". +A currently-unreachable block is not even analyzed. Reason: consider +constant prop and this graph, with entry point L1: + L1: x:=3; goto L4 + L2: x:=4; goto L4 + L4: if x>3 goto L2 else goto L5 +Here L2 is actually unreachable, but if we process it with bottom input fact, +we'll propagate (x=4) to L4, and nuke the otherwise-good rewriting of L4. + +* If a currently-unreachable block is not analyzed, then its rewritten + graph will not be accumulated in tfb_rg. And that is good: + unreachable blocks simply do not appear in the output. + +* Note that clients must be careful to provide a fact (even if bottom) + for each entry point. Otherwise useful blocks may be garbage collected. + +* Note that updateFact must set the change-flag if a label goes from + not-in-fbase to in-fbase, even if its fact is bottom. In effect the + real fact lattice is + UNR + bottom + the points above bottom + +* Even if the fact is going from UNR to bottom, we still call the + client's fact_join function because it might give the client + some useful debugging information. + +* All of this only applies for *forward* ixpoints. For the backward + case we must treat every block as reachable; it might finish with a + 'return', and therefore have no successors, for example. +-} + + +----------------------------------------------------------------------------- +-- Pieces that are shared by fixpoint and fixpoint_anal +----------------------------------------------------------------------------- + +-- | Sort the blocks into the right order for analysis. +sortBlocks :: NonLocal n => Direction -> [Label] -> LabelMap (Block n C C) + -> [Block n C C] +sortBlocks direction entries blockmap + = case direction of Fwd -> fwd + Bwd -> reverse fwd + where fwd = forwardBlockList entries blockmap + +-- | construct a mapping from L -> block indices. If the fact for L +-- changes, re-analyse the given blocks. +mkDepBlocks :: NonLocal n => Direction -> [Block n C C] -> LabelMap [Int] +mkDepBlocks Fwd blocks = go blocks 0 mapEmpty + where go [] !_ m = m + go (b:bs) !n m = go bs (n+1) $! mapInsert (entryLabel b) [n] m +mkDepBlocks Bwd blocks = go blocks 0 mapEmpty + where go [] !_ m = m + go (b:bs) !n m = go bs (n+1) $! go' (successors b) m + where go' [] m = m + go' (l:ls) m = go' ls (mapInsertWith (++) l [n] m) + + +-- | After some new facts have been generated by analysing a block, we +-- fold this function over them to generate (a) a list of block +-- indices to (re-)analyse, and (b) the new FactBase. +-- +updateFact :: JoinFun f -> LabelMap [Int] + -> Label -> f -- out fact + -> (IntHeap, FactBase f) + -> (IntHeap, FactBase f) + +updateFact fact_join dep_blocks lbl new_fact (todo, fbase) + = case lookupFact lbl fbase of + Nothing -> let !z = mapInsert lbl new_fact fbase in (changed, z) + -- Note [no old fact] + Just old_fact -> + case fact_join lbl (OldFact old_fact) (NewFact new_fact) of + (NoChange, _) -> (todo, fbase) + (_, f) -> let !z = mapInsert lbl f fbase in (changed, z) + where + changed = foldr insertIntHeap todo $ + mapFindWithDefault [] lbl dep_blocks + +{- +Note [no old fact] + +We know that the new_fact is >= _|_, so we don't need to join. However, +if the new fact is also _|_, and we have already analysed its block, +we don't need to record a change. So there's a tradeoff here. It turns +out that always recording a change is faster. +-} + +----------------------------------------------------------------------------- +-- DG: an internal data type for 'decorated graphs' +-- TOTALLY internal to Hoopl; each block is decorated with a fact +----------------------------------------------------------------------------- + +type Graph = Graph' Block +type DG f = Graph' (DBlock f) +data DBlock f n e x = DBlock f (Block n e x) -- ^ block decorated with fact + +instance NonLocal n => NonLocal (DBlock f n) where + entryLabel (DBlock _ b) = entryLabel b + successors (DBlock _ b) = successors b + +--- constructors + +dgnil :: DG f n O O +dgnilC :: DG f n C C +dgSplice :: NonLocal n => DG f n e a -> DG f n a x -> DG f n e x + +---- observers + +normalizeGraph :: forall n f e x . + NonLocal n => DG f n e x + -> (Graph n e x, FactBase f) + -- A Graph together with the facts for that graph + -- The domains of the two maps should be identical + +normalizeGraph g = (graphMapBlocks dropFact g, facts g) + where dropFact :: DBlock t t1 t2 t3 -> Block t1 t2 t3 + dropFact (DBlock _ b) = b + facts :: DG f n e x -> FactBase f + facts GNil = noFacts + facts (GUnit _) = noFacts + facts (GMany _ body exit) = bodyFacts body `mapUnion` exitFacts exit + exitFacts :: MaybeO x (DBlock f n C O) -> FactBase f + exitFacts NothingO = noFacts + exitFacts (JustO (DBlock f b)) = mapSingleton (entryLabel b) f + bodyFacts :: LabelMap (DBlock f n C C) -> FactBase f + bodyFacts body = mapFoldWithKey f noFacts body + where f :: forall t a x. (NonLocal t) => Label -> DBlock a t C x -> LabelMap a -> LabelMap a + f lbl (DBlock f _) fb = mapInsert lbl f fb + +--- implementation of the constructors (boring) + +dgnil = GNil +dgnilC = GMany NothingO emptyBody NothingO + +dgSplice = U.splice fzCat + where fzCat :: DBlock f n e O -> DBlock t n O x -> DBlock f n e x + fzCat (DBlock f b1) (DBlock _ b2) = DBlock f $! b1 `U.cat` b2 + -- NB. strictness, this function is hammered. + +---------------------------------------------------------------- +-- Utilities +---------------------------------------------------------------- + +-- Lifting based on shape: +-- - from nodes to blocks +-- - from facts to fact-like things +-- Lowering back: +-- - from fact-like things to facts +-- Note that the latter two functions depend only on the entry shape. +class ShapeLifter e x where + singletonDG :: f -> n e x -> DG f n e x + fwdEntryFact :: NonLocal n => n e x -> f -> Fact e f + fwdEntryLabel :: NonLocal n => n e x -> MaybeC e [Label] + ftransfer :: FwdTransfer n f -> n e x -> f -> Fact x f + frewrite :: FwdRewrite m n f -> n e x + -> f -> m (Maybe (Graph n e x, FwdRewrite m n f)) +-- @ end node.tex + bwdEntryFact :: NonLocal n => DataflowLattice f -> n e x -> Fact e f -> f + btransfer :: BwdTransfer n f -> n e x -> Fact x f -> f + brewrite :: BwdRewrite m n f -> n e x + -> Fact x f -> m (Maybe (Graph n e x, BwdRewrite m n f)) + +instance ShapeLifter C O where + singletonDG f n = gUnitCO (DBlock f (BlockCO n BNil)) + fwdEntryFact n f = mapSingleton (entryLabel n) f + bwdEntryFact lat n fb = getFact lat (entryLabel n) fb + ftransfer (FwdTransfer3 (ft, _, _)) n f = ft n f + btransfer (BwdTransfer3 (bt, _, _)) n f = bt n f + frewrite (FwdRewrite3 (fr, _, _)) n f = fr n f + brewrite (BwdRewrite3 (br, _, _)) n f = br n f + fwdEntryLabel n = JustC [entryLabel n] + +instance ShapeLifter O O where + singletonDG f = gUnitOO . DBlock f . BMiddle + fwdEntryFact _ f = f + bwdEntryFact _ _ f = f + ftransfer (FwdTransfer3 (_, ft, _)) n f = ft n f + btransfer (BwdTransfer3 (_, bt, _)) n f = bt n f + frewrite (FwdRewrite3 (_, fr, _)) n f = fr n f + brewrite (BwdRewrite3 (_, br, _)) n f = br n f + fwdEntryLabel _ = NothingC + +instance ShapeLifter O C where + singletonDG f n = gUnitOC (DBlock f (BlockOC BNil n)) + fwdEntryFact _ f = f + bwdEntryFact _ _ f = f + ftransfer (FwdTransfer3 (_, _, ft)) n f = ft n f + btransfer (BwdTransfer3 (_, _, bt)) n f = bt n f + frewrite (FwdRewrite3 (_, _, fr)) n f = fr n f + brewrite (BwdRewrite3 (_, _, br)) n f = br n f + fwdEntryLabel _ = NothingC + +{- +class ShapeLifter e x where + singletonDG :: f -> n e x -> DG f n e x + +instance ShapeLifter C O where + singletonDG f n = gUnitCO (DBlock f (BlockCO n BNil)) + +instance ShapeLifter O O where + singletonDG f = gUnitOO . DBlock f . BMiddle + +instance ShapeLifter O C where + singletonDG f n = gUnitOC (DBlock f (BlockOC BNil n)) +-} + +-- Fact lookup: the fact `orelse` bottom +getFact :: DataflowLattice f -> Label -> FactBase f -> f +getFact lat l fb = case lookupFact l fb of Just f -> f + Nothing -> fact_bot lat + + + +{- Note [Respects fuel] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-} +-- $fuel +-- A value of type 'FwdRewrite' or 'BwdRewrite' /respects fuel/ if +-- any function contained within the value satisfies the following properties: +-- +-- * When fuel is exhausted, it always returns 'Nothing'. +-- +-- * When it returns @Just g rw@, it consumes /exactly/ one unit +-- of fuel, and new rewrite 'rw' also respects fuel. +-- +-- Provided that functions passed to 'mkFRewrite', 'mkFRewrite3', +-- 'mkBRewrite', and 'mkBRewrite3' are not aware of the fuel supply, +-- the results respect fuel. +-- +-- It is an /unchecked/ run-time error for the argument passed to 'wrapFR', +-- 'wrapFR2', 'wrapBR', or 'warpBR2' to return a function that does not respect fuel. + +-- ----------------------------------------------------------------------------- +-- a Heap of Int + +-- We should really use a proper Heap here, but my attempts to make +-- one have not succeeded in beating the simple ordered list. Another +-- alternative is IntSet (using deleteFindMin), but that was also +-- slower than the ordered list in my experiments --SDM 25/1/2012 + +type IntHeap = [Int] -- ordered + +insertIntHeap :: Int -> [Int] -> [Int] +insertIntHeap x [] = [x] +insertIntHeap x (y:ys) + | x < y = x : y : ys + | x == y = x : ys + | otherwise = y : insertIntHeap x ys diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs index 04586b1029..797b785de2 100644 --- a/compiler/cmm/MkGraph.hs +++ b/compiler/cmm/MkGraph.hs @@ -1,29 +1,19 @@ {-# LANGUAGE GADTs #-} --- ToDo: remove -fno-warn-warnings-deprecations -{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-} --- ToDo: remove -fno-warn-incomplete-patterns -{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-} - --- Module for building CmmAGraphs. - --- As the CmmAGraph is a wrapper over Graph CmmNode O x, it is different --- from Hoopl's AGraph. The current clients expect functions with the --- same names Hoopl uses, so this module cannot be in the same namespace --- as Compiler.Hoopl. - module MkGraph - ( CmmAGraph - , emptyAGraph, (<*>), catAGraphs, outOfLine - , mkLabel, mkMiddle, mkLast - , withFreshLabel, withUnique, lgraphOfAGraph, labelAGraph + ( CmmAGraph, CgStmt(..) + , (<*>), catAGraphs + , mkLabel, mkMiddle, mkLast, outOfLine + , lgraphOfAGraph, labelAGraph , stackStubExpr - , mkNop, mkAssign, mkStore, mkCall, mkCmmCall, mkSafeCall, mkUnsafeCall, mkFinalCall - , mkJump, mkDirectJump, mkForeignJump, mkJumpGC, mkCbranch, mkSwitch - , mkReturn, mkReturnSimple, mkComment, mkCallEntry - , mkBranch, mkCmmIfThenElse, mkCmmIfThen, mkCmmWhileDo - , copyInOflow, copyInSlot, copyOutOflow, copyOutSlot + , mkNop, mkAssign, mkStore, mkUnsafeCall, mkFinalCall, mkCallReturnsTo + , mkJump, mkDirectJump, mkForeignJump, mkForeignJumpExtra, mkJumpGC + , mkCbranch, mkSwitch + , mkReturn, mkReturnSimple, mkComment, mkCallEntry, mkBranch + , copyInOflow, copyOutOflow + , noExtraStack + , toCall, Transfer(..) ) where @@ -31,250 +21,232 @@ import BlockId import Cmm import CmmCallConv (assignArgumentsPos, ParamLocation(..)) + import Compiler.Hoopl hiding (Unique, (<*>), mkFirst, mkMiddle, mkLast, mkLabel, mkBranch, Shape(..)) -import qualified Compiler.Hoopl as H -import Compiler.Hoopl.GHC (uniqueToLbl) import FastString import ForeignCall import Outputable import Prelude hiding (succ) import SMRep (ByteOff) -import StaticFlags -import Unique import UniqSupply -import Util +import OrdList #include "HsVersions.h" -{- -A 'CmmAGraph' is an abstract version of a 'Graph CmmNode O x' from module -'Cmm'. The difference is that the 'CmmAGraph' can be eigher open of closed at -exit and it can supply fresh Labels and Uniques. - -It also supports a splicing operation <*>, which is different from the Hoopl's -<*>, because it splices two CmmAGraphs. Specifically, it can splice Graph -O C and Graph O x. In this case, the open beginning of the second graph is -thrown away. In the debug mode this sequence is checked to be empty or -containing a branch (see note [Branch follows branch]). - -When an CmmAGraph open at exit is being converted to a CmmGraph, the output -exit sequence is considered unreachable. If the graph consist of one block -only, if it not the case and we crash. Otherwise we just throw the exit -sequence away (and in debug mode we test that it really was unreachable). --} - -{- -Node [Branch follows branch] -============================ -Why do we say it's ok for a Branch to follow a Branch? -Because the standard constructor mkLabel has fall-through -semantics. So if you do a mkLabel, you finish the current block, -giving it a label, and start a new one that branches to that label. -Emitting a Branch at this point is fine: - goto L1; L2: ...stuff... --} - -data CmmGraphOC = Opened (Graph CmmNode O O) - | Closed (Graph CmmNode O C) -type CmmAGraph = UniqSM CmmGraphOC -- Graph open at entry - -{- -MS: I began with - newtype CmmAGraph = forall x. AG (UniqSM (Graph CmmNode O x)) -but that does not work well, because we cannot take the graph -out of the monad -- we do not know the type of what we would take -out and pattern matching does not help, as we cannot pattern match -on a graph inside the monad. --} -data Transfer = Call | Jump | Ret deriving Eq +----------------------------------------------------------------------------- +-- Building Graphs + + +-- | CmmAGraph is a chunk of code consisting of: +-- +-- * ordinary statements (assignments, stores etc.) +-- * jumps +-- * labels +-- * out-of-line labelled blocks +-- +-- The semantics is that control falls through labels and out-of-line +-- blocks. Everything after a jump up to the next label is by +-- definition unreachable code, and will be discarded. +-- +-- Two CmmAGraphs can be stuck together with <*>, with the meaning that +-- control flows from the first to the second. +-- +-- A 'CmmAGraph' can be turned into a 'CmmGraph' (closed at both ends) +-- by providing a label for the entry point; see 'labelAGraph'. +-- +type CmmAGraph = OrdList CgStmt + +data CgStmt + = CgLabel BlockId + | CgStmt (CmmNode O O) + | CgLast (CmmNode O C) + | CgFork BlockId CmmAGraph + +flattenCmmAGraph :: BlockId -> CmmAGraph -> CmmGraph +flattenCmmAGraph id stmts = + CmmGraph { g_entry = id, + g_graph = GMany NothingO body NothingO } + where + (block, blocks) = flatten (fromOL stmts) + entry = blockJoinHead (CmmEntry id) block + body = foldr addBlock emptyBody (entry:blocks) + + flatten :: [CgStmt] -> (Block CmmNode O C, [Block CmmNode C C]) + flatten [] = panic "flatten []" + + -- A label at the end of a function or fork: this label must not be reachable, + -- but it might be referred to from another BB that also isn't reachable. + -- Eliminating these has to be done with a dead-code analysis. For now, + -- we just make it into a well-formed block by adding a recursive jump. + flatten [CgLabel id] + = (goto_id, [blockJoinHead (CmmEntry id) goto_id] ) + where goto_id = blockJoinTail emptyBlock (CmmBranch id) + + -- A jump/branch: throw away all the code up to the next label, because + -- it is unreachable. Be careful to keep forks that we find on the way. + flatten (CgLast stmt : stmts) + = case dropWhile isOrdinaryStmt stmts of + [] -> + ( sing, [] ) + [CgLabel id] -> + ( sing, [blockJoin (CmmEntry id) emptyBlock (CmmBranch id)] ) + (CgLabel id : stmts) -> + ( sing, blockJoinHead (CmmEntry id) block : blocks ) + where (block,blocks) = flatten stmts + (CgFork fork_id stmts : ss) -> + flatten (CgFork fork_id stmts : CgLast stmt : ss) + _ -> panic "MkGraph.flatten" + where + sing = blockJoinTail emptyBlock stmt + + flatten (s:ss) = + case s of + CgStmt stmt -> (blockCons stmt block, blocks) + CgLabel id -> (blockJoinTail emptyBlock (CmmBranch id), + blockJoinHead (CmmEntry id) block : blocks) + CgFork fork_id stmts -> + (block, blockJoinHead (CmmEntry fork_id) fork_block : fork_blocks ++ blocks) + where (fork_block, fork_blocks) = flatten (fromOL stmts) + _ -> panic "MkGraph.flatten" + where (block,blocks) = flatten ss + +isOrdinaryStmt :: CgStmt -> Bool +isOrdinaryStmt (CgStmt _) = True +isOrdinaryStmt (CgLast _) = True +isOrdinaryStmt _ = False + + ---------- AGraph manipulation -emptyAGraph :: CmmAGraph (<*>) :: CmmAGraph -> CmmAGraph -> CmmAGraph +(<*>) = appOL + catAGraphs :: [CmmAGraph] -> CmmAGraph +catAGraphs = concatOL + +-- | created a sequence "goto id; id:" as an AGraph +mkLabel :: BlockId -> CmmAGraph +mkLabel bid = unitOL (CgLabel bid) + +-- | creates an open AGraph from a given node +mkMiddle :: CmmNode O O -> CmmAGraph +mkMiddle middle = unitOL (CgStmt middle) -mkLabel :: BlockId -> CmmAGraph -- created a sequence "goto id; id:" as an AGraph -mkMiddle :: CmmNode O O -> CmmAGraph -- creates an open AGraph from a given node -mkLast :: CmmNode O C -> CmmAGraph -- created a closed AGraph from a given node +-- | created a closed AGraph from a given node +mkLast :: CmmNode O C -> CmmAGraph +mkLast last = unitOL (CgLast last) -withFreshLabel :: String -> (BlockId -> CmmAGraph) -> CmmAGraph -withUnique :: (Unique -> CmmAGraph) -> CmmAGraph +-- | A labelled code block; should end in a last node +outOfLine :: BlockId -> CmmAGraph -> CmmAGraph +outOfLine l g = unitOL (CgFork l g) +-- | allocate a fresh label for the entry point lgraphOfAGraph :: CmmAGraph -> UniqSM CmmGraph - -- ^ allocate a fresh label for the entry point +lgraphOfAGraph g = do u <- getUniqueM + return (flattenCmmAGraph (mkBlockId u) g) + +-- | use the given BlockId as the label of the entry point labelAGraph :: BlockId -> CmmAGraph -> UniqSM CmmGraph - -- ^ use the given BlockId as the label of the entry point +labelAGraph lbl ag = return (flattenCmmAGraph lbl ag) ---------- No-ops mkNop :: CmmAGraph +mkNop = nilOL + mkComment :: FastString -> CmmAGraph +#ifdef DEBUG +-- SDM: generating all those comments takes time, this saved about 4% for me +mkComment fs = mkMiddle $ CmmComment fs +#else +mkComment _ = nilOL +#endif ---------- Assignment and store mkAssign :: CmmReg -> CmmExpr -> CmmAGraph -mkStore :: CmmExpr -> CmmExpr -> CmmAGraph +mkAssign l r = mkMiddle $ CmmAssign l r ----------- Calls -mkCall :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmActual] -> - UpdFrameOffset -> CmmAGraph -mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmActual] -> - UpdFrameOffset -> CmmAGraph - -- Native C-- calling convention -mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> UpdFrameOffset -> Bool -> CmmAGraph -mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph -mkFinalCall :: CmmExpr -> CCallConv -> [CmmActual] -> UpdFrameOffset -> CmmAGraph - -- Never returns; like exit() or barf() +mkStore :: CmmExpr -> CmmExpr -> CmmAGraph +mkStore l r = mkMiddle $ CmmStore l r ---------- Control transfer -mkJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph -mkDirectJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph -mkJumpGC :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph -mkForeignJump :: Convention -> CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph -mkCbranch :: CmmExpr -> BlockId -> BlockId -> CmmAGraph -mkSwitch :: CmmExpr -> [Maybe BlockId] -> CmmAGraph +mkJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkJump e actuals updfr_off = + lastWithArgs Jump Old NativeNodeCall actuals updfr_off $ + toCall e Nothing updfr_off 0 + +mkDirectJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkDirectJump e actuals updfr_off = + lastWithArgs Jump Old NativeDirectCall actuals updfr_off $ + toCall e Nothing updfr_off 0 + +mkJumpGC :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkJumpGC e actuals updfr_off = + lastWithArgs Jump Old GC actuals updfr_off $ + toCall e Nothing updfr_off 0 + +mkForeignJump :: Convention -> CmmExpr -> [CmmActual] -> UpdFrameOffset + -> CmmAGraph +mkForeignJump conv e actuals updfr_off = + mkForeignJumpExtra conv e actuals updfr_off noExtraStack + +mkForeignJumpExtra :: Convention -> CmmExpr -> [CmmActual] + -> UpdFrameOffset -> (ByteOff, [(CmmExpr, ByteOff)]) + -> CmmAGraph +mkForeignJumpExtra conv e actuals updfr_off extra_stack = + lastWithArgsAndExtraStack Jump Old conv actuals updfr_off extra_stack $ + toCall e Nothing updfr_off 0 + +mkCbranch :: CmmExpr -> BlockId -> BlockId -> CmmAGraph +mkCbranch pred ifso ifnot = mkLast (CmmCondBranch pred ifso ifnot) + +mkSwitch :: CmmExpr -> [Maybe BlockId] -> CmmAGraph +mkSwitch e tbl = mkLast $ CmmSwitch e tbl + mkReturn :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkReturn e actuals updfr_off = + lastWithArgs Ret Old NativeReturn actuals updfr_off $ + toCall e Nothing updfr_off 0 + mkReturnSimple :: [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkReturnSimple actuals updfr_off = + mkReturn e actuals updfr_off + where e = CmmLoad (CmmStackSlot Old updfr_off) gcWord mkBranch :: BlockId -> CmmAGraph -mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> CmmAGraph -mkCmmIfThen :: CmmExpr -> CmmAGraph -> CmmAGraph -mkCmmWhileDo :: CmmExpr -> CmmAGraph -> CmmAGraph - -outOfLine :: CmmAGraph -> CmmAGraph --- ^ The argument is an CmmAGraph that must have an --- empty entry sequence and be closed at the end. --- The result is a new CmmAGraph that is open at the --- end and goes directly from entry to exit, with the --- original graph sitting to the side out-of-line. --- --- Example: mkMiddle (x = 3) --- <*> outOfLine (mkLabel L <*> ...stuff...) --- <*> mkMiddle (y = x) --- Control will flow directly from x=3 to y=x; --- the block starting with L is "on the side". --- --- N.B. algebraically forall g g' : g <*> outOfLine g' == outOfLine g' <*> g +mkBranch bid = mkLast (CmmBranch bid) + +mkFinalCall :: CmmExpr -> CCallConv -> [CmmActual] -> UpdFrameOffset + -> CmmAGraph +mkFinalCall f _ actuals updfr_off = + lastWithArgs Call Old NativeDirectCall actuals updfr_off $ + toCall f Nothing updfr_off 0 + +mkCallReturnsTo :: CmmExpr -> Convention -> [CmmActual] + -> BlockId + -> ByteOff + -> UpdFrameOffset + -> (ByteOff, [(CmmExpr,ByteOff)]) + -> CmmAGraph +mkCallReturnsTo f callConv actuals ret_lbl ret_off updfr_off extra_stack = do + lastWithArgsAndExtraStack Call (Young ret_lbl) callConv actuals + updfr_off extra_stack $ + toCall f (Just ret_lbl) updfr_off ret_off + +mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph +mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as + -------------------------------------------------------------------------- --- ================ IMPLEMENTATION ================-- - --------------------------------------------------- --- Raw CmmAGraph handling - -emptyAGraph = return $ Opened emptyGraph -ag <*> ah = do g <- ag - h <- ah - return (case (g, h) of - (Opened g, Opened h) -> Opened $ g H.<*> h - (Opened g, Closed h) -> Closed $ g H.<*> h - (Closed g, Opened GNil) -> Closed g - (Closed g, Opened (GUnit e)) -> note_unreachable e $ Closed g - (Closed g, Opened (GMany (JustO e) b x)) -> note_unreachable e $ Opened $ g H.|*><*| GMany NothingO b x - (Closed g, Closed (GMany (JustO e) b x)) -> note_unreachable e $ Closed $ g H.|*><*| GMany NothingO b x - :: CmmGraphOC) -catAGraphs = foldl (<*>) emptyAGraph - -outOfLine ag = withFreshLabel "outOfLine" $ \l -> - do g <- ag - return (case g of - Closed (GMany (JustO e) b _) -> note_unreachable e $ Opened $ - GMany (JustO $ BLast $ CmmBranch l) b (JustO $ BFirst $ CmmEntry l) - _ -> panic "outOfLine" - :: CmmGraphOC) - -note_unreachable :: Block CmmNode O x -> a -> a -note_unreachable block graph = - ASSERT (block_is_empty_or_label) -- Note [Branch follows branch] - graph - where block_is_empty_or_label :: Bool - block_is_empty_or_label = case blockToNodeList block of - (NothingC, [], NothingC) -> True - (NothingC, [], JustC (CmmBranch _)) -> True - _ -> False - -mkLabel bid = return $ Opened $ H.mkLast (CmmBranch bid) |*><*| H.mkFirst (CmmEntry bid) -mkMiddle middle = return $ Opened $ H.mkMiddle middle -mkLast last = return $ Closed $ H.mkLast last - -withUnique f = getUniqueM >>= f -withFreshLabel _name f = getUniqueM >>= f . uniqueToLbl . intToUnique . getKey -lgraphOfAGraph g = do u <- getUniqueM - labelAGraph (mkBlockId u) g - -labelAGraph lbl ag = do g <- ag - return $ CmmGraph {g_entry=lbl, g_graph=H.mkFirst (CmmEntry lbl) H.<*> closed g} - where closed :: CmmGraphOC -> Graph CmmNode O C - closed (Closed g) = g - closed (Opened g@(GMany entry body (JustO exit))) = - ASSERT (entryLabel exit `notElem` map entryLabel (postorder_dfs g)) - GMany entry body NothingO - closed (Opened _) = panic "labelAGraph" - --------------------------------------------------- --- CmmAGraph constructions - -mkNop = emptyAGraph -mkComment fs = mkMiddle $ CmmComment fs -mkStore l r = mkMiddle $ CmmStore l r - --- NEED A COMPILER-DEBUGGING FLAG HERE --- Sanity check: any value assigned to a pointer must be non-zero. --- If it's 0, cause a crash immediately. -mkAssign l r = if opt_StubDeadValues then assign l r <*> check l else assign l r - where assign l r = mkMiddle (CmmAssign l r) - check (CmmGlobal _) = mkNop - check l@(CmmLocal reg) = -- if a ptr arg is NULL, cause a crash! - if isGcPtrType ty then - mkCmmIfThen (CmmMachOp (MO_Eq w) [r, stackStubExpr w]) - (assign l (CmmLoad (CmmLit (CmmInt 0 w)) ty)) - else mkNop - where ty = localRegType reg - w = typeWidth ty - r = CmmReg l -- Why are we inserting extra blocks that simply branch to the successors? -- Because in addition to the branch instruction, @mkBranch@ will insert -- a necessary adjustment to the stack pointer. -mkCbranch pred ifso ifnot = mkLast (CmmCondBranch pred ifso ifnot) -mkSwitch e tbl = mkLast $ CmmSwitch e tbl - -mkSafeCall t fs as upd i = withFreshLabel "safe call" $ body - where - body k = - ( mkStore (CmmStackSlot (CallArea (Young k)) (widthInBytes wordWidth)) - (CmmLit (CmmBlock k)) - <*> mkLast (CmmForeignCall {tgt=t, res=fs, args=as, succ=k, updfr=upd, intrbl=i}) - <*> mkLabel k) -mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as -mkBranch bid = mkLast (CmmBranch bid) - -mkCmmIfThenElse e tbranch fbranch = - withFreshLabel "end of if" $ \endif -> - withFreshLabel "start of then" $ \tid -> - withFreshLabel "start of else" $ \fid -> - mkCbranch e tid fid <*> - mkLabel tid <*> tbranch <*> mkBranch endif <*> - mkLabel fid <*> fbranch <*> mkLabel endif - -mkCmmIfThen e tbranch - = withFreshLabel "end of if" $ \endif -> - withFreshLabel "start of then" $ \tid -> - mkCbranch e tid endif <*> - mkLabel tid <*> tbranch <*> mkLabel endif - -mkCmmWhileDo e body = - withFreshLabel "loop test" $ \test -> - withFreshLabel "loop head" $ \head -> - withFreshLabel "end while" $ \endwhile -> - -- Forrest Baskett's while-loop layout - mkBranch test <*> mkLabel head <*> body - <*> mkLabel test <*> mkCbranch e head endwhile - <*> mkLabel endwhile -- For debugging purposes, we can stub out dead stack slots: stackStubExpr :: Width -> CmmExpr @@ -286,12 +258,9 @@ stackStubExpr w = CmmLit (CmmInt 0 w) -- Therefore, for copying arguments and results, we provide different -- functions to pass the arguments in an overflow area and to pass them in spill slots. copyInOflow :: Convention -> Area -> [CmmFormal] -> (Int, CmmAGraph) -copyInSlot :: Convention -> [CmmFormal] -> [CmmNode O O] -copyOutSlot :: Convention -> [LocalReg] -> [CmmNode O O] copyInOflow conv area formals = (offset, catAGraphs $ map mkMiddle nodes) where (offset, nodes) = copyIn oneCopyOflowI conv area formals -copyInSlot c f = snd $ copyIn oneCopySlotI c (panic "no area for copying to slots") f type SlotCopier = Area -> (LocalReg, ByteOff) -> (ByteOff, [CmmNode O O]) -> (ByteOff, [CmmNode O O]) @@ -312,104 +281,86 @@ copyIn oflow conv area formals = adjust rst x@(_, RegisterParam _) = x : rst -- Copy-in one arg, using overflow space if needed. -oneCopyOflowI, oneCopySlotI :: SlotCopier +oneCopyOflowI :: SlotCopier oneCopyOflowI area (reg, off) (n, ms) = (max n off, CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty) : ms) where ty = localRegType reg --- Copy-in one arg, using spill slots if needed -- used for calling conventions at --- a procpoint that is not a return point. The offset is irrelevant here... -oneCopySlotI _ (reg, _) (n, ms) = - (n, CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot (RegSlot reg) w) ty) : ms) - where ty = localRegType reg - w = widthInBytes (typeWidth ty) - - -- Factoring out the common parts of the copyout functions yielded something -- more complicated: -copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual] -> UpdFrameOffset -> - (Int, CmmAGraph) +data Transfer = Call | Jump | Ret deriving Eq + +copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual] + -> UpdFrameOffset + -> (ByteOff, [(CmmExpr,ByteOff)]) -- extra stack stuff + -> (Int, CmmAGraph) + -- Generate code to move the actual parameters into the locations --- required by the calling convention. This includes a store for the return address. +-- required by the calling convention. This includes a store for the +-- return address. -- --- The argument layout function ignores the pointer to the info table, so we slot that --- in here. When copying-out to a young area, we set the info table for return --- and adjust the offsets of the other parameters. --- If this is a call instruction, we adjust the offsets of the other parameters. -copyOutOflow conv transfer area@(CallArea a) actuals updfr_off - = foldr co (init_offset, emptyAGraph) args' +-- The argument layout function ignores the pointer to the info table, +-- so we slot that in here. When copying-out to a young area, we set +-- the info table for return and adjust the offsets of the other +-- parameters. If this is a call instruction, we adjust the offsets +-- of the other parameters. +copyOutOflow conv transfer area actuals updfr_off + (extra_stack_off, extra_stack_stuff) + = foldr co (init_offset, mkNop) (args' ++ stack_params) where co (v, RegisterParam r) (n, ms) = (n, mkAssign (CmmGlobal r) v <*> ms) co (v, StackParam off) (n, ms) = (max n off, mkStore (CmmStackSlot area off) v <*> ms) + stack_params = [ (e, StackParam (off + init_offset)) + | (e,off) <- extra_stack_stuff ] + (setRA, init_offset) = - case a of Young id -> id `seq` -- Generate a store instruction for - -- the return address if making a call + case area of + Young id -> id `seq` -- Generate a store instruction for + -- the return address if making a call if transfer == Call then ([(CmmLit (CmmBlock id), StackParam init_offset)], widthInBytes wordWidth) else ([], 0) - Old -> ([], updfr_off) + Old -> ([], updfr_off) + + arg_offset = init_offset + extra_stack_off args :: [(CmmExpr, ParamLocation)] -- The argument and where to put it args = assignArgumentsPos conv cmmExprType actuals args' = foldl adjust setRA args - where adjust rst (v, StackParam off) = (v, StackParam (off + init_offset)) : rst + where adjust rst (v, StackParam off) = (v, StackParam (off + arg_offset)) : rst adjust rst x@(_, RegisterParam _) = x : rst -copyOutOflow _ _ (RegSlot _) _ _ = panic "cannot copy arguments into a register slot" --- Args passed only in registers and stack slots; no overflow space. --- No return address may apply! -copyOutSlot conv actuals = foldr co [] args - where co (v, RegisterParam r) ms = CmmAssign (CmmGlobal r) (toExp v) : ms - co (v, StackParam off) ms = CmmStore (CmmStackSlot (RegSlot v) off) (toExp v) : ms - toExp r = CmmReg (CmmLocal r) - args = assignArgumentsPos conv localRegType actuals mkCallEntry :: Convention -> [CmmFormal] -> (Int, CmmAGraph) -mkCallEntry conv formals = copyInOflow conv (CallArea Old) formals +mkCallEntry conv formals = copyInOflow conv Old formals -lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual] -> UpdFrameOffset -> - (ByteOff -> CmmAGraph) -> CmmAGraph +lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual] + -> UpdFrameOffset + -> (ByteOff -> CmmAGraph) + -> CmmAGraph lastWithArgs transfer area conv actuals updfr_off last = - let (outArgs, copies) = copyOutOflow conv transfer area actuals updfr_off in + lastWithArgsAndExtraStack transfer area conv actuals + updfr_off noExtraStack last + +lastWithArgsAndExtraStack :: Transfer -> Area -> Convention -> [CmmActual] + -> UpdFrameOffset -> (ByteOff, [(CmmExpr,ByteOff)]) + -> (ByteOff -> CmmAGraph) + -> CmmAGraph +lastWithArgsAndExtraStack transfer area conv actuals updfr_off + extra_stack last = + let (outArgs, copies) = copyOutOflow conv transfer area actuals + updfr_off extra_stack in copies <*> last outArgs --- The area created for the jump and return arguments is the same area as the --- procedure entry. -old :: Area -old = CallArea Old -toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff -> ByteOff -> CmmAGraph +noExtraStack :: (ByteOff, [(CmmExpr,ByteOff)]) +noExtraStack = (0,[]) + +toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff -> ByteOff + -> CmmAGraph toCall e cont updfr_off res_space arg_space = mkLast $ CmmCall e cont arg_space res_space updfr_off -mkJump e actuals updfr_off = - lastWithArgs Jump old NativeNodeCall actuals updfr_off $ toCall e Nothing updfr_off 0 -mkDirectJump e actuals updfr_off = - lastWithArgs Jump old NativeDirectCall actuals updfr_off $ toCall e Nothing updfr_off 0 -mkJumpGC e actuals updfr_off = - lastWithArgs Jump old GC actuals updfr_off $ toCall e Nothing updfr_off 0 -mkForeignJump conv e actuals updfr_off = - lastWithArgs Jump old conv actuals updfr_off $ toCall e Nothing updfr_off 0 -mkReturn e actuals updfr_off = - lastWithArgs Ret old NativeReturn actuals updfr_off $ toCall e Nothing updfr_off 0 - -- where e = CmmLoad (CmmStackSlot (CallArea Old) updfr_off) gcWord -mkReturnSimple actuals updfr_off = - lastWithArgs Ret old NativeReturn actuals updfr_off $ toCall e Nothing updfr_off 0 - where e = CmmLoad (CmmStackSlot (CallArea Old) updfr_off) gcWord - -mkFinalCall f _ actuals updfr_off = - lastWithArgs Call old NativeDirectCall actuals updfr_off $ toCall f Nothing updfr_off 0 - -mkCmmCall f results actuals = mkCall f (NativeDirectCall, NativeReturn) results actuals - --- I'm dropping the SRT, but that should be okay: we plan to reconstruct it later. -mkCall f (callConv, retConv) results actuals updfr_off = - withFreshLabel "call successor" $ \k -> - let area = CallArea $ Young k - (off, copyin) = copyInOflow retConv area results - copyout = lastWithArgs Call area callConv actuals updfr_off - (toCall f (Just k) updfr_off off) - in (copyout <*> mkLabel k <*> copyin) diff --git a/compiler/cmm/OldCmm.hs b/compiler/cmm/OldCmm.hs index fc4706c8c4..00bbe6d2ee 100644 --- a/compiler/cmm/OldCmm.hs +++ b/compiler/cmm/OldCmm.hs @@ -9,9 +9,7 @@ module OldCmm ( CmmGroup, GenCmmGroup, RawCmmGroup, CmmDecl, RawCmmDecl, ListGraph(..), - - CmmInfo(..), CmmInfoTable(..), ClosureTypeInfo(..), UpdateFrame(..), - + UpdateFrame(..), CmmInfoTable(..), ClosureTypeInfo(..), CmmStatic(..), CmmStatics(..), CmmFormal, CmmActual, cmmMapGraph, cmmTopMapGraph, @@ -52,13 +50,6 @@ import ForeignCall -- Info Tables ----------------------------------------------------------------------------- -data CmmInfo - = CmmInfo - (Maybe BlockId) -- GC target. Nothing <=> CPS won't do stack check - -- JD: NOT USED BY NEW CODE GEN - (Maybe UpdateFrame) -- Update frame - CmmInfoTable -- Info table - -- | A frame that is to be pushed before entry to the function. -- Used to handle 'update' frames. data UpdateFrame @@ -85,8 +76,8 @@ data UpdateFrame newtype ListGraph i = ListGraph [GenBasicBlock i] -- | Cmm with the info table as a data type -type CmmGroup = GenCmmGroup CmmStatics CmmInfo (ListGraph CmmStmt) -type CmmDecl = GenCmmDecl CmmStatics CmmInfo (ListGraph CmmStmt) +type CmmGroup = GenCmmGroup CmmStatics CmmInfoTable (ListGraph CmmStmt) +type CmmDecl = GenCmmDecl CmmStatics CmmInfoTable (ListGraph CmmStmt) -- | Cmm with the info tables converted to a list of 'CmmStatic' along with the info -- table label. If we are building without tables-next-to-code there will be no statics @@ -225,16 +216,9 @@ instance UserOfLocalRegs CmmCallTarget where foldRegsUsed f set (CmmCallee e _) = foldRegsUsed f set e foldRegsUsed f set (CmmPrim _ mStmts) = foldRegsUsed f set mStmts -instance UserOfSlots CmmCallTarget where - foldSlotsUsed f set (CmmCallee e _) = foldSlotsUsed f set e - foldSlotsUsed _ set (CmmPrim {}) = set - instance UserOfLocalRegs a => UserOfLocalRegs (CmmHinted a) where foldRegsUsed f set a = foldRegsUsed f set (hintlessCmm a) -instance UserOfSlots a => UserOfSlots (CmmHinted a) where - foldSlotsUsed f set a = foldSlotsUsed f set (hintlessCmm a) - instance DefinerOfLocalRegs a => DefinerOfLocalRegs (CmmHinted a) where foldRegsDefd f set a = foldRegsDefd f set (hintlessCmm a) diff --git a/compiler/cmm/OldCmmLint.hs b/compiler/cmm/OldCmmLint.hs new file mode 100644 index 0000000000..72e40ce4f8 --- /dev/null +++ b/compiler/cmm/OldCmmLint.hs @@ -0,0 +1,209 @@ +----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow 2004-2006 +-- +-- CmmLint: checking the correctness of Cmm statements and expressions +-- +----------------------------------------------------------------------------- + +{-# OPTIONS -fno-warn-tabs #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and +-- detab the module (please do the detabbing in a separate patch). See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces +-- for details + +module OldCmmLint ( + cmmLint, cmmLintTop + ) where + +import BlockId +import OldCmm +import CLabel +import Outputable +import OldPprCmm() +import Constants +import FastString +import Platform + +import Data.Maybe + +-- ----------------------------------------------------------------------------- +-- Exported entry points: + +cmmLint :: (Outputable d, Outputable h) + => Platform -> GenCmmGroup d h (ListGraph CmmStmt) -> Maybe SDoc +cmmLint platform tops = runCmmLint platform (mapM_ (lintCmmDecl platform)) tops + +cmmLintTop :: (Outputable d, Outputable h) + => Platform -> GenCmmDecl d h (ListGraph CmmStmt) -> Maybe SDoc +cmmLintTop platform top = runCmmLint platform (lintCmmDecl platform) top + +runCmmLint :: Outputable a + => Platform -> (a -> CmmLint b) -> a -> Maybe SDoc +runCmmLint _ l p = + case unCL (l p) of + Left err -> Just (vcat [ptext $ sLit ("Cmm lint error:"), + nest 2 err, + ptext $ sLit ("Program was:"), + nest 2 (ppr p)]) + Right _ -> Nothing + +lintCmmDecl :: Platform -> (GenCmmDecl h i (ListGraph CmmStmt)) -> CmmLint () +lintCmmDecl platform (CmmProc _ lbl (ListGraph blocks)) + = addLintInfo (text "in proc " <> pprCLabel platform lbl) $ + let labels = foldl (\s b -> setInsert (blockId b) s) setEmpty blocks + in mapM_ (lintCmmBlock platform labels) blocks + +lintCmmDecl _ (CmmData {}) + = return () + +lintCmmBlock :: Platform -> BlockSet -> GenBasicBlock CmmStmt -> CmmLint () +lintCmmBlock platform labels (BasicBlock id stmts) + = addLintInfo (text "in basic block " <> ppr id) $ + mapM_ (lintCmmStmt platform labels) stmts + +-- ----------------------------------------------------------------------------- +-- lintCmmExpr + +-- Checks whether a CmmExpr is "type-correct", and check for obvious-looking +-- byte/word mismatches. + +lintCmmExpr :: Platform -> CmmExpr -> CmmLint CmmType +lintCmmExpr platform (CmmLoad expr rep) = do + _ <- lintCmmExpr platform expr + -- Disabled, if we have the inlining phase before the lint phase, + -- we can have funny offsets due to pointer tagging. -- EZY + -- when (widthInBytes (typeWidth rep) >= wORD_SIZE) $ + -- cmmCheckWordAddress expr + return rep +lintCmmExpr platform expr@(CmmMachOp op args) = do + tys <- mapM (lintCmmExpr platform) args + if map (typeWidth . cmmExprType) args == machOpArgReps op + then cmmCheckMachOp op args tys + else cmmLintMachOpErr expr (map cmmExprType args) (machOpArgReps op) +lintCmmExpr platform (CmmRegOff reg offset) + = lintCmmExpr platform (CmmMachOp (MO_Add rep) + [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)]) + where rep = typeWidth (cmmRegType reg) +lintCmmExpr _ expr = + return (cmmExprType expr) + +-- Check for some common byte/word mismatches (eg. Sp + 1) +cmmCheckMachOp :: MachOp -> [CmmExpr] -> [CmmType] -> CmmLint CmmType +cmmCheckMachOp op [lit@(CmmLit (CmmInt { })), reg@(CmmReg _)] tys + = cmmCheckMachOp op [reg, lit] tys +cmmCheckMachOp op _ tys + = return (machOpResultType op tys) + +isOffsetOp :: MachOp -> Bool +isOffsetOp (MO_Add _) = True +isOffsetOp (MO_Sub _) = True +isOffsetOp _ = False + +-- This expression should be an address from which a word can be loaded: +-- check for funny-looking sub-word offsets. +_cmmCheckWordAddress :: CmmExpr -> CmmLint () +_cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)]) + | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0 + = cmmLintDubiousWordOffset e +_cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg]) + | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral wORD_SIZE /= 0 + = cmmLintDubiousWordOffset e +_cmmCheckWordAddress _ + = return () + +-- No warnings for unaligned arithmetic with the node register, +-- which is used to extract fields from tagged constructor closures. +notNodeReg :: CmmExpr -> Bool +notNodeReg (CmmReg reg) | reg == nodeReg = False +notNodeReg _ = True + +lintCmmStmt :: Platform -> BlockSet -> CmmStmt -> CmmLint () +lintCmmStmt platform labels = lint + where lint (CmmNop) = return () + lint (CmmComment {}) = return () + lint stmt@(CmmAssign reg expr) = do + erep <- lintCmmExpr platform expr + let reg_ty = cmmRegType reg + if (erep `cmmEqType_ignoring_ptrhood` reg_ty) + then return () + else cmmLintAssignErr stmt erep reg_ty + lint (CmmStore l r) = do + _ <- lintCmmExpr platform l + _ <- lintCmmExpr platform r + return () + lint (CmmCall target _res args _) = + do lintTarget platform labels target + mapM_ (lintCmmExpr platform . hintlessCmm) args + lint (CmmCondBranch e id) = checkTarget id >> lintCmmExpr platform e >> checkCond e + lint (CmmSwitch e branches) = do + mapM_ checkTarget $ catMaybes branches + erep <- lintCmmExpr platform e + if (erep `cmmEqType_ignoring_ptrhood` bWord) + then return () + else cmmLintErr (text "switch scrutinee is not a word: " <> ppr e <> + text " :: " <> ppr erep) + lint (CmmJump e _) = lintCmmExpr platform e >> return () + lint (CmmReturn) = return () + lint (CmmBranch id) = checkTarget id + checkTarget id = if setMember id labels then return () + else cmmLintErr (text "Branch to nonexistent id" <+> ppr id) + +lintTarget :: Platform -> BlockSet -> CmmCallTarget -> CmmLint () +lintTarget platform _ (CmmCallee e _) = do _ <- lintCmmExpr platform e + return () +lintTarget _ _ (CmmPrim _ Nothing) = return () +lintTarget platform labels (CmmPrim _ (Just stmts)) + = mapM_ (lintCmmStmt platform labels) stmts + + +checkCond :: CmmExpr -> CmmLint () +checkCond (CmmMachOp mop _) | isComparisonMachOp mop = return () +checkCond (CmmLit (CmmInt x t)) | x == 0 || x == 1, t == wordWidth = return () -- constant values +checkCond expr + = cmmLintErr (hang (text "expression is not a conditional:") 2 + (ppr expr)) + +-- ----------------------------------------------------------------------------- +-- CmmLint monad + +-- just a basic error monad: + +newtype CmmLint a = CmmLint { unCL :: Either SDoc a } + +instance Monad CmmLint where + CmmLint m >>= k = CmmLint $ case m of + Left e -> Left e + Right a -> unCL (k a) + return a = CmmLint (Right a) + +cmmLintErr :: SDoc -> CmmLint a +cmmLintErr msg = CmmLint (Left msg) + +addLintInfo :: SDoc -> CmmLint a -> CmmLint a +addLintInfo info thing = CmmLint $ + case unCL thing of + Left err -> Left (hang info 2 err) + Right a -> Right a + +cmmLintMachOpErr :: CmmExpr -> [CmmType] -> [Width] -> CmmLint a +cmmLintMachOpErr expr argsRep opExpectsRep + = cmmLintErr (text "in MachOp application: " $$ + nest 2 (ppr expr) $$ + (text "op is expecting: " <+> ppr opExpectsRep) $$ + (text "arguments provide: " <+> ppr argsRep)) + +cmmLintAssignErr :: CmmStmt -> CmmType -> CmmType -> CmmLint a +cmmLintAssignErr stmt e_ty r_ty + = cmmLintErr (text "in assignment: " $$ + nest 2 (vcat [ppr stmt, + text "Reg ty:" <+> ppr r_ty, + text "Rhs ty:" <+> ppr e_ty])) + + + +cmmLintDubiousWordOffset :: CmmExpr -> CmmLint a +cmmLintDubiousWordOffset expr + = cmmLintErr (text "offset is not a multiple of words: " $$ + nest 2 (ppr expr)) diff --git a/compiler/cmm/OldPprCmm.hs b/compiler/cmm/OldPprCmm.hs index d6a12221fb..a30be9c6c7 100644 --- a/compiler/cmm/OldPprCmm.hs +++ b/compiler/cmm/OldPprCmm.hs @@ -63,10 +63,6 @@ instance Outputable instr => Outputable (GenBasicBlock instr) where instance Outputable CmmStmt where ppr s = pprStmt s -instance Outputable CmmInfo where - ppr i = pprInfo i - - -- -------------------------------------------------------------------------- instance Outputable CmmSafety where ppr CmmUnsafe = ptext (sLit "_unsafe_call_") @@ -74,21 +70,6 @@ instance Outputable CmmSafety where ppr (CmmSafe srt) = ppr srt -- -------------------------------------------------------------------------- --- Info tables. The current pretty printer needs refinement --- but will work for now. --- --- For ideas on how to refine it, they used to be printed in the --- style of C--'s 'stackdata' declaration, just inside the proc body, --- and were labelled with the procedure name ++ "_info". -pprInfo :: CmmInfo -> SDoc -pprInfo (CmmInfo _gc_target update_frame info_table) = - vcat [{-ptext (sLit "gc_target: ") <> - maybe (ptext (sLit "<none>")) ppr gc_target,-} - ptext (sLit "update_frame: ") <> - maybe (ptext (sLit "<none>")) pprUpdateFrame update_frame, - ppr info_table] - --- -------------------------------------------------------------------------- -- Basic blocks look like assembly blocks. -- lbl: stmt ; stmt ; .. pprBBlock :: Outputable stmt => GenBasicBlock stmt -> SDoc diff --git a/compiler/cmm/OptimizationFuel.hs b/compiler/cmm/OptimizationFuel.hs index a85b11bcc6..6e968c0b1d 100644 --- a/compiler/cmm/OptimizationFuel.hs +++ b/compiler/cmm/OptimizationFuel.hs @@ -8,10 +8,8 @@ module OptimizationFuel ( OptimizationFuel, amountOfFuel, tankFilledTo, unlimitedFuel, anyFuelLeft, oneLessFuel , OptFuelState, initOptFuelState - , FuelConsumer, FuelUsingMonad, FuelState - , fuelGet, fuelSet, lastFuelPass, setFuelPass - , fuelExhausted, fuelDec1, tryWithFuel - , runFuelIO, runInfiniteFuelIO, fuelConsumingPass + , FuelConsumer, FuelState + , runFuelIO, runInfiniteFuelIO , FuelUniqSM , liftUniq ) @@ -62,25 +60,20 @@ anyFuelLeft (OptimizationFuel f) = f > 0 oneLessFuel (OptimizationFuel f) = ASSERT (f > 0) (OptimizationFuel (f - 1)) unlimitedFuel = OptimizationFuel infiniteFuel -data FuelState = FuelState { fs_fuel :: OptimizationFuel, fs_lastpass :: String } -newtype FuelUniqSM a = FUSM { unFUSM :: FuelState -> UniqSM (a, FuelState) } - -fuelConsumingPass :: String -> FuelConsumer a -> FuelUniqSM a -fuelConsumingPass name f = do setFuelPass name - fuel <- fuelGet - let (a, fuel') = f fuel - fuelSet fuel' - return a +data FuelState = FuelState { fs_fuel :: {-# UNPACK #-} !OptimizationFuel, + fs_lastpass :: String } +newtype FuelUniqSM a = FUSM { unFUSM :: UniqSupply -> FuelState -> (# a, UniqSupply, FuelState #) } runFuelIO :: OptFuelState -> FuelUniqSM a -> IO a runFuelIO fs (FUSM f) = do pass <- readIORef (pass_ref fs) fuel <- readIORef (fuel_ref fs) u <- mkSplitUniqSupply 'u' - let (a, FuelState fuel' pass') = initUs_ u $ f (FuelState fuel pass) - writeIORef (pass_ref fs) pass' - writeIORef (fuel_ref fs) fuel' - return a + case f u (FuelState fuel pass) of + (# a, _, FuelState fuel' pass' #) -> do + writeIORef (pass_ref fs) pass' + writeIORef (fuel_ref fs) fuel' + return a -- ToDo: Do we need the pass_ref when we are doing infinite fueld -- transformations? @@ -88,21 +81,32 @@ runInfiniteFuelIO :: OptFuelState -> FuelUniqSM a -> IO a runInfiniteFuelIO fs (FUSM f) = do pass <- readIORef (pass_ref fs) u <- mkSplitUniqSupply 'u' - let (a, FuelState _ pass') = initUs_ u $ f (FuelState unlimitedFuel pass) - writeIORef (pass_ref fs) pass' - return a + case f u (FuelState unlimitedFuel pass) of + (# a, _, FuelState _fuel pass' #) -> do + writeIORef (pass_ref fs) pass' + return a instance Monad FuelUniqSM where - FUSM f >>= k = FUSM (\s -> f s >>= \(a, s') -> unFUSM (k a) s') - return a = FUSM (\s -> return (a, s)) + FUSM f >>= k = FUSM (\u s -> case f u s of (# a, u', s' #) -> + unFUSM (k a) u' s') + return a = FUSM (\u s -> (# a, u, s #)) instance MonadUnique FuelUniqSM where - getUniqueSupplyM = liftUniq getUniqueSupplyM - getUniqueM = liftUniq getUniqueM - getUniquesM = liftUniq getUniquesM + getUniqueSupplyM = + FUSM $ \us f -> case splitUniqSupply us of + (us1,us2) -> (# us1, us2, f #) + + getUniqueM = + FUSM $ \us f -> case splitUniqSupply us of + (us1,us2) -> (# uniqFromSupply us1, us2, f #) + + getUniquesM = + FUSM $ \us f -> case splitUniqSupply us of + (us1,us2) -> (# uniqsFromSupply us1, us2, f #) + liftUniq :: UniqSM x -> FuelUniqSM x -liftUniq x = FUSM (\s -> x >>= (\u -> return (u, s))) +liftUniq x = FUSM (\u s -> case initUs u x of (a,u') -> (# a, u', s #)) class Monad m => FuelUsingMonad m where fuelGet :: m OptimizationFuel @@ -110,25 +114,14 @@ class Monad m => FuelUsingMonad m where lastFuelPass :: m String setFuelPass :: String -> m () -fuelExhausted :: FuelUsingMonad m => m Bool -fuelExhausted = fuelGet >>= return . anyFuelLeft - -fuelDec1 :: FuelUsingMonad m => m () -fuelDec1 = fuelGet >>= fuelSet . oneLessFuel - -tryWithFuel :: FuelUsingMonad m => a -> m (Maybe a) -tryWithFuel r = do f <- fuelGet - if anyFuelLeft f then fuelSet (oneLessFuel f) >> return (Just r) - else return Nothing - instance FuelUsingMonad FuelUniqSM where fuelGet = extract fs_fuel lastFuelPass = extract fs_lastpass - fuelSet fuel = FUSM (\s -> return ((), s { fs_fuel = fuel })) - setFuelPass pass = FUSM (\s -> return ((), s { fs_lastpass = pass })) + fuelSet fuel = FUSM (\u s -> (# (), u, s { fs_fuel = fuel } #)) + setFuelPass pass = FUSM (\u s -> (# (), u, s { fs_lastpass = pass } #)) extract :: (FuelState -> a) -> FuelUniqSM a -extract f = FUSM (\s -> return (f s, s)) +extract f = FUSM (\u s -> (# f s, u, s #)) instance FuelMonad FuelUniqSM where getFuel = liftM amountOfFuel fuelGet @@ -137,6 +130,6 @@ instance FuelMonad FuelUniqSM where -- Don't bother to checkpoint the unique supply; it doesn't matter instance CheckpointMonad FuelUniqSM where type Checkpoint FuelUniqSM = FuelState - checkpoint = FUSM $ \fuel -> return (fuel, fuel) - restart fuel = FUSM $ \_ -> return ((), fuel) + checkpoint = FUSM $ \u fuel -> (# fuel, u, fuel #) + restart fuel = FUSM $ \u _ -> (# (), u, fuel #) diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs index 183708c08e..dee6ee881e 100644 --- a/compiler/cmm/PprCmm.hs +++ b/compiler/cmm/PprCmm.hs @@ -146,8 +146,6 @@ pprConvention Slow = text "<slow-convention>" pprConvention GC = text "<gc-convention>" pprConvention PrimOpCall = text "<primop-call-convention>" pprConvention PrimOpReturn = text "<primop-ret-convention>" -pprConvention (Foreign c) = ppr c -pprConvention (Private {}) = text "<private-convention>" pprForeignConvention :: ForeignConvention -> SDoc pprForeignConvention (ForeignConvention c as rs) = ppr c <> ppr as <> ppr rs diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs index 7503127555..119f2b7239 100644 --- a/compiler/cmm/PprCmmExpr.hs +++ b/compiler/cmm/PprCmmExpr.hs @@ -237,12 +237,8 @@ pprLocalReg (LocalReg uniq rep) -- Stack areas pprArea :: Area -> SDoc -pprArea (RegSlot r) = hcat [ text "slot<", ppr r, text ">" ] -pprArea (CallArea id) = pprAreaId id - -pprAreaId :: AreaId -> SDoc -pprAreaId Old = text "old" -pprAreaId (Young id) = hcat [ text "young<", ppr id, text ">" ] +pprArea Old = text "old" +pprArea (Young id) = hcat [ text "young<", ppr id, text ">" ] -- needs to be kept in syn with CmmExpr.hs.GlobalReg -- diff --git a/compiler/cmm/SMRep.lhs b/compiler/cmm/SMRep.lhs index ce30799bf6..8b3308ef97 100644 --- a/compiler/cmm/SMRep.lhs +++ b/compiler/cmm/SMRep.lhs @@ -21,6 +21,7 @@ module SMRep ( StgWord, StgHalfWord, hALF_WORD_SIZE, hALF_WORD_SIZE_IN_BITS, WordOff, ByteOff, + roundUpToWords, -- * Closure repesentation SMRep(..), -- CmmInfo sees the rep; no one else does @@ -57,6 +58,7 @@ import FastString import Data.Char( ord ) import Data.Word +import Data.Bits \end{code} @@ -69,6 +71,9 @@ import Data.Word \begin{code} type WordOff = Int -- Word offset, or word count type ByteOff = Int -- Byte offset, or byte count + +roundUpToWords :: ByteOff -> ByteOff +roundUpToWords n = (n + (wORD_SIZE - 1)) .&. (complement (wORD_SIZE - 1)) \end{code} StgWord is a type representing an StgWord on the target platform. @@ -93,6 +98,7 @@ hALF_WORD_SIZE_IN_BITS = 32 #endif \end{code} + %************************************************************************ %* * \subsubsection[SMRep-datatype]{@SMRep@---storage manager representation} diff --git a/compiler/cmm/cmm-notes b/compiler/cmm/cmm-notes index 81882c8c0e..0e6a2341f2 100644 --- a/compiler/cmm/cmm-notes +++ b/compiler/cmm/cmm-notes @@ -24,27 +24,10 @@ More notes (June 11) * Check in ClosureInfo:
-- NB: Results here should line up with the results of SMRep.rtsClosureType
-* Possible refactoring: Nuke AGraph in favour of
- mkIfThenElse :: Expr -> Graph -> Graph -> FCode Graph
- or even
- mkIfThenElse :: HasUniques m => Expr -> Graph -> Graph -> m Graph
- (Remmber that the .cmm file parser must use this function)
-
- or parameterise FCode over its envt; the CgState part seem useful for both
-
* "Remove redundant reloads" in CmmSpillReload should be redundant; since
insertLateReloads is now gone, every reload is reloading a live variable.
Test and nuke.
-* Stack layout is very like register assignment: find non-conflicting assigments.
- In particular we can use colouring or linear scan (etc).
-
- We'd fine-grain interference (on a word by word basis) to get maximum overlap.
- But that may make very big interference graphs. So linear scan might be
- more attactive.
-
- NB: linear scan does on-the-fly live range splitting.
-
* When stubbing dead slots be careful not to write into an area that
overlaps with an area that's in use. So stubbing needs to *follow*
stack layout.
@@ -81,10 +64,6 @@ Things to do: Old.Cmm. We should abstract it to work on both representations, it needs only to
convert a CmmInfoTable to [CmmStatic].
- - The MkGraph currenty uses a different semantics for <*> than Hoopl. Maybe
- we could convert codeGen/StgCmm* clients to the Hoopl's semantics?
- It's all deeply unsatisfactory.
-
- Improve performance of Hoopl.
A nofib comparison of -fasm vs -fnewcodegen nofib compilation parameters
@@ -101,18 +80,12 @@ Things to do: When compiling nofib, ghc-head + libraries compiled with -fnew-codegen
is 31.4% slower (http://fox.auryn.cz/msrc/0517_hoopl/32bit.oldghcoldgen.zipghcoldgen.txt).
- So we generate a bit better code, but it takes us longer!
-
EZY: Also importantly, Hoopl uses dramatically more memory than the
old code generator.
- Are all blockToNodeList and blockOfNodeList really needed? Maybe we could
splice blocks instead?
- In the CmmContFlowOpt.blockConcat, using Dataflow seems too clumsy. Still,
- a block catenation function would be probably nicer than blockToNodeList
- / blockOfNodeList combo.
-
- lowerSafeForeignCall seems too lowlevel. Just use Dataflow. After that
delete splitEntrySeq from HooplUtils.
@@ -129,10 +102,6 @@ Things to do: - NB that CmmProcPoint line 283 has a hack that works around a GADT-related
bug in 6.10.
- - SDM (2010-02-26) can we remove the Foreign constructor from Convention?
- Reason: we never generate code for a function with the Foreign
- calling convention, and the code for calling foreign calls is generated
-
- AsmCodeGen has a generic Cmm optimiser; move this into new pipeline
EZY (2011-04-16): The mini-inliner has been generalized and ported,
but the constant folding and other optimizations need to still be
@@ -161,16 +130,6 @@ Things to do: - Top-level SRT threading is a bit ugly
- - Add type/newtype for CmmModule = [CmmGroup] -- A module
- CmmGroup = [CmmTop] -- A .o file
- CmmTop = Proc | Data -- A procedure or data
-
- - This is a *change*: currently a CmmGroup is one function's-worth of code
- regardless of SplitObjs. Question: can we *always* generate M.o if there
- is just one element in the list (rather than M/M1.o, M/M2.o etc)
-
- One SRT per group.
-
- See "CAFs" below; we want to totally refactor the way SRTs are calculated
- Pull out Areas into its own module
|