diff options
author | Simon Marlow <marlowsd@gmail.com> | 2012-02-03 15:03:06 +0000 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2012-02-08 15:50:43 +0000 |
commit | 76999b605423f530ec17562d772eda1c1672db53 (patch) | |
tree | 3ca1208d02ca753ffe07e13fe1bdbc1388d7e9cf | |
parent | cd38928495f9186646273432151259f3d654b7e2 (diff) | |
download | haskell-76999b605423f530ec17562d772eda1c1672db53.tar.gz |
New stack layout algorithm
Also:
- improvements to code generation: push slow-call continuations
on the stack instead of generating explicit continuations
- remove unused CmmInfo wrapper type (replace with CmmInfoTable)
- squash Area and AreaId together, remove now-unused RegSlot
- comment out old unused stack-allocation code that no longer
compiles after removal of RegSlot
28 files changed, 934 insertions, 263 deletions
diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs index e31a95ac7d..67e81ec268 100644 --- a/compiler/cmm/Cmm.hs +++ b/compiler/cmm/Cmm.hs @@ -111,7 +111,8 @@ type CmmBwdRewrite f = BwdRewrite FuelUniqSM CmmNode f -- Info Tables ----------------------------------------------------------------------------- -data CmmTopInfo = TopInfo {info_tbl :: CmmInfoTable, stack_info :: CmmStackInfo} +data CmmTopInfo = TopInfo { info_tbl :: CmmInfoTable + , stack_info :: CmmStackInfo } data CmmStackInfo = StackInfo { diff --git a/compiler/cmm/CmmBuildInfoTables.hs b/compiler/cmm/CmmBuildInfoTables.hs index 4bc258e7de..0e2b26a221 100644 --- a/compiler/cmm/CmmBuildInfoTables.hs +++ b/compiler/cmm/CmmBuildInfoTables.hs @@ -18,7 +18,7 @@ module CmmBuildInfoTables , TopSRT, emptySRT, srtToData , bundleCAFs , lowerSafeForeignCalls - , cafTransfers, liveSlotTransfers + , cafTransfers , mkLiveness ) where @@ -98,7 +98,7 @@ foldSet = Set.foldr -- Also, don't forget to stop at the old end of the stack (oldByte), -- which may differ depending on whether there is an update frame. - +{- type RegSlotInfo = ( Int -- Offset from oldest byte of Old area , LocalReg -- The register @@ -172,15 +172,18 @@ live_ptrs oldByte slotEnv areaMap bid = slots :: SubAreaSet -- The SubAreaSet for 'bid' slots = expectJust "live_ptrs slots" $ mapLookup bid slotEnv youngByte = expectJust "live_ptrs bid_pos" $ Map.lookup (CallArea (Young bid)) areaMap +-} -- Construct the stack maps for a procedure _if_ it needs an infotable. -- When wouldn't a procedure need an infotable? If it is a procpoint that -- is not the successor of a call. +{- setInfoTableStackMap :: SlotEnv -> AreaMap -> CmmDecl -> CmmDecl setInfoTableStackMap slotEnv areaMap t@(CmmProc (TopInfo {stack_info=StackInfo {updfr_space = Just updfr_off}}) _ (CmmGraph {g_entry = eid})) = updInfo (const (live_ptrs updfr_off slotEnv areaMap eid)) id t +-} setInfoTableStackMap _ _ t = t @@ -500,8 +503,8 @@ lowerSafeForeignCall entry areaMap blocks bid m saveRetVals = foldl (<**>) mkNop $ map (M.mkMiddle . spill) rs spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r) regSlot r@(LocalReg _ _) = CmmRegOff (CmmGlobal Sp) (sp_off - offset) - where offset = w + expectJust "lowerForeign" (Map.lookup (RegSlot r) areaMap) - sp_off = wORD_SIZE + expectJust "lowerForeign" (Map.lookup (CallArea area) areaMap) + where offset = w + expectJust "lowerForeign" Nothing -- XXX need to fix this: (Map.lookup (RegSlot r) areaMap) + sp_off = wORD_SIZE + expectJust "lowerForeign" (Map.lookup area areaMap) area = if succ == entry then Old else Young succ w = widthInBytes $ typeWidth $ localRegType r -- Note: The successor must be a procpoint, and we have already split, diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs index f686aa918b..36e7b8ec62 100644 --- a/compiler/cmm/CmmContFlowOpt.hs +++ b/compiler/cmm/CmmContFlowOpt.hs @@ -184,7 +184,7 @@ replaceLabels env g exp :: CmmExpr -> CmmExpr exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid)) - exp (CmmStackSlot (CallArea (Young id)) i) = CmmStackSlot (CallArea (Young (lookup id))) i + exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i exp e = e mkCmmCondBranch :: CmmExpr -> Label -> Label -> CmmNode O C diff --git a/compiler/cmm/CmmCvt.hs b/compiler/cmm/CmmCvt.hs index 8faf42b3bb..20795f7c82 100644 --- a/compiler/cmm/CmmCvt.hs +++ b/compiler/cmm/CmmCvt.hs @@ -19,7 +19,7 @@ import Outputable cmmOfZgraph :: CmmGroup -> Old.CmmGroup cmmOfZgraph tops = map mapTop tops - where mapTop (CmmProc h l g) = CmmProc (Old.CmmInfo Nothing Nothing (info_tbl h)) l (ofZgraph g) + where mapTop (CmmProc h l g) = CmmProc (info_tbl h) l (ofZgraph g) mapTop (CmmData s ds) = CmmData s ds data ValueDirection = Arguments | Results diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index 5abbed3522..c22f8d5b76 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -18,8 +18,8 @@ module CmmExpr , RegSet, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet , plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet , regSetToList - , regUsedIn, regSlot - , Area(..), AreaId(..), SubArea, SubAreaSet, AreaMap, isStackSlotOf + , regUsedIn + , Area(..), SubArea, SubAreaSet, AreaMap , module CmmMachOp , module CmmType ) @@ -71,11 +71,6 @@ data CmmReg -- | A stack area is either the stack slot where a variable is spilled -- or the stack space where function arguments and results are passed. data Area - = RegSlot LocalReg - | CallArea AreaId - deriving (Eq, Ord) - -data AreaId = Old -- See Note [Old Area] | Young BlockId -- Invariant: must be a continuation BlockId -- See Note [Continuation BlockId] in CmmNode. @@ -287,17 +282,6 @@ reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es _ `regUsedIn` CmmStackSlot _ _ = False ----------------------------------------------------------------------------- --- Stack slots ------------------------------------------------------------------------------ - -isStackSlotOf :: CmmExpr -> LocalReg -> Bool -isStackSlotOf (CmmStackSlot (RegSlot r) _) r' = r == r' -isStackSlotOf _ _ = False - -regSlot :: LocalReg -> CmmExpr -regSlot r = CmmStackSlot (RegSlot r) (widthInBytes $ typeWidth $ localRegType r) - ------------------------------------------------------------------------------ -- Stack slot use information for expressions and other types [_$_] ----------------------------------------------------------------------------- diff --git a/compiler/cmm/CmmInfo.hs b/compiler/cmm/CmmInfo.hs index 678d0add7c..7006c74ff7 100644 --- a/compiler/cmm/CmmInfo.hs +++ b/compiler/cmm/CmmInfo.hs @@ -88,7 +88,7 @@ mkInfoTable :: Platform -> CmmDecl -> UniqSM [RawCmmDecl] mkInfoTable _ (CmmData sec dat) = return [CmmData sec dat] -mkInfoTable platform (CmmProc (CmmInfo _ _ info) entry_label blocks) +mkInfoTable platform (CmmProc info entry_label blocks) | CmmNonInfoTable <- info -- Code without an info table. Easy. = return [CmmProc Nothing entry_label blocks] @@ -97,7 +97,8 @@ mkInfoTable platform (CmmProc (CmmInfo _ _ info) entry_label blocks) ; return (top_decls ++ mkInfoTableAndCode info_lbl info_cts entry_label blocks) } - | otherwise = panic "mkInfoTable" -- Patern match overlap check not clever enough + | otherwise = panic "mkInfoTable" + -- Patern match overlap check not clever enough ----------------------------------------------------- type InfoTableContents = ( [CmmLit] -- The standard part diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs new file mode 100644 index 0000000000..24c241ea88 --- /dev/null +++ b/compiler/cmm/CmmLayoutStack.hs @@ -0,0 +1,545 @@ +{-# LANGUAGE RecordWildCards, GADTs #-} +module CmmLayoutStack ( + cmmLayoutStack + ) where + +import Cmm +import BlockId +import CmmUtils +import CmmLive +import CmmProcPoint +import SMRep +import Hoopl +import OptimizationFuel +import Constants +import UniqSupply +import Maybes +import UniqFM +import Util + +import FastString +import Outputable +import Data.Map (Map) +import qualified Data.Map as Map +import qualified Data.Set as Set +import Control.Monad.Fix +import Data.Array as Array + +#include "HsVersions.h" + + +data StackSlot = Occupied | Empty + -- Occupied: a return address or part of an update frame + +instance Outputable StackSlot where + ppr Occupied = ptext (sLit "XXX") + ppr Empty = ptext (sLit "---") + +-- All stack locations are expressed as positive byte offsets from the +-- "base", which is defined to be the address above the return address +-- on the stack on entry to this CmmProc. +-- +-- | | <- base +-- |-----------| +-- | ret | <- base + 8 +-- |-----------| +-- . . +-- . . +-- +-- Lower addresses have higher StackLocs. +-- +type StackLoc = ByteOff + +data StackMap = StackMap + { sm_sp :: StackLoc + -- ^ the offset of Sp relative to the base on entry + -- to this block. + , sm_args :: ByteOff + , sm_ret_off :: ByteOff + -- ^ the number of bytes of arguments in the area for this block + -- Defn: the offset of young(L) relative to the base is given by + -- (sm_sp - sm_args) of the StackMap for block L. + , sm_regs :: UniqFM (LocalReg,StackLoc) + -- ^ regs on the stack + } + +instance Outputable StackMap where + ppr StackMap{..} = + text "Sp = " <> int sm_sp $$ + text "sm_args = " <> int sm_args $$ + text "sm_ret_off = " <> int sm_ret_off $$ + text "sm_regs = " <> ppr (eltsUFM sm_regs) + + +cmmLayoutStack :: ProcPointSet -> ByteOff -> CmmGraph -> FuelUniqSM CmmGraph +cmmLayoutStack procpoints entry_args + graph@(CmmGraph { g_entry = entry }) + = do + pprTrace "cmmLayoutStack" (ppr entry_args) $ return () + liveness <- cmmLiveness graph + pprTrace "liveness" (ppr liveness) $ return () + let blocks = postorderDfs graph + + (_rec_stackmaps, rec_high_sp, new_blocks) <- liftUniq $ + mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) -> + layout procpoints liveness entry entry_args + rec_stackmaps rec_high_sp blocks + + pprTrace ("Sp HWM") (ppr rec_high_sp) $ + return (ofBlockList entry new_blocks) + + + +layout :: BlockSet -- proc points + -> BlockEnv CmmLive -- liveness + -> BlockId -- entry + -> ByteOff -- stack args on entry + + -> BlockEnv StackMap -- [final] stack maps + -> ByteOff -- [final] Sp high water mark + + -> [CmmBlock] -- [in] blocks + + -> UniqSM + ( BlockEnv StackMap -- [out] stack maps + , ByteOff -- [out] Sp high water mark + , [CmmBlock] -- [out] new blocks + ) + +layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks + = go blocks init_stackmap entry_args [] + where + sp_high = final_hwm - entry_args + -- The stack check value is adjusted by the Sp offset on + -- entry to the proc, which is entry_args. We are + -- assuming that we only do a stack check at the + -- beginning of a proc, and we don't modify Sp before the + -- check. + + (updfr, cont_info) = collectContInfo blocks + + init_stackmap = mapSingleton entry StackMap{ sm_sp = entry_args + , sm_args = entry_args + , sm_ret_off = updfr + , sm_regs = emptyUFM + } + + go [] acc_stackmaps acc_hwm acc_blocks + = return (acc_stackmaps, acc_hwm, acc_blocks) + + go (b0 : bs) acc_stackmaps acc_hwm acc_blocks + = do + let (entry0@(CmmEntry entry_lbl), middle0, last0) = blockSplit b0 + + pprTrace "layout" (ppr entry_lbl <+> ppr acc_stackmaps) $ return () + + let stack0@StackMap { sm_sp = sp0 } + = mapFindWithDefault + (pprPanic "no stack map for" (ppr entry_lbl)) + entry_lbl acc_stackmaps + + -- update the stack map to include the effects of assignments + -- in this block + let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0 + + -- insert reloads if necessary + let middle1 = if entry_lbl `setMember` procpoints + then foldr blockCons middle0 (insertReloads stack0) + else middle0 + + (saves, out, sp_off, last1, fixup_blocks) + <- handleLastNode procpoints liveness cont_info + acc_stackmaps stack1 last0 + + let hwm' = maximum (acc_hwm : map sm_sp (mapElems out)) + middle2 = maybeAddSpAdj sp_off $ foldl blockSnoc middle1 saves + + fiddle_middle = mapExpDeep (areaToSp sp0 sp_high final_stackmaps) + fiddle_last = mapExpDeep (areaToSp (sp0 - sp_off) sp_high + final_stackmaps) + + stackmaps' = mapUnion acc_stackmaps out + newblock = blockJoin entry0 middle2 last1 + newblock' = blockMapNodes3 (id, fiddle_middle, fiddle_last) newblock + fixup_blocks' = map (blockMapNodes3 (id, fiddle_middle, id)) + fixup_blocks + + pprTrace "layout2" (ppr out) $ return () + + go bs stackmaps' hwm' (newblock' : fixup_blocks' ++ acc_blocks) + + +-- This doesn't seem right somehow. We need to find out whether this +-- proc will push some update frame material at some point, so that we +-- can avoid using that area of the stack for spilling. The +-- updfr_space field of the CmmProc *should* tell us, but it doesn't +-- (I think maybe it gets filled in later when we do proc-point +-- splitting). +-- +-- So we'll just take the max of all the cml_ret_offs. This could be +-- unnecessarily pessimistic, but probably not in the code we +-- generate. + +collectContInfo :: [CmmBlock] -> (ByteOff, BlockEnv ByteOff) +collectContInfo blocks + = (maximum ret_offs, mapFromList (catMaybes mb_argss)) + where + (mb_argss, ret_offs) = mapAndUnzip get_cont blocks + + get_cont b = + case lastNode b of + CmmCall { cml_cont = Just l, .. } + -> (Just (l, cml_ret_args), cml_ret_off) + CmmForeignCall { .. } + -> (Just (succ, 0), updfr) -- ?? + _other -> (Nothing, 0) + + +maybeAddSpAdj :: ByteOff -> Block CmmNode O O -> Block CmmNode O O +maybeAddSpAdj 0 block = block +maybeAddSpAdj sp_off block + = block `blockSnoc` CmmAssign spReg (cmmOffset (CmmReg spReg) sp_off) + + +procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap +procMiddle stackmaps node sm + = case node of + CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) t) + -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) } + where loc = getStackLoc area off stackmaps + CmmAssign (CmmLocal r) _other + -> sm { sm_regs = delFromUFM (sm_regs sm) r } + _other + -> sm + +getStackLoc :: Area -> ByteOff -> BlockEnv StackMap -> StackLoc +getStackLoc Old n _ = n +getStackLoc (Young l) n stackmaps = + case mapLookup l stackmaps of + Nothing -> pprPanic "getStackLoc" (ppr l) + Just sm -> sm_sp sm - sm_args sm + n + +-- ----------------------------------------------------------------------------- +-- Handling stack allocation for a last node + +handleLastNode + :: ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff + -> BlockEnv StackMap -> StackMap + -> CmmNode O C + -> UniqSM + ( [CmmNode O O] -- assignments to save live variables + , BlockEnv StackMap -- stackmaps for the continuations + , ByteOff -- amount to adjust Sp before the jump + , CmmNode O C -- new last node + , [CmmBlock] -- new blocks + ) + +handleLastNode procpoints liveness cont_info stackmaps + stack0@StackMap { sm_sp = sp0 } last + = case last of + -- At each return / tail call, + -- adjust Sp to point to the last argument pushed, which + -- is cml_args, after popping any other junk from the stack. + CmmCall{ cml_cont = Nothing, .. } -> do + let sp_off = sp0 - cml_args + return ([], mapEmpty, sp_off, last, []) + + -- At each CmmCall with a continuation: + CmmCall{ cml_cont = Just cont_lbl, .. } + -- If we have already seen this continuation before, then + -- we just have to make the stack look the same: + | Just cont_stack <- mapLookup cont_lbl stackmaps + -> + return ( fixupStack stack0 cont_stack + , stackmaps + , sp0 - sm_sp cont_stack + , last + , [] ) + + -- a continuation we haven't seen before: + -- allocate the stack frame for it. + | otherwise -> do + + -- get the set of LocalRegs live in the continuation + let target_live = mapFindWithDefault Set.empty cont_lbl + liveness + + -- the stack from the base to cml_ret_off is off-limits. + -- our new stack frame contains: + -- * saved live variables + -- * the return address [young(C) + 8] + -- * the args for the call, + -- which are replaced by the return values at the return + -- point. + + -- everything up to cml_ret_off is off-limits: mark it Occupied + -- stack2 contains cml_ret_off, plus everything we need to save + (stack2, assigs) = allocate cml_ret_off target_live stack0 + + -- Sp is currently pointing to sp0, + -- we want it to point to (sm_sp stack2 + cml_args) + -- so the difference is sp0 - (sm_sp stack2 + cml_args) + sp_off = sp0 - (sm_sp stack2 + cml_args) + + -- And the Sp at the continuation is: + -- sm_sp stack2 + cml_ret_args + cont_stack = stack2{ sm_sp = sm_sp stack2 + cml_ret_args + , sm_args = cml_ret_args + , sm_ret_off = cml_ret_off + } + + -- emit the necessary assignments of LocalRegs to stack slots + -- emit an Sp adjustment, taking into account the call area + -- + return ( assigs + , mapSingleton cont_lbl cont_stack + , sp_off + , last + , [] -- no new blocks + ) + + CmmBranch{..} -> handleProcPoints + CmmCondBranch{..} -> handleProcPoints + CmmSwitch{..} -> handleProcPoints + + where + handleProcPoints :: UniqSM ( [CmmNode O O] + , BlockEnv StackMap + , ByteOff + , CmmNode O C + , [CmmBlock] ) + + handleProcPoints = do + pps <- mapM handleProcPoint (successors last) + let lbl_map :: LabelMap Label + lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ] + fix_lbl l = mapLookup l lbl_map `orElse` l + return ( [] + , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] + , 0 + , mapSuccessors fix_lbl last + , concat [ blk | (_,_,_,blk) <- pps ] ) + + -- For each proc point that is a successor of this block, we need to + -- (a) if the proc point already has a stackmap, we need to + -- shuffle the current stack to make it look the same. + -- We have to insert a new block to make this happen. + -- (b) otherwise, call "allocate live stack0" to make the + -- stack map for the proc point + handleProcPoint :: BlockId + -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock]) + handleProcPoint l + | not (l `setMember` procpoints) = return (l, l, stack0, []) + | otherwise = do + tmp <- getUniqueM + let tmp_lbl = mkBlockId tmp + (assigs, stack3) = case mapLookup l stackmaps of + Just pp_sm -> (fixupStack stack0 pp_sm, pp_sm) + Nothing -> pprTrace "first visit to proc point" (ppr l <+> ppr live $$ ppr stack1) $ (assigs, stack2) + where + live = mapFindWithDefault Set.empty l liveness + (stack1, assigs) = allocate (sm_ret_off stack0) live stack0 + cont_args = mapFindWithDefault 0 l cont_info + stack2 = stack1 { sm_sp = sm_sp stack1 + cont_args + , sm_args = cont_args + } + + sp_off = sp0 - sm_sp stack3 + + block = blockJoin + (CmmEntry tmp_lbl) + (maybeAddSpAdj sp_off (blockFromList assigs)) + (CmmBranch l) + -- + return (l, tmp_lbl, stack3, [block]) + + + passthrough :: BlockEnv StackMap + passthrough = mapFromList (zip (successors last) (repeat stack0)) + + +-- | create a sequence of assignments to establish the new StackMap, +-- given the old StackMap. +fixupStack :: StackMap -> StackMap -> [CmmNode O O] +fixupStack old_stack new_stack = concatMap move new_locs + where + old_map :: Map LocalReg ByteOff + old_map = Map.fromList (stackSlotRegs old_stack) + new_locs = stackSlotRegs new_stack + + move (r,n) + | Just m <- Map.lookup r old_map, n == m = [] + | otherwise = [CmmStore (CmmStackSlot Old n) + (CmmReg (CmmLocal r))] + +-- ----------------------------------------------------------------------------- +-- Updating references to CallAreas + +{- +After running layout, we need to update all the references to stack areas. + +Sp(L) is the Sp offset on entry to block L relative to the base of the +OLD area. + +SpArgs(L) is the size of the young area for L, i.e. the number of +arguments. + + - in block L, each reference to (OldArea[N]) turns into + [Sp + Sp(L) - N] + + - in block L, each reference to (Young(L')[N]) turns into + [Sp + Sp(L) - Sp(L') + SpArgs(L') - N] + + - be careful with the last node of each block: Sp has already been adjusted + to be Sp + Sp(L) - Sp(L') +-} + +areaToSp :: ByteOff -> ByteOff -> BlockEnv StackMap -> CmmExpr -> CmmExpr +areaToSp sp_old _sp_hwm stackmaps (CmmStackSlot area n) = + cmmOffset (CmmReg spReg) (sp_old - area_off - n) + where + area_off = case area of + Old -> 0 + Young l -> + case mapLookup l stackmaps of + Just sm -> sm_sp sm - sm_args sm + Nothing -> pprPanic "areaToSp(2)" (ppr l) +areaToSp _ sp_hwm _ (CmmLit CmmHighStackMark) = CmmLit (mkIntCLit sp_hwm) +areaToSp _ _ _ other = other + + +-- ----------------------------------------------------------------------------- +-- Saving live registers + +-- | Given a set of live registers and a StackMap, save all the registers +-- on the stack and return the new StackMap and the assignments to do +-- the saving. +-- +allocate :: ByteOff -> RegSet -> StackMap -> (StackMap, [CmmNode O O]) +allocate ret_off live stackmap@StackMap{ sm_sp = sp0 + , sm_regs = regs0 } + = + pprTrace "allocate" (ppr live $$ ppr stackmap) $ + + -- we only have to save regs that are not already in a slot + let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live) + regs1 = filterUFM (\(r,_) -> elemRegSet r live) regs0 + in + + -- make a map of the stack + let stack = reverse $ Array.elems $ + accumArray (\_ x -> x) Empty (1, toWords (max sp0 ret_off)) $ + ret_words ++ live_words + where ret_words = + [ (x, Occupied) + | x <- [ 1 .. toWords ret_off] ] + live_words = + [ (toWords x, Occupied) + | (r,off) <- eltsUFM regs1, + let w = localRegBytes r, + x <- [ off, off-wORD_SIZE .. off - w + 1] ] + in + + -- Pass over the stack: find slots to save all the new live variables, + -- choosing the oldest slots first (hence a foldr). + let + save slot ([], stack, n, assigs, regs) -- no more regs to save + = ([], slot:stack, n `plusW` 1, assigs, regs) + save slot (to_save, stack, n, assigs, regs) + = case slot of + Occupied -> (to_save, Occupied:stack, n `plusW` 1, assigs, regs) + Empty + | Just (stack', r, to_save') <- + select_save to_save (slot:stack) + -> let assig = CmmStore (CmmStackSlot Old n') + (CmmReg (CmmLocal r)) + n' = n `plusW` 1 + in + (to_save', stack', n', assig : assigs, (r,(r,n')):regs) + + | otherwise + -> (to_save, slot:stack, n `plusW` 1, assigs, regs) + + -- we should do better here: right now we'll fit the smallest first, + -- but it would make more sense to fit the biggest first. + select_save :: [LocalReg] -> [StackSlot] + -> Maybe ([StackSlot], LocalReg, [LocalReg]) + select_save regs stack = go regs [] + where go [] no_fit = Nothing + go (r:rs) no_fit + | Just rest <- dropEmpty words stack + = Just (replicate words Occupied ++ rest, r, rs++no_fit) + | otherwise + = go rs (r:no_fit) + where words = localRegWords r + + -- fill in empty slots as much as possible + (still_to_save, save_stack, n, save_assigs, save_regs) + = foldr save (to_save, [], 0, [], []) stack + + -- push any remaining live vars on the stack + (push_sp, push_assigs, push_regs) + = foldr push (n, [], []) still_to_save + where + push r (n, assigs, regs) + = (n', assig : assigs, (r,(r,n')) : regs) + where + w = typeWidth (localRegType r) + n' = n + widthInBytes w + assig = CmmStore (CmmStackSlot Old n') + (CmmReg (CmmLocal r)) + + trim_sp + | not (null push_regs) = push_sp + | otherwise + = case break notEmpty save_stack of + (empties, rest) -> n `plusW` (- length empties) + + final_regs = regs1 `addListToUFM` push_regs + `addListToUFM` save_regs + + in + -- XXX should be an assert + if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else + + ( stackmap { sm_regs = final_regs , sm_sp = trim_sp } + , push_assigs ++ save_assigs ) + + +-- ----------------------------------------------------------------------------- + +plusW :: ByteOff -> WordOff -> ByteOff +plusW b w = b + w * wORD_SIZE + +dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot] +dropEmpty 0 ss = Just ss +dropEmpty n (Empty : ss) = dropEmpty (n-1) ss +dropEmpty n _ = Nothing + +pushEmpty :: ByteOff -> [StackSlot] -> [StackSlot] +pushEmpty n stack = replicate (toWords n) Empty ++ stack + +notEmpty :: StackSlot -> Bool +notEmpty Empty = False +notEmpty _ = True + +localRegBytes :: LocalReg -> ByteOff +localRegBytes r = widthInBytes (typeWidth (localRegType r)) + +localRegWords :: LocalReg -> WordOff +localRegWords = toWords . localRegBytes + +toWords :: ByteOff -> WordOff +toWords x = x `quot` wORD_SIZE + + +insertReloads :: StackMap -> [CmmNode O O] +insertReloads stackmap = + [ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot Old sp) + (localRegType r)) + | (r,sp) <- stackSlotRegs stackmap + ] + + +stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)] +stackSlotRegs sm = eltsUFM (sm_regs sm) diff --git a/compiler/cmm/CmmLive.hs b/compiler/cmm/CmmLive.hs index 9258d986ad..d5170b66b7 100644 --- a/compiler/cmm/CmmLive.hs +++ b/compiler/cmm/CmmLive.hs @@ -21,7 +21,6 @@ import PprCmmExpr () import Hoopl import Maybes import Outputable -import UniqSet ----------------------------------------------------------------------------- -- Calculating what variables are live on entry to a basic block @@ -77,11 +76,7 @@ xferLive = mkBTransfer3 fst mid lst mid :: CmmNode O O -> CmmLive -> CmmLive mid n f = gen_kill n f lst :: CmmNode O C -> FactBase CmmLive -> CmmLive - -- slightly inefficient: kill is unnecessary for emptyRegSet - lst n f = gen_kill n - $ case n of CmmCall{} -> emptyRegSet - CmmForeignCall{} -> emptyRegSet - _ -> joinOutFacts liveLattice n f + lst n f = gen_kill n $ joinOutFacts liveLattice n f ----------------------------------------------------------------------------- -- Removing assignments to dead variables diff --git a/compiler/cmm/CmmNode.hs b/compiler/cmm/CmmNode.hs index 5a07bad7d7..971b351320 100644 --- a/compiler/cmm/CmmNode.hs +++ b/compiler/cmm/CmmNode.hs @@ -310,14 +310,14 @@ instance UserOfSlots ForeignTarget where instance DefinerOfSlots (CmmNode e x) where foldSlotsDefd f z n = case n of CmmStore (CmmStackSlot a i) expr -> f z (a, i, widthInBytes $ typeWidth $ cmmExprType expr) - CmmForeignCall {res=res} -> fold f z $ map foreign_call_slot res + -- CmmForeignCall {res=res} -> fold f z $ map foreign_call_slot res _ -> z where fold :: forall a b. DefinerOfSlots a => (b -> SubArea -> b) -> b -> a -> b fold f z n = foldSlotsDefd f z n - foreign_call_slot r = case widthInBytes $ typeWidth $ localRegType r of w -> (RegSlot r, w, w) + -- foreign_call_slot r = case widthInBytes $ typeWidth $ localRegType r of w -> (RegSlot r, w, w) ----------------------------------- -- mapping Expr in CmmNode diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y index 240dab92d1..f3ac1ed1e7 100644 --- a/compiler/cmm/CmmParse.y +++ b/compiler/cmm/CmmParse.y @@ -230,35 +230,31 @@ lits :: { [ExtFCode CmmExpr] } cmmproc :: { ExtCode } -- TODO: add real SRT/info tables to parsed Cmm - : info maybe_formals_without_hints maybe_gc_block maybe_frame '{' body '}' - { do ((entry_ret_label, info, live, formals, gc_block, frame), stmts) <- + : info maybe_formals_without_hints '{' body '}' + { do ((entry_ret_label, info, live, formals), stmts) <- getCgStmtsEC' $ loopDecls $ do { (entry_ret_label, info, live) <- $1; formals <- sequence $2; - gc_block <- $3; - frame <- $4; - $6; - return (entry_ret_label, info, live, formals, gc_block, frame) } + $4; + return (entry_ret_label, info, live, formals) } blks <- code (cgStmtsToBlocks stmts) - code (emitInfoTableAndCode entry_ret_label (CmmInfo gc_block frame info) formals blks) } + code (emitInfoTableAndCode entry_ret_label info formals blks) } | info maybe_formals_without_hints ';' { do (entry_ret_label, info, live) <- $1; formals <- sequence $2; - code (emitInfoTableAndCode entry_ret_label (CmmInfo Nothing Nothing info) formals []) } + code (emitInfoTableAndCode entry_ret_label info formals []) } - | NAME maybe_formals_without_hints maybe_gc_block maybe_frame '{' body '}' + | NAME maybe_formals_without_hints '{' body '}' {% withThisPackage $ \pkg -> do newFunctionName $1 pkg - ((formals, gc_block, frame), stmts) <- + (formals, stmts) <- getCgStmtsEC' $ loopDecls $ do { formals <- sequence $2; - gc_block <- $3; - frame <- $4; - $6; - return (formals, gc_block, frame) } + $4; + return formals } blks <- code (cgStmtsToBlocks stmts) - code (emitProc (CmmInfo gc_block frame CmmNonInfoTable) (mkCmmCodeLabel pkg $1) formals blks) } + code (emitProc CmmNonInfoTable (mkCmmCodeLabel pkg $1) formals blks) } info :: { ExtFCode (CLabel, CmmInfoTable, [Maybe LocalReg]) } : 'INFO_TABLE' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ')' @@ -599,18 +595,7 @@ formals_without_hints :: { [ExtFCode LocalReg] } formal_without_hint :: { ExtFCode LocalReg } : type NAME { newLocal $1 $2 } -maybe_frame :: { ExtFCode (Maybe UpdateFrame) } - : {- empty -} { return Nothing } - | 'jump' expr '(' exprs0 ')' { do { target <- $2; - args <- sequence $4; - return $ Just (UpdateFrame target args) } } - -maybe_gc_block :: { ExtFCode (Maybe BlockId) } - : {- empty -} { return Nothing } - | 'goto' NAME - { do l <- lookupLabel $2; return (Just l) } - -type :: { CmmType } +type :: { CmmType } : 'bits8' { b8 } | typenot8 { $1 } diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs index 8bccc9d103..023907f245 100644 --- a/compiler/cmm/CmmPipeline.hs +++ b/compiler/cmm/CmmPipeline.hs @@ -21,6 +21,7 @@ import CmmRewriteAssignments import CmmStackLayout import CmmContFlowOpt import OptimizationFuel +import CmmLayoutStack import DynFlags import ErrUtils @@ -110,40 +111,45 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) ----------- Proc points ------------------- let callPPs = {-# SCC "callProcPoints" #-} callProcPoints g procPoints <- {-# SCC "minimalProcPointSet" #-} run $ minimalProcPointSet (targetPlatform dflags) callPPs g - g <- {-# SCC "addProcPointProtocols" #-} run $ addProcPointProtocols callPPs procPoints g - dump Opt_D_dump_cmmz_proc "Post Proc Points Added" g - ----------- Spills and reloads ------------------- - g <- {-# SCC "dualLivenessWithInsertion" #-} run $ dualLivenessWithInsertion procPoints g - dump Opt_D_dump_cmmz_spills "Post spills and reloads" g - - ----------- Sink and inline assignments ------------------- - g <- {-# SCC "rewriteAssignments" #-} runOptimization $ rewriteAssignments platform g - dump Opt_D_dump_cmmz_rewrite "Post rewrite assignments" g + g <- {-# SCC "layoutStack" #-} run $ cmmLayoutStack procPoints entry_off g + dump Opt_D_dump_cmmz_sp "Layout Stack" g + +-- g <- {-# SCC "addProcPointProtocols" #-} run $ addProcPointProtocols callPPs procPoints g +-- dump Opt_D_dump_cmmz_proc "Post Proc Points Added" g +-- +-- ----------- Spills and reloads ------------------- +-- g <- {-# SCC "dualLivenessWithInsertion" #-} run $ dualLivenessWithInsertion procPoints g +-- dump Opt_D_dump_cmmz_spills "Post spills and reloads" g +-- +-- ----------- Sink and inline assignments ------------------- +-- g <- {-# SCC "rewriteAssignments" #-} runOptimization $ rewriteAssignments platform g +-- dump Opt_D_dump_cmmz_rewrite "Post rewrite assignments" g +-- ----------- Eliminate dead assignments ------------------- g <- {-# SCC "removeDeadAssignments" #-} runOptimization $ removeDeadAssignments g dump Opt_D_dump_cmmz_dead "Post remove dead assignments" g - ----------- Zero dead stack slots (Debug only) --------------- - -- Debugging: stubbing slots on death can cause crashes early - g <- if opt_StubDeadValues - then {-# SCC "stubSlotsOnDeath" #-} run $ stubSlotsOnDeath g - else return g - dump Opt_D_dump_cmmz_stub "Post stub dead stack slots" g - - --------------- Stack layout ---------------- - slotEnv <- {-# SCC "liveSlotAnal" #-} run $ liveSlotAnal g - let spEntryMap = getSpEntryMap entry_off g - mbpprTrace "live slot analysis results: " (ppr slotEnv) $ return () - let areaMap = {-# SCC "layout" #-} layout procPoints spEntryMap slotEnv entry_off g - mbpprTrace "areaMap" (ppr areaMap) $ return () - - ------------ Manifest the stack pointer -------- - g <- {-# SCC "manifestSP" #-} run $ manifestSP spEntryMap areaMap entry_off g - dump Opt_D_dump_cmmz_sp "Post manifestSP" g - -- UGH... manifestSP can require updates to the procPointMap. - -- We can probably do something quicker here for the update... +-- ----------- Zero dead stack slots (Debug only) --------------- +-- -- Debugging: stubbing slots on death can cause crashes early +-- g <- if opt_StubDeadValues +-- then {-# SCC "stubSlotsOnDeath" #-} run $ stubSlotsOnDeath g +-- else return g +-- dump Opt_D_dump_cmmz_stub "Post stub dead stack slots" g +-- +-- --------------- Stack layout ---------------- +-- slotEnv <- {-# SCC "liveSlotAnal" #-} run $ liveSlotAnal g +-- let spEntryMap = getSpEntryMap entry_off g +-- mbpprTrace "live slot analysis results: " (ppr slotEnv) $ return () +-- let areaMap = {-# SCC "layout" #-} layout procPoints spEntryMap slotEnv entry_off g +-- mbpprTrace "areaMap" (ppr areaMap) $ return () +-- +-- ------------ Manifest the stack pointer -------- +-- g <- {-# SCC "manifestSP" #-} run $ manifestSP spEntryMap areaMap entry_off g +-- dump Opt_D_dump_cmmz_sp "Post manifestSP" g +-- -- UGH... manifestSP can require updates to the procPointMap. +-- -- We can probably do something quicker here for the update... ------------- Split into separate procedures ------------ procPointMap <- {-# SCC "procPointAnalysis" #-} run $ procPointAnalysis procPoints g @@ -157,12 +163,12 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}}) let localCAFs = catMaybes $ map (localCAFInfo platform cafEnv) gs mbpprTrace "localCAFs" (pprPlatform platform localCAFs) $ return () - gs <- {-# SCC "lowerSafeForeignCalls" #-} run $ mapM (lowerSafeForeignCalls areaMap) gs - dumps Opt_D_dump_cmmz_lower "Post lowerSafeForeignCalls" gs +-- gs <- {-# SCC "lowerSafeForeignCalls" #-} run $ mapM (lowerSafeForeignCalls areaMap) gs +-- dumps Opt_D_dump_cmmz_lower "Post lowerSafeForeignCalls" gs -- NO MORE GRAPH TRANSFORMATION AFTER HERE -- JUST MAKING INFOTABLES - gs <- {-# SCC "setInfoTableStackMap" #-} return $ map (setInfoTableStackMap slotEnv areaMap) gs - dumps Opt_D_dump_cmmz_info "after setInfoTableStackMap" gs +-- gs <- {-# SCC "setInfoTableStackMap" #-} return $ map (setInfoTableStackMap slotEnv areaMap) gs +-- dumps Opt_D_dump_cmmz_info "after setInfoTableStackMap" gs ----------- Control-flow optimisations --------------- gs <- {-# SCC "cmmCfgOpts(2)" #-} return $ map cmmCfgOptsProc gs diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs index cd0a1f0ded..77ffb4df56 100644 --- a/compiler/cmm/CmmProcPoint.hs +++ b/compiler/cmm/CmmProcPoint.hs @@ -4,7 +4,7 @@ module CmmProcPoint ( ProcPointSet, Status(..) , callProcPoints, minimalProcPointSet - , addProcPointProtocols, splitAtProcPoints, procPointAnalysis + , splitAtProcPoints, procPointAnalysis ) where @@ -248,6 +248,8 @@ algorithm would be just as good, so that's what we do. -} +{- + data Protocol = Protocol Convention [CmmFormal] Area deriving Eq instance Outputable Protocol where @@ -371,6 +373,8 @@ add_CopyOuts protos procPoints g = foldGraphBlocks mb_copy_out (return mapEmpty) return $ (b, foldl (flip insertBlock) bmap bs) finish (b, bmap) = return $ insertBlock b bmap skip b bs = insertBlock b `liftM` bs +-} + -- At this point, we have found a set of procpoints, each of which should be -- the entry point of a procedure. diff --git a/compiler/cmm/CmmRewriteAssignments.hs b/compiler/cmm/CmmRewriteAssignments.hs index 45eb89f9fd..2c33b7b5ac 100644 --- a/compiler/cmm/CmmRewriteAssignments.hs +++ b/compiler/cmm/CmmRewriteAssignments.hs @@ -404,8 +404,8 @@ clobbers (CmmReg (CmmGlobal Hp), _) (_, _) = False -- ToDo: Also catch MachOp case clobbers (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (u, CmmLoad (ss'@CmmStackSlot{}) _) | getUnique r == u, ss == ss' = False -- No-op on the stack slot (XXX: Do we need this special case?) -clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr - where f (CmmLoad (CmmStackSlot (CallArea a') o') t) +clobbers (CmmStackSlot a o, rhs) (_, expr) = f expr + where f (CmmLoad (CmmStackSlot a' o') t) = (a, o, widthInBytes (cmmExprWidth rhs)) `overlaps` (a', o', widthInBytes (typeWidth t)) f (CmmLoad e _) = containsStackSlot e f (CmmMachOp _ es) = or (map f es) @@ -416,9 +416,6 @@ clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es) containsStackSlot (CmmStackSlot{}) = True containsStackSlot _ = False -clobbers (CmmStackSlot (RegSlot l) _, _) (_, expr) = f expr - where f (CmmLoad (CmmStackSlot (RegSlot l') _) _) = l == l' - f _ = False clobbers _ (_, e) = f e where f (CmmLoad (CmmStackSlot _ _) _) = False f (CmmLoad{}) = True -- conservative @@ -432,7 +429,7 @@ clobbers _ (_, e) = f e -- [ I32 ] -- [ F64 ] -- s' -w'- o' -type CallSubArea = (AreaId, Int, Int) -- area, offset, width +type CallSubArea = (Area, Int, Int) -- area, offset, width overlaps :: CallSubArea -> CallSubArea -> Bool overlaps (a, _, _) (a', _, _) | a /= a' = False overlaps (_, o, w) (_, o', w') = @@ -457,7 +454,7 @@ invalidateVolatile :: BlockId -> AssignmentMap -> AssignmentMap invalidateVolatile k m = mapUFM p m where p (AlwaysInline e) = if exp e then AlwaysInline e else NeverOptimize where exp CmmLit{} = True - exp (CmmLoad (CmmStackSlot (CallArea (Young k')) _) _) + exp (CmmLoad (CmmStackSlot (Young k') _) _) | k' == k = False exp (CmmLoad (CmmStackSlot _ _) _) = True exp (CmmMachOp _ es) = and (map exp es) @@ -596,10 +593,6 @@ assignmentRewrite = mkFRewrite3 first middle last where rep = typeWidth (localRegType r) _ -> old -- See Note [Soundness of store rewriting] - inlineExp assign old@(CmmLoad (CmmStackSlot (RegSlot r) _) _) - = case lookupUFM assign r of - Just (AlwaysInline x) -> x - _ -> old inlineExp _ old = old inlinable :: CmmNode e x -> Bool diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs index fbe4db0333..35f0429a7f 100644 --- a/compiler/cmm/CmmSpillReload.hs +++ b/compiler/cmm/CmmSpillReload.hs @@ -12,6 +12,10 @@ module CmmSpillReload ) where +import Outputable +dualLivenessWithInsertion = panic "BANG BANG BANG BANG BANG BANG CLICK CLICK" + +{- import BlockId import Cmm import CmmUtils @@ -164,3 +168,4 @@ instance Outputable DualLive where else (ppr_regs "live in regs =" regs), if nullRegSet stack then PP.empty else (ppr_regs "live on stack =" stack)] +-} diff --git a/compiler/cmm/CmmStackLayout.hs b/compiler/cmm/CmmStackLayout.hs index dad684bf31..b75572d6c5 100644 --- a/compiler/cmm/CmmStackLayout.hs +++ b/compiler/cmm/CmmStackLayout.hs @@ -1,3 +1,8 @@ +module CmmStackLayout () where + +#if 0 + + {-# OPTIONS_GHC -XGADTs -XNoMonoLocalBinds #-} -- Norman likes local bindings -- If this module lives on I'd like to get rid of the -XNoMonoLocalBinds @@ -589,3 +594,4 @@ stubSlotsOnDeath g = liftM fst $ dataflowPassBwd g [] $ analRewBwd slotLattice (stackStubExpr (widthFromBytes w)) in case rst of Nothing -> Just (mkMiddle m <*> store) Just g -> Just (g <*> store) +#endif diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs index 0d75235a52..922f31e45a 100644 --- a/compiler/cmm/MkGraph.hs +++ b/compiler/cmm/MkGraph.hs @@ -3,14 +3,15 @@ module MkGraph ( CmmAGraph, CgStmt(..) , (<*>), catAGraphs - , mkLabel, mkMiddle, mkLast + , mkLabel, mkMiddle, mkLast, outOfLine , lgraphOfAGraph, labelAGraph , stackStubExpr - , mkNop, mkAssign, mkStore, mkUnsafeCall, mkFinalCall, lastWithArgs - , mkJump, mkDirectJump, mkForeignJump, mkJumpGC, mkCbranch, mkSwitch + , mkNop, mkAssign, mkStore, mkUnsafeCall, mkFinalCall, mkCallReturnsTo + , mkJump, mkDirectJump, mkForeignJump, mkForeignJumpExtra, mkJumpGC + , mkCbranch, mkSwitch , mkReturn, mkReturnSimple, mkComment, mkCallEntry, mkBranch - , copyInOflow, copyInSlot, copyOutOflow, copyOutSlot + , copyInOflow, copyOutOflow , toCall, Transfer(..) ) where @@ -136,6 +137,9 @@ mkMiddle middle = unitOL (CgStmt middle) mkLast :: CmmNode O C -> CmmAGraph mkLast last = unitOL (CgLast last) +-- | A labelled code block; should end in a last node +outOfLine :: BlockId -> CmmAGraph -> CmmAGraph +outOfLine l g = unitOL (CgFork l g) -- | allocate a fresh label for the entry point lgraphOfAGraph :: CmmAGraph -> UniqSM CmmGraph @@ -168,23 +172,30 @@ mkStore l r = mkMiddle $ CmmStore l r ---------- Control transfer mkJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph mkJump e actuals updfr_off = - lastWithArgs Jump old NativeNodeCall actuals updfr_off $ + lastWithArgs Jump Old NativeNodeCall actuals updfr_off $ toCall e Nothing updfr_off 0 mkDirectJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph mkDirectJump e actuals updfr_off = - lastWithArgs Jump old NativeDirectCall actuals updfr_off $ + lastWithArgs Jump Old NativeDirectCall actuals updfr_off $ toCall e Nothing updfr_off 0 mkJumpGC :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph mkJumpGC e actuals updfr_off = - lastWithArgs Jump old GC actuals updfr_off $ + lastWithArgs Jump Old GC actuals updfr_off $ toCall e Nothing updfr_off 0 mkForeignJump :: Convention -> CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph mkForeignJump conv e actuals updfr_off = - lastWithArgs Jump old conv actuals updfr_off $ + lastWithArgs Jump Old conv actuals updfr_off $ + toCall e Nothing updfr_off 0 + +mkForeignJumpExtra :: Convention -> CmmExpr -> [CmmActual] + -> UpdFrameOffset -> (ByteOff, [(CmmExpr, ByteOff)]) + -> CmmAGraph +mkForeignJumpExtra conv e actuals updfr_off extra_stack = + lastWithArgsAndExtraStack Jump Old conv actuals updfr_off extra_stack $ toCall e Nothing updfr_off 0 mkCbranch :: CmmExpr -> BlockId -> BlockId -> CmmAGraph @@ -195,15 +206,15 @@ mkSwitch e tbl = mkLast $ CmmSwitch e tbl mkReturn :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph mkReturn e actuals updfr_off = - lastWithArgs Ret old NativeReturn actuals updfr_off $ + lastWithArgs Ret Old NativeReturn actuals updfr_off $ toCall e Nothing updfr_off 0 - -- where e = CmmLoad (CmmStackSlot (CallArea Old) updfr_off) gcWord + -- where e = CmmLoad (CmmStackSlot Old updfr_off) gcWord mkReturnSimple :: [CmmActual] -> UpdFrameOffset -> CmmAGraph mkReturnSimple actuals updfr_off = - lastWithArgs Ret old NativeReturn actuals updfr_off $ + lastWithArgs Ret Old NativeReturn actuals updfr_off $ toCall e Nothing updfr_off 0 - where e = CmmLoad (CmmStackSlot (CallArea Old) updfr_off) gcWord + where e = CmmLoad (CmmStackSlot Old updfr_off) gcWord mkBranch :: BlockId -> CmmAGraph mkBranch bid = mkLast (CmmBranch bid) @@ -211,9 +222,20 @@ mkBranch bid = mkLast (CmmBranch bid) mkFinalCall :: CmmExpr -> CCallConv -> [CmmActual] -> UpdFrameOffset -> CmmAGraph mkFinalCall f _ actuals updfr_off = - lastWithArgs Call old NativeDirectCall actuals updfr_off $ + lastWithArgs Call Old NativeDirectCall actuals updfr_off $ toCall f Nothing updfr_off 0 +mkCallReturnsTo :: CmmExpr -> Convention -> [CmmActual] + -> BlockId + -> ByteOff + -> UpdFrameOffset + -> (ByteOff, [(CmmExpr,ByteOff)]) + -> CmmAGraph +mkCallReturnsTo f callConv actuals ret_lbl ret_off updfr_off extra_stack = do + lastWithArgsAndExtraStack Call (Young ret_lbl) callConv actuals + updfr_off extra_stack $ + toCall f (Just ret_lbl) updfr_off ret_off + mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as @@ -238,12 +260,9 @@ stackStubExpr w = CmmLit (CmmInt 0 w) -- Therefore, for copying arguments and results, we provide different -- functions to pass the arguments in an overflow area and to pass them in spill slots. copyInOflow :: Convention -> Area -> [CmmFormal] -> (Int, CmmAGraph) -copyInSlot :: Convention -> [CmmFormal] -> [CmmNode O O] -copyOutSlot :: Convention -> [LocalReg] -> [CmmNode O O] copyInOflow conv area formals = (offset, catAGraphs $ map mkMiddle nodes) where (offset, nodes) = copyIn oneCopyOflowI conv area formals -copyInSlot c f = snd $ copyIn oneCopySlotI c (panic "no area for copying to slots") f type SlotCopier = Area -> (LocalReg, ByteOff) -> (ByteOff, [CmmNode O O]) -> (ByteOff, [CmmNode O O]) @@ -264,26 +283,20 @@ copyIn oflow conv area formals = adjust rst x@(_, RegisterParam _) = x : rst -- Copy-in one arg, using overflow space if needed. -oneCopyOflowI, oneCopySlotI :: SlotCopier +oneCopyOflowI :: SlotCopier oneCopyOflowI area (reg, off) (n, ms) = (max n off, CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty) : ms) where ty = localRegType reg --- Copy-in one arg, using spill slots if needed -- used for calling conventions at --- a procpoint that is not a return point. The offset is irrelevant here... -oneCopySlotI _ (reg, _) (n, ms) = - (n, CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot (RegSlot reg) w) ty) : ms) - where ty = localRegType reg - w = widthInBytes (typeWidth ty) - - -- Factoring out the common parts of the copyout functions yielded something -- more complicated: data Transfer = Call | Jump | Ret deriving Eq -copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual] -> UpdFrameOffset -> - (Int, CmmAGraph) +copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual] + -> UpdFrameOffset + -> (ByteOff, [(CmmExpr,ByteOff)]) -- extra stack stuff + -> (Int, CmmAGraph) -- Generate code to move the actual parameters into the locations -- required by the calling convention. This includes a store for the @@ -294,51 +307,61 @@ copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual] -> UpdFrameOffset -- the info table for return and adjust the offsets of the other -- parameters. If this is a call instruction, we adjust the offsets -- of the other parameters. -copyOutOflow conv transfer area@(CallArea a) actuals updfr_off - = foldr co (init_offset, mkNop) args' +copyOutOflow conv transfer area actuals updfr_off + (extra_stack_off, extra_stack_stuff) + = foldr co (init_offset, mkNop) (args' ++ stack_params) where co (v, RegisterParam r) (n, ms) = (n, mkAssign (CmmGlobal r) v <*> ms) co (v, StackParam off) (n, ms) = (max n off, mkStore (CmmStackSlot area off) v <*> ms) + stack_params = [ (e, StackParam (off + init_offset)) + | (e,off) <- extra_stack_stuff ] + (setRA, init_offset) = - case a of Young id -> id `seq` -- Generate a store instruction for + case area of + Young id -> id `seq` -- Generate a store instruction for -- the return address if making a call if transfer == Call then ([(CmmLit (CmmBlock id), StackParam init_offset)], widthInBytes wordWidth) else ([], 0) - Old -> ([], updfr_off) + Old -> ([], updfr_off) + + arg_offset = init_offset + extra_stack_off args :: [(CmmExpr, ParamLocation)] -- The argument and where to put it args = assignArgumentsPos conv cmmExprType actuals args' = foldl adjust setRA args - where adjust rst (v, StackParam off) = (v, StackParam (off + init_offset)) : rst + where adjust rst (v, StackParam off) = (v, StackParam (off + arg_offset)) : rst adjust rst x@(_, RegisterParam _) = x : rst -copyOutOflow _ _ (RegSlot _) _ _ = panic "cannot copy arguments into a register slot" --- Args passed only in registers and stack slots; no overflow space. --- No return address may apply! -copyOutSlot conv actuals = foldr co [] args - where co (v, RegisterParam r) ms = CmmAssign (CmmGlobal r) (toExp v) : ms - co (v, StackParam off) ms = CmmStore (CmmStackSlot (RegSlot v) off) (toExp v) : ms - toExp r = CmmReg (CmmLocal r) - args = assignArgumentsPos conv localRegType actuals mkCallEntry :: Convention -> [CmmFormal] -> (Int, CmmAGraph) -mkCallEntry conv formals = copyInOflow conv (CallArea Old) formals +mkCallEntry conv formals = copyInOflow conv Old formals -lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual] -> UpdFrameOffset -> - (ByteOff -> CmmAGraph) -> CmmAGraph +lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual] + -> UpdFrameOffset + -> (ByteOff -> CmmAGraph) + -> CmmAGraph lastWithArgs transfer area conv actuals updfr_off last = - let (outArgs, copies) = copyOutOflow conv transfer area actuals updfr_off in + let (outArgs, copies) = copyOutOflow conv transfer area actuals + updfr_off noExtraStack in + copies <*> last outArgs + +lastWithArgsAndExtraStack :: Transfer -> Area -> Convention -> [CmmActual] + -> UpdFrameOffset -> (ByteOff, [(CmmExpr,ByteOff)]) + -> (ByteOff -> CmmAGraph) + -> CmmAGraph +lastWithArgsAndExtraStack transfer area conv actuals updfr_off + extra_stack last = + let (outArgs, copies) = copyOutOflow conv transfer area actuals + updfr_off extra_stack in copies <*> last outArgs --- The area created for the jump and return arguments is the same area as the --- procedure entry. -old :: Area -old = CallArea Old +noExtraStack :: (ByteOff, [(CmmExpr,ByteOff)]) +noExtraStack = (0,[]) toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff -> ByteOff -> CmmAGraph diff --git a/compiler/cmm/OldCmm.hs b/compiler/cmm/OldCmm.hs index a8a9d5dde0..b9d1b9d1a4 100644 --- a/compiler/cmm/OldCmm.hs +++ b/compiler/cmm/OldCmm.hs @@ -16,7 +16,7 @@ module OldCmm ( CmmGroup, GenCmmGroup, RawCmmGroup, CmmDecl, RawCmmDecl, ListGraph(..), - CmmInfo(..), UpdateFrame(..), CmmInfoTable(..), ClosureTypeInfo(..), + UpdateFrame(..), CmmInfoTable(..), ClosureTypeInfo(..), CmmStatic(..), CmmStatics(..), CmmFormal, CmmActual, cmmMapGraph, cmmTopMapGraph, GenBasicBlock(..), CmmBasicBlock, blockId, blockStmts, mapBlockStmts, @@ -53,13 +53,6 @@ import FastString -- Info Tables ----------------------------------------------------------------------------- -data CmmInfo - = CmmInfo - (Maybe BlockId) -- GC target. Nothing <=> CPS won't do stack check - -- JD: NOT USED BY NEW CODE GEN - (Maybe UpdateFrame) -- Update frame - CmmInfoTable -- Info table - -- | A frame that is to be pushed before entry to the function. -- Used to handle 'update' frames. data UpdateFrame = @@ -85,8 +78,8 @@ newtype ListGraph i = ListGraph [GenBasicBlock i] -- across a whole compilation unit. -- | Cmm with the info table as a data type -type CmmGroup = GenCmmGroup CmmStatics CmmInfo (ListGraph CmmStmt) -type CmmDecl = GenCmmDecl CmmStatics CmmInfo (ListGraph CmmStmt) +type CmmGroup = GenCmmGroup CmmStatics CmmInfoTable (ListGraph CmmStmt) +type CmmDecl = GenCmmDecl CmmStatics CmmInfoTable (ListGraph CmmStmt) -- | Cmm with the info tables converted to a list of 'CmmStatic' along with the info -- table label. If we are building without tables-next-to-code there will be no statics diff --git a/compiler/cmm/OldPprCmm.hs b/compiler/cmm/OldPprCmm.hs index 07dfbf63bf..6a8fab48e8 100644 --- a/compiler/cmm/OldPprCmm.hs +++ b/compiler/cmm/OldPprCmm.hs @@ -66,9 +66,6 @@ instance PlatformOutputable instr => PlatformOutputable (GenBasicBlock instr) wh instance PlatformOutputable CmmStmt where pprPlatform = pprStmt -instance PlatformOutputable CmmInfo where - pprPlatform = pprInfo - -- -------------------------------------------------------------------------- instance PlatformOutputable CmmSafety where @@ -76,22 +73,6 @@ instance PlatformOutputable CmmSafety where pprPlatform _ CmmInterruptible = ptext (sLit "_interruptible_call_") pprPlatform platform (CmmSafe srt) = pprPlatform platform srt --- -------------------------------------------------------------------------- --- Info tables. The current pretty printer needs refinement --- but will work for now. --- --- For ideas on how to refine it, they used to be printed in the --- style of C--'s 'stackdata' declaration, just inside the proc body, --- and were labelled with the procedure name ++ "_info". -pprInfo :: Platform -> CmmInfo -> SDoc -pprInfo platform (CmmInfo _gc_target update_frame info_table) = - vcat [{-ptext (sLit "gc_target: ") <> - maybe (ptext (sLit "<none>")) ppr gc_target,-} - ptext (sLit "update_frame: ") <> - maybe (ptext (sLit "<none>")) - (pprUpdateFrame platform) - update_frame, - pprPlatform platform info_table] -- -------------------------------------------------------------------------- -- Basic blocks look like assembly blocks. diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs index 81ce84c264..5c8a9cf5ce 100644 --- a/compiler/cmm/PprCmmExpr.hs +++ b/compiler/cmm/PprCmmExpr.hs @@ -248,12 +248,8 @@ pprLocalReg (LocalReg uniq rep) -- Stack areas pprArea :: Area -> SDoc -pprArea (RegSlot r) = hcat [ text "slot<", ppr r, text ">" ] -pprArea (CallArea id) = pprAreaId id - -pprAreaId :: AreaId -> SDoc -pprAreaId Old = text "old" -pprAreaId (Young id) = hcat [ text "young<", ppr id, text ">" ] +pprArea Old = text "old" +pprArea (Young id) = hcat [ text "young<", ppr id, text ">" ] -- needs to be kept in syn with CmmExpr.hs.GlobalReg -- diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs index 25ba154d12..ed5c5261d7 100644 --- a/compiler/codeGen/CgInfoTbls.hs +++ b/compiler/codeGen/CgInfoTbls.hs @@ -67,10 +67,9 @@ emitClosureCodeAndInfoTable cl_info args body -- Convert from 'ClosureInfo' to 'CmmInfo'. -- Not used for return points. (The 'smRepClosureTypeInt' call would panic.) -mkCmmInfo :: ClosureInfo -> FCode CmmInfo +mkCmmInfo :: ClosureInfo -> FCode CmmInfoTable mkCmmInfo cl_info - = return (CmmInfo gc_target Nothing $ - CmmInfoTable { cit_lbl = infoTableLabelFromCI cl_info, + = return (CmmInfoTable { cit_lbl = infoTableLabelFromCI cl_info, cit_rep = closureSMRep cl_info, cit_prof = prof, cit_srt = closureSRT cl_info }) @@ -80,14 +79,6 @@ mkCmmInfo cl_info ty_descr_w8 = stringToWord8s (closureTypeDescr cl_info) val_descr_w8 = stringToWord8s (closureValDescr cl_info) - -- The gc_target is to inform the CPS pass when it inserts a stack check. - -- Since that pass isn't used yet we'll punt for now. - -- When the CPS pass is fully integrated, this should - -- be replaced by the label that any heap check jumped to, - -- so that branch can be shared by both the heap (from codeGen) - -- and stack checks (from the CPS pass). - gc_target = panic "TODO: gc_target" - ------------------------------------------------------------------------- -- -- Generating the info table and code for a return point @@ -106,8 +97,7 @@ emitReturnTarget name stmts ; blks <- cgStmtsToBlocks stmts ; frame <- mkStackLayout ; let smrep = mkStackRep (mkLiveness frame) - info = CmmInfo gc_target Nothing info_tbl - info_tbl = CmmInfoTable { cit_lbl = info_lbl + info = CmmInfoTable { cit_lbl = info_lbl , cit_prof = NoProfilingInfo , cit_rep = smrep , cit_srt = srt_info } @@ -119,14 +109,6 @@ emitReturnTarget name stmts info_lbl = mkReturnInfoLabel uniq entry_lbl = mkReturnPtLabel uniq - -- The gc_target is to inform the CPS pass when it inserts a stack check. - -- Since that pass isn't used yet we'll punt for now. - -- When the CPS pass is fully integrated, this should - -- be replaced by the label that any heap check jumped to, - -- so that branch can be shared by both the heap (from codeGen) - -- and stack checks (from the CPS pass). - gc_target = panic "TODO: gc_target" - -- Build stack layout information from the state of the 'FCode' monad. -- Should go away once 'codeGen' starts using the CPS conversion -- pass to handle the stack. Until then, this is really just @@ -378,8 +360,8 @@ funInfoTable info_ptr emitInfoTableAndCode :: CLabel -- Label of entry or ret - -> CmmInfo -- ...the info table - -> [CmmFormal] -- ...args + -> CmmInfoTable -- ...the info table + -> [CmmFormal] -- ...args -> [CmmBasicBlock] -- ...and body -> Code diff --git a/compiler/codeGen/CgMonad.lhs b/compiler/codeGen/CgMonad.lhs index 59f6accf9d..6e164ce9ee 100644 --- a/compiler/codeGen/CgMonad.lhs +++ b/compiler/codeGen/CgMonad.lhs @@ -728,7 +728,7 @@ emitDecl decl = do { state <- getState ; setState $ state { cgs_tops = cgs_tops state `snocOL` decl } } -emitProc :: CmmInfo -> CLabel -> [CmmFormal] -> [CmmBasicBlock] -> Code +emitProc :: CmmInfoTable -> CLabel -> [CmmFormal] -> [CmmBasicBlock] -> Code emitProc info lbl [] blocks = do { let proc_block = CmmProc info lbl (ListGraph blocks) ; state <- getState @@ -740,7 +740,7 @@ emitSimpleProc :: CLabel -> Code -> Code emitSimpleProc lbl code = do { stmts <- getCgStmts code ; blks <- cgStmtsToBlocks stmts - ; emitProc (CmmInfo Nothing Nothing CmmNonInfoTable) lbl [] blks } + ; emitProc CmmNonInfoTable lbl [] blks } -- Get all the CmmTops (there should be no stmts) -- Return a single Cmm which may be split from other Cmms by diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index 724f28d142..0222299ff2 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -596,7 +596,7 @@ pushUpdateFrame es body offset <- foldM push updfr es withUpdFrameOff offset body where push off e = - do emitStore (CmmStackSlot (CallArea Old) base) e + do emitStore (CmmStackSlot Old base) e return base where base = off + widthInBytes (cmmExprWidth e) diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index 3b12b2a4b7..fe41de83fa 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -532,16 +532,9 @@ cgTailCall fun_id fun_info args = do ReturnIt -> emitReturn [fun] -- ToDo: does ReturnIt guarantee tagged? EnterIt -> ASSERT( null args ) -- Discarding arguments - do { let entry = entryCode (closureInfoPtr fun) - ; [ret,call] <- forkAlts [ - getCode $ - emitReturn [fun], -- Is tagged; no need to untag - getCode $ do -- Not tagged - emitCall (NativeNodeCall, NativeReturn) entry [fun] - ] - ; emit =<< mkCmmIfThenElse (cmmIsTagged fun) ret call } - - SlowCall -> do -- A slow function call via the RTS apply routines + emitEnter fun + + SlowCall -> do -- A slow function call via the RTS apply routines { tickySlowCall lf_info args ; emitComment $ mkFastString "slowCall" ; slowCall fun args } @@ -565,6 +558,66 @@ cgTailCall fun_id fun_info args = do node_points = nodeMustPointToIt lf_info +emitEnter :: CmmExpr -> FCode () +emitEnter fun = do + { adjustHpBackwards + ; sequel <- getSequel + ; updfr_off <- getUpdFrameOff + ; case sequel of + -- For a return, we have the option of generating a tag-test or + -- not. If the value is tagged, we can return directly, which + -- is quicker than entering the value. This is a code + -- size/speed trade-off: when optimising for speed rather than + -- size we could generate the tag test. + -- + -- Right now, we do what the old codegen did, and omit the tag + -- test, just generating an enter. + Return _ -> do + { let entry = entryCode $ closureInfoPtr $ CmmReg nodeReg + ; emit $ mkForeignJump NativeNodeCall entry + [cmmUntag fun] updfr_off + } + + -- The result will be scrutinised in the sequel. This is where + -- we generate a tag-test to avoid entering the closure if + -- possible. + -- + -- The generated code will be something like this: + -- + -- R1 = fun -- copyout + -- if (fun & 7 != 0) goto Lcall else goto Lret + -- Lcall: + -- call [fun] returns to Lret + -- Lret: + -- fun' = R1 -- copyin + -- ... + -- + -- Note in particular that the label Lret is used as a + -- destination by both the tag-test and the call. This is + -- becase Lret will necessarily be a proc-point, and we want to + -- ensure that we generate only one proc-point for this + -- sequence. + -- + AssignTo res_regs _ -> do + { lret <- newLabelC + ; lcall <- newLabelC + ; let area = Young lret + ; let (off, copyin) = copyInOflow NativeReturn area res_regs + (outArgs, copyout) = copyOutOflow NativeNodeCall Call area + [fun] updfr_off (0,[]) + ; let entry = entryCode (closureInfoPtr fun) + the_call = toCall entry (Just lret) updfr_off off outArgs + ; emit $ + copyout <*> + mkCbranch (cmmIsTagged fun) lret lcall <*> + outOfLine lcall the_call <*> + mkLabel lret <*> + copyin + } + } + + + {- Note [case on Bool] ~~~~~~~~~~~~~~~~~~~ A case on a Boolean value does two things: diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs index f4be622092..3580481043 100644 --- a/compiler/codeGen/StgCmmForeign.hs +++ b/compiler/codeGen/StgCmmForeign.hs @@ -184,7 +184,7 @@ emitSaveThreadState :: BlockId -> FCode () emitSaveThreadState bid = do -- CurrentTSO->stackobj->sp = Sp; emitStore (cmmOffset (CmmLoad (cmmOffset stgCurrentTSO tso_stackobj) bWord) stack_SP) - (CmmStackSlot (CallArea (Young bid)) (widthInBytes (typeWidth gcWord))) + (CmmStackSlot (Young bid) (widthInBytes (typeWidth gcWord))) emit closeNursery -- and save the current cost centre stack in the TSO when profiling: when opt_SccProfilingOn $ diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs index 2b0b6f895e..68d078fb28 100644 --- a/compiler/codeGen/StgCmmHeap.hs +++ b/compiler/codeGen/StgCmmHeap.hs @@ -418,8 +418,8 @@ altHeapCheck regs code gc_call sp = case rts_label regs of - Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp - Nothing -> mkCall generic_gc (GC, GC) [] [] sp + Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp (0,[]) + Nothing -> mkCall generic_gc (GC, GC) [] [] sp (0,[]) rts_label [reg] | isGcPtrType ty = Just (mkGcLabel "stg_gc_unpt_r1") diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs index 0299bc0f96..9ee9192794 100644 --- a/compiler/codeGen/StgCmmLayout.hs +++ b/compiler/codeGen/StgCmmLayout.hs @@ -15,7 +15,7 @@ module StgCmmLayout ( mkArgDescr, - emitCall, emitReturn, + emitCall, emitReturn, adjustHpBackwards, emitClosureProcAndInfoTable, emitClosureAndInfoTable, @@ -41,10 +41,12 @@ import StgCmmEnv import StgCmmTicky import StgCmmMonad import StgCmmUtils +import StgCmmProf import MkGraph import SMRep import Cmm +import CmmUtils import CLabel import StgSyn import Id @@ -53,6 +55,7 @@ import TyCon ( PrimRep(..) ) import BasicTypes ( Arity ) import DynFlags import StaticFlags +import Module import Constants import Util @@ -93,10 +96,31 @@ emitCall convs@(callConv, _) fun args ; updfr_off <- getUpdFrameOff ; emitComment $ mkFastString ("emitCall: " ++ show sequel) ; case sequel of - Return _ -> emit (mkForeignJump callConv fun args updfr_off) - AssignTo res_regs _ -> emit =<< mkCall fun convs res_regs args updfr_off + Return _ -> + emit $ mkForeignJump callConv fun args updfr_off + AssignTo res_regs _ -> + emit =<< mkCall fun convs res_regs args updfr_off (0,[]) } +emitCallWithExtraStack + :: (Convention, Convention) -> CmmExpr -> [CmmExpr] + -> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode () +-- (cgCall fun args) makes a call to the entry-code of 'fun', +-- passing 'args', and returning the results to the current sequel +emitCallWithExtraStack convs@(callConv, _) fun args extra_stack + = do { adjustHpBackwards + ; sequel <- getSequel + ; updfr_off <- getUpdFrameOff + ; emitComment $ mkFastString ("emitCallWithExtraStack: " ++ show sequel) + ; case sequel of + Return _ -> + emit $ mkForeignJumpExtra callConv fun args updfr_off extra_stack + AssignTo res_regs _ -> do + emit =<< mkCall fun convs res_regs args updfr_off extra_stack + } + + + adjustHpBackwards :: FCode () -- This function adjusts and heap pointers just before a tail call or -- return. At a call or return, the virtual heap pointer may be less @@ -128,6 +152,19 @@ adjustHpBackwards -- Making calls: directCall and slowCall ------------------------------------------------------------------------- +-- General plan is: +-- - we'll make *one* fast call, either to the function itself +-- (directCall) or to stg_ap_<pat>_fast (slowCall) +-- Any left-over arguments will be pushed on the stack, +-- +-- e.g. Sp[old+8] = arg1 +-- Sp[old+16] = arg2 +-- Sp[old+32] = stg_ap_pp_info +-- R2 = arg3 +-- R3 = arg4 +-- call f() return to Nothing updfr_off: 32 + + directCall :: CLabel -> Arity -> [StgArg] -> FCode () -- (directCall f n args) -- calls f(arg1, ..., argn), and applies the result to the remaining args @@ -140,8 +177,19 @@ directCall lbl arity stg_args slowCall :: CmmExpr -> [StgArg] -> FCode () -- (slowCall fun args) applies fun to args, returning the results to Sequel slowCall fun stg_args - = do { cmm_args <- getNonVoidArgAmodes stg_args - ; slow_call fun cmm_args (argsReps stg_args) } + = do { dflags <- getDynFlags + ; cmm_args <- getNonVoidArgAmodes stg_args + ; let platform = targetPlatform dflags + ; call <- getCode $ direct_call "slow_call" + (mkRtsApFastLabel rts_fun) arity cmm_args reps + ; emitComment $ mkFastString ("slow_call for " ++ + showSDoc (pprPlatform platform fun) ++ + " with pat " ++ showSDoc (ftext rts_fun)) + ; emit (mkAssign nodeReg fun <*> call) + } + where + reps = argsReps stg_args + (rts_fun, arity) = slowCallPattern reps -------------- direct_call :: String -> CLabel -> Arity -> [CmmExpr] -> [ArgRep] -> FCode () @@ -149,7 +197,7 @@ direct_call :: String -> CLabel -> Arity -> [CmmExpr] -> [ArgRep] -> FCode () -- the args exclude the void ones -- NB2: 'arity' refers to the *reps* direct_call caller lbl arity args reps - | debugIsOn && arity > length reps -- Too few args + | debugIsOn && arity > length reps -- Too few args = do -- Caller should ensure that there enough args! dflags <- getDynFlags let platform = targetPlatform dflags @@ -157,33 +205,77 @@ direct_call caller lbl arity args reps <+> pprPlatform platform lbl <+> ppr (length reps) <+> pprPlatform platform args <+> ppr reps ) - | null rest_reps -- Precisely the right number of arguments + + | null rest_reps -- Precisely the right number of arguments = emitCall (NativeDirectCall, NativeReturn) target args - | otherwise -- Over-saturated call + | otherwise -- Note [over-saturated calls] = ASSERT( arity == length initial_reps ) - do { pap_id <- newTemp gcWord - ; withSequel (AssignTo [pap_id] True) - (emitCall (NativeDirectCall, NativeReturn) target fast_args) - ; slow_call (CmmReg (CmmLocal pap_id)) - rest_args rest_reps } + emitCallWithExtraStack (NativeDirectCall, NativeReturn) + target fast_args (mkStkOffsets stack_args) where target = CmmLit (CmmLabel lbl) (initial_reps, rest_reps) = splitAt arity reps arg_arity = count isNonV initial_reps (fast_args, rest_args) = splitAt arg_arity args - --------------- -slow_call :: CmmExpr -> [CmmExpr] -> [ArgRep] -> FCode () -slow_call fun args reps - = do dflags <- getDynFlags - let platform = targetPlatform dflags - call <- getCode $ direct_call "slow_call" (mkRtsApFastLabel rts_fun) arity args reps - emitComment $ mkFastString ("slow_call for " ++ showSDoc (pprPlatform platform fun) ++ - " with pat " ++ showSDoc (ftext rts_fun)) - emit (mkAssign nodeReg fun <*> call) + stack_args = slowArgs (zip rest_reps rest_args) + + +{- +Note [over-saturated calls] + +The natural thing to do for an over-saturated call would be to call +the function with the correct number of arguments, and then apply the +remaining arguments to the value returned, e.g. + + f a b c d (where f has arity 2) + --> + r = call f(a,b) + call r(c,d) + +but this entails + - saving c and d on the stack + - making a continuation info table + - at the continuation, loading c and d off the stack into regs + - finally, call r + +Note that since there are a fixed number of different r's +(e.g. stg_ap_pp_fast), we can also pre-compile continuations +that correspond to each of them, rather than generating a fresh +one for each over-saturated call. + +Not only does this generate much less code, it is faster too. We will +generate something like: + +Sp[old+16] = c +Sp[old+24] = d +Sp[old+32] = stg_ap_pp_info +call f(a,b) -- usual calling convention + +For the purposes of the CmmCall node, we count this extra stack as +just more arguments that we are passing on the stack (cml_args). +-} + +-- | 'slowArgs' takes a list of function arguments and prepares them for +-- pushing on the stack for "extra" arguments to a function which requires +-- fewer arguments than we currently have. +slowArgs :: [(ArgRep,CmmExpr)] -> [(ArgRep,CmmExpr)] +slowArgs [] = [] +slowArgs amodes + | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest + | otherwise = this_pat ++ slowArgs rest where - (rts_fun, arity) = slowCallPattern reps + (arg_pat, args, rest) = matchSlowPattern amodes + stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat + this_pat = (N, mkLblExpr stg_ap_pat) : args + save_cccs = [(N, mkLblExpr save_cccs_lbl), (N, curCCS)] + save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs") + +matchSlowPattern :: [(ArgRep,CmmExpr)] + -> (FastString, [(ArgRep,CmmExpr)], [(ArgRep,CmmExpr)]) +matchSlowPattern amodes = (arg_pat, these, rest) + where (arg_pat, n) = slowCallPattern (map fst amodes) + (these, rest) = splitAt n amodes -- These cases were found to cover about 99% of all slow calls: slowCallPattern :: [ArgRep] -> (FastString, Arity) @@ -206,6 +298,30 @@ slowCallPattern [] = (fsLit "stg_ap_0", 0) ------------------------------------------------------------------------- +-- Fix the byte-offsets of a bunch of things to push on the stack + +-- This is used for pushing slow-call continuations. +-- See Note [over-saturated calls]. + +mkStkOffsets + :: [(ArgRep,CmmExpr)] -- things to make offsets for + -> ( ByteOff -- OUTPUTS: Topmost allocated word + , [(CmmExpr, ByteOff)] ) -- things with offsets (voids filtered out) +mkStkOffsets things + = loop 0 [] (reverse things) + where + loop offset offs [] = (offset,offs) + loop offset offs ((V,_):things) = loop offset offs things + -- ignore Void arguments + loop offset offs ((rep,thing):things) + = loop thing_off ((thing, thing_off):offs) things + where + thing_off = offset + argRepSizeW rep * wORD_SIZE + -- offset of thing is offset+size, because we're + -- growing the stack *downwards* as the offsets increase. + + +------------------------------------------------------------------------- -- Classifying arguments: ArgRep ------------------------------------------------------------------------- diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs index 6c5ab4c692..c64df7ecc5 100644 --- a/compiler/codeGen/StgCmmMonad.hs +++ b/compiler/codeGen/StgCmmMonad.hs @@ -611,7 +611,7 @@ emitLabel :: BlockId -> FCode () emitLabel id = emitCgStmt (CgLabel id) emitComment :: FastString -> FCode () -#ifdef DEBUG +#if 0 /* def DEBUG */ emitComment s = emitCgStmt (CgStmt (CmmComment s)) #else emitComment s = return () @@ -688,20 +688,18 @@ mkCmmIfThen e tbranch = do mkCall :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmActual] - -> UpdFrameOffset -> FCode CmmAGraph -mkCall f (callConv, retConv) results actuals updfr_off = do + -> UpdFrameOffset -> (ByteOff,[(CmmExpr,ByteOff)]) -> FCode CmmAGraph +mkCall f (callConv, retConv) results actuals updfr_off extra_stack = do k <- newLabelC - let area = CallArea $ Young k + let area = Young k (off, copyin) = copyInOflow retConv area results - copyout = lastWithArgs Call area callConv actuals updfr_off - (toCall f (Just k) updfr_off off) + copyout = mkCallReturnsTo f callConv actuals k off updfr_off extra_stack return (copyout <*> mkLabel k <*> copyin) - mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmActual] -> UpdFrameOffset -> FCode CmmAGraph -mkCmmCall f results actuals - = mkCall f (NativeDirectCall, NativeReturn) results actuals +mkCmmCall f results actuals updfr_off + = mkCall f (NativeDirectCall, NativeReturn) results actuals updfr_off (0,[]) mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] @@ -710,7 +708,7 @@ mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] mkSafeCall t fs as upd i = do k <- newLabelC return - ( mkStore (CmmStackSlot (CallArea (Young k)) (widthInBytes wordWidth)) + ( mkStore (CmmStackSlot (Young k) (widthInBytes wordWidth)) (CmmLit (CmmBlock k)) <*> mkLast (CmmForeignCall {tgt=t, res=fs, args=as, succ=k, updfr=upd, intrbl=i}) <*> mkLabel k) diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index b0333be379..bb50fffa12 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -94,7 +94,7 @@ Library ForeignFunctionInterface, EmptyDataDecls, TypeSynonymInstances, MultiParamTypeClasses, FlexibleInstances, Rank2Types, ScopedTypeVariables, - DeriveDataTypeable, BangPatterns + DeriveDataTypeable, BangPatterns, GADTs if impl(ghc >= 7.1) Extensions: NondecreasingIndentation @@ -203,6 +203,7 @@ Library CmmStackLayout CmmType CmmUtils + CmmLayoutStack MkGraph OldCmm OldCmmLint |