author     Simon Marlow <marlowsd@gmail.com>    2012-02-03 15:03:06 +0000
committer  Simon Marlow <marlowsd@gmail.com>    2012-02-08 15:50:43 +0000
commit     76999b605423f530ec17562d772eda1c1672db53 (patch)
tree       3ca1208d02ca753ffe07e13fe1bdbc1388d7e9cf
parent     cd38928495f9186646273432151259f3d654b7e2 (diff)
download   haskell-76999b605423f530ec17562d772eda1c1672db53.tar.gz
New stack layout algorithm
Also:

 - improvements to code generation: push slow-call continuations
   on the stack instead of generating explicit continuations

 - remove unused CmmInfo wrapper type (replace with CmmInfoTable)

 - squash Area and AreaId together, remove now-unused RegSlot

 - comment out old unused stack-allocation code that no longer
   compiles after removal of RegSlot
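The Area/AreaId squash is the most pervasive change. As a sketch of the
before and after (reconstructed from the CmmExpr.hs hunk below; the
deriving clause on the old AreaId is assumed, since the diff context
elides it):

    -- Before: a stack area was either a register spill slot
    -- or a call area
    data Area   = RegSlot LocalReg
                | CallArea AreaId
                deriving (Eq, Ord)

    data AreaId = Old             -- the caller's frame
                | Young BlockId   -- the young frame of a continuation

    -- After: only call areas remain, so the two types are squashed
    data Area = Old               -- See Note [Old Area]
              | Young BlockId     -- Invariant: must be a continuation BlockId
              deriving (Eq, Ord)

Call sites change accordingly: CmmStackSlot (CallArea Old) n becomes
CmmStackSlot Old n, and so on throughout the hunks below.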
-rw-r--r--  compiler/cmm/Cmm.hs                        3
-rw-r--r--  compiler/cmm/CmmBuildInfoTables.hs        11
-rw-r--r--  compiler/cmm/CmmContFlowOpt.hs             2
-rw-r--r--  compiler/cmm/CmmCvt.hs                     2
-rw-r--r--  compiler/cmm/CmmExpr.hs                   20
-rw-r--r--  compiler/cmm/CmmInfo.hs                    5
-rw-r--r--  compiler/cmm/CmmLayoutStack.hs           545
-rw-r--r--  compiler/cmm/CmmLive.hs                    7
-rw-r--r--  compiler/cmm/CmmNode.hs                    4
-rw-r--r--  compiler/cmm/CmmParse.y                   39
-rw-r--r--  compiler/cmm/CmmPipeline.hs               70
-rw-r--r--  compiler/cmm/CmmProcPoint.hs               6
-rw-r--r--  compiler/cmm/CmmRewriteAssignments.hs     15
-rw-r--r--  compiler/cmm/CmmSpillReload.hs             5
-rw-r--r--  compiler/cmm/CmmStackLayout.hs             6
-rw-r--r--  compiler/cmm/MkGraph.hs                  119
-rw-r--r--  compiler/cmm/OldCmm.hs                    13
-rw-r--r--  compiler/cmm/OldPprCmm.hs                 19
-rw-r--r--  compiler/cmm/PprCmmExpr.hs                 8
-rw-r--r--  compiler/codeGen/CgInfoTbls.hs            28
-rw-r--r--  compiler/codeGen/CgMonad.lhs               4
-rw-r--r--  compiler/codeGen/StgCmmBind.hs             2
-rw-r--r--  compiler/codeGen/StgCmmExpr.hs            73
-rw-r--r--  compiler/codeGen/StgCmmForeign.hs          2
-rw-r--r--  compiler/codeGen/StgCmmHeap.hs             4
-rw-r--r--  compiler/codeGen/StgCmmLayout.hs         164
-rw-r--r--  compiler/codeGen/StgCmmMonad.hs           18
-rw-r--r--  compiler/ghc.cabal.in                      3
28 files changed, 934 insertions, 263 deletions
diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs
index e31a95ac7d..67e81ec268 100644
--- a/compiler/cmm/Cmm.hs
+++ b/compiler/cmm/Cmm.hs
@@ -111,7 +111,8 @@ type CmmBwdRewrite f = BwdRewrite FuelUniqSM CmmNode f
-- Info Tables
-----------------------------------------------------------------------------
-data CmmTopInfo = TopInfo {info_tbl :: CmmInfoTable, stack_info :: CmmStackInfo}
+data CmmTopInfo = TopInfo { info_tbl :: CmmInfoTable
+ , stack_info :: CmmStackInfo }
data CmmStackInfo
= StackInfo {
diff --git a/compiler/cmm/CmmBuildInfoTables.hs b/compiler/cmm/CmmBuildInfoTables.hs
index 4bc258e7de..0e2b26a221 100644
--- a/compiler/cmm/CmmBuildInfoTables.hs
+++ b/compiler/cmm/CmmBuildInfoTables.hs
@@ -18,7 +18,7 @@ module CmmBuildInfoTables
, TopSRT, emptySRT, srtToData
, bundleCAFs
, lowerSafeForeignCalls
- , cafTransfers, liveSlotTransfers
+ , cafTransfers
, mkLiveness )
where
@@ -98,7 +98,7 @@ foldSet = Set.foldr
-- Also, don't forget to stop at the old end of the stack (oldByte),
-- which may differ depending on whether there is an update frame.
-
+{-
type RegSlotInfo
= ( Int -- Offset from oldest byte of Old area
, LocalReg -- The register
@@ -172,15 +172,18 @@ live_ptrs oldByte slotEnv areaMap bid =
slots :: SubAreaSet -- The SubAreaSet for 'bid'
slots = expectJust "live_ptrs slots" $ mapLookup bid slotEnv
youngByte = expectJust "live_ptrs bid_pos" $ Map.lookup (CallArea (Young bid)) areaMap
+-}
-- Construct the stack maps for a procedure _if_ it needs an infotable.
-- When wouldn't a procedure need an infotable? If it is a procpoint that
-- is not the successor of a call.
+{-
setInfoTableStackMap :: SlotEnv -> AreaMap -> CmmDecl -> CmmDecl
setInfoTableStackMap slotEnv areaMap
t@(CmmProc (TopInfo {stack_info=StackInfo {updfr_space = Just updfr_off}}) _
(CmmGraph {g_entry = eid}))
= updInfo (const (live_ptrs updfr_off slotEnv areaMap eid)) id t
+-}
setInfoTableStackMap _ _ t = t
@@ -500,8 +503,8 @@ lowerSafeForeignCall entry areaMap blocks bid m
saveRetVals = foldl (<**>) mkNop $ map (M.mkMiddle . spill) rs
spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r)
regSlot r@(LocalReg _ _) = CmmRegOff (CmmGlobal Sp) (sp_off - offset)
- where offset = w + expectJust "lowerForeign" (Map.lookup (RegSlot r) areaMap)
- sp_off = wORD_SIZE + expectJust "lowerForeign" (Map.lookup (CallArea area) areaMap)
+ where offset = w + expectJust "lowerForeign" Nothing -- XXX need to fix this: (Map.lookup (RegSlot r) areaMap)
+ sp_off = wORD_SIZE + expectJust "lowerForeign" (Map.lookup area areaMap)
area = if succ == entry then Old else Young succ
w = widthInBytes $ typeWidth $ localRegType r
-- Note: The successor must be a procpoint, and we have already split,
diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs
index f686aa918b..36e7b8ec62 100644
--- a/compiler/cmm/CmmContFlowOpt.hs
+++ b/compiler/cmm/CmmContFlowOpt.hs
@@ -184,7 +184,7 @@ replaceLabels env g
exp :: CmmExpr -> CmmExpr
exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid))
- exp (CmmStackSlot (CallArea (Young id)) i) = CmmStackSlot (CallArea (Young (lookup id))) i
+ exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
exp e = e
mkCmmCondBranch :: CmmExpr -> Label -> Label -> CmmNode O C
diff --git a/compiler/cmm/CmmCvt.hs b/compiler/cmm/CmmCvt.hs
index 8faf42b3bb..20795f7c82 100644
--- a/compiler/cmm/CmmCvt.hs
+++ b/compiler/cmm/CmmCvt.hs
@@ -19,7 +19,7 @@ import Outputable
cmmOfZgraph :: CmmGroup -> Old.CmmGroup
cmmOfZgraph tops = map mapTop tops
- where mapTop (CmmProc h l g) = CmmProc (Old.CmmInfo Nothing Nothing (info_tbl h)) l (ofZgraph g)
+ where mapTop (CmmProc h l g) = CmmProc (info_tbl h) l (ofZgraph g)
mapTop (CmmData s ds) = CmmData s ds
data ValueDirection = Arguments | Results
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index 5abbed3522..c22f8d5b76 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -18,8 +18,8 @@ module CmmExpr
, RegSet, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet
, plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet
, regSetToList
- , regUsedIn, regSlot
- , Area(..), AreaId(..), SubArea, SubAreaSet, AreaMap, isStackSlotOf
+ , regUsedIn
+ , Area(..), SubArea, SubAreaSet, AreaMap
, module CmmMachOp
, module CmmType
)
@@ -71,11 +71,6 @@ data CmmReg
-- | A stack area is either the stack slot where a variable is spilled
-- or the stack space where function arguments and results are passed.
data Area
- = RegSlot LocalReg
- | CallArea AreaId
- deriving (Eq, Ord)
-
-data AreaId
= Old -- See Note [Old Area]
| Young BlockId -- Invariant: must be a continuation BlockId
-- See Note [Continuation BlockId] in CmmNode.
@@ -287,17 +282,6 @@ reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es
_ `regUsedIn` CmmStackSlot _ _ = False
-----------------------------------------------------------------------------
--- Stack slots
------------------------------------------------------------------------------
-
-isStackSlotOf :: CmmExpr -> LocalReg -> Bool
-isStackSlotOf (CmmStackSlot (RegSlot r) _) r' = r == r'
-isStackSlotOf _ _ = False
-
-regSlot :: LocalReg -> CmmExpr
-regSlot r = CmmStackSlot (RegSlot r) (widthInBytes $ typeWidth $ localRegType r)
-
------------------------------------------------------------------------------
-- Stack slot use information for expressions and other types [_$_]
-----------------------------------------------------------------------------
diff --git a/compiler/cmm/CmmInfo.hs b/compiler/cmm/CmmInfo.hs
index 678d0add7c..7006c74ff7 100644
--- a/compiler/cmm/CmmInfo.hs
+++ b/compiler/cmm/CmmInfo.hs
@@ -88,7 +88,7 @@ mkInfoTable :: Platform -> CmmDecl -> UniqSM [RawCmmDecl]
mkInfoTable _ (CmmData sec dat)
= return [CmmData sec dat]
-mkInfoTable platform (CmmProc (CmmInfo _ _ info) entry_label blocks)
+mkInfoTable platform (CmmProc info entry_label blocks)
| CmmNonInfoTable <- info -- Code without an info table. Easy.
= return [CmmProc Nothing entry_label blocks]
@@ -97,7 +97,8 @@ mkInfoTable platform (CmmProc (CmmInfo _ _ info) entry_label blocks)
; return (top_decls ++
mkInfoTableAndCode info_lbl info_cts
entry_label blocks) }
- | otherwise = panic "mkInfoTable" -- Patern match overlap check not clever enough
+ | otherwise = panic "mkInfoTable"
+ -- Pattern match overlap check not clever enough
-----------------------------------------------------
type InfoTableContents = ( [CmmLit] -- The standard part
diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs
new file mode 100644
index 0000000000..24c241ea88
--- /dev/null
+++ b/compiler/cmm/CmmLayoutStack.hs
@@ -0,0 +1,545 @@
+{-# LANGUAGE RecordWildCards, GADTs #-}
+module CmmLayoutStack (
+ cmmLayoutStack
+ ) where
+
+import Cmm
+import BlockId
+import CmmUtils
+import CmmLive
+import CmmProcPoint
+import SMRep
+import Hoopl
+import OptimizationFuel
+import Constants
+import UniqSupply
+import Maybes
+import UniqFM
+import Util
+
+import FastString
+import Outputable
+import Data.Map (Map)
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import Control.Monad.Fix
+import Data.Array as Array
+
+#include "HsVersions.h"
+
+
+data StackSlot = Occupied | Empty
+ -- Occupied: a return address or part of an update frame
+
+instance Outputable StackSlot where
+ ppr Occupied = ptext (sLit "XXX")
+ ppr Empty = ptext (sLit "---")
+
+-- All stack locations are expressed as positive byte offsets from the
+-- "base", which is defined to be the address above the return address
+-- on the stack on entry to this CmmProc.
+--
+--      |           |  <- base
+--      |-----------|
+--      |    ret    |  <- base + 8
+--      |-----------|
+--      .           .
+--      .           .
+--
+-- Lower addresses have higher StackLocs.
+--
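+-- A worked example (made-up numbers, 64-bit): a block entered with
+-- sm_sp = 24 has Sp sitting 24 bytes below the base, so a variable
+-- spilled at StackLoc 16 is addressed as [Sp + 24 - 16] = [Sp + 8];
+-- this is exactly the arithmetic performed by areaToSp, below.
+--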
+type StackLoc = ByteOff
+
+data StackMap = StackMap
+ { sm_sp :: StackLoc
+ -- ^ the offset of Sp relative to the base on entry
+ -- to this block.
+ , sm_args :: ByteOff
+ , sm_ret_off :: ByteOff
+ -- ^ the number of bytes of arguments in the area for this block
+ -- Defn: the offset of young(L) relative to the base is given by
+ -- (sm_sp - sm_args) of the StackMap for block L.
+ , sm_regs :: UniqFM (LocalReg,StackLoc)
+ -- ^ regs on the stack
+ }
+
+instance Outputable StackMap where
+ ppr StackMap{..} =
+ text "Sp = " <> int sm_sp $$
+ text "sm_args = " <> int sm_args $$
+ text "sm_ret_off = " <> int sm_ret_off $$
+ text "sm_regs = " <> ppr (eltsUFM sm_regs)
+
+
+cmmLayoutStack :: ProcPointSet -> ByteOff -> CmmGraph -> FuelUniqSM CmmGraph
+cmmLayoutStack procpoints entry_args
+ graph@(CmmGraph { g_entry = entry })
+ = do
+ pprTrace "cmmLayoutStack" (ppr entry_args) $ return ()
+ liveness <- cmmLiveness graph
+ pprTrace "liveness" (ppr liveness) $ return ()
+ let blocks = postorderDfs graph
+
+ (_rec_stackmaps, rec_high_sp, new_blocks) <- liftUniq $
+ mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) ->
+ layout procpoints liveness entry entry_args
+ rec_stackmaps rec_high_sp blocks
+
+ pprTrace ("Sp HWM") (ppr rec_high_sp) $
+ return (ofBlockList entry new_blocks)
+
+
+
+layout :: BlockSet -- proc points
+ -> BlockEnv CmmLive -- liveness
+ -> BlockId -- entry
+ -> ByteOff -- stack args on entry
+
+ -> BlockEnv StackMap -- [final] stack maps
+ -> ByteOff -- [final] Sp high water mark
+
+ -> [CmmBlock] -- [in] blocks
+
+ -> UniqSM
+ ( BlockEnv StackMap -- [out] stack maps
+ , ByteOff -- [out] Sp high water mark
+ , [CmmBlock] -- [out] new blocks
+ )
+
+layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks
+ = go blocks init_stackmap entry_args []
+ where
+ sp_high = final_hwm - entry_args
+ -- The stack check value is adjusted by the Sp offset on
+ -- entry to the proc, which is entry_args. We are
+ -- assuming that we only do a stack check at the
+ -- beginning of a proc, and we don't modify Sp before the
+ -- check.
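+ -- For example (made-up numbers): with entry_args = 8 and a
+ -- final high-water mark of 40, CmmHighStackMark is replaced
+ -- by 32 (see areaToSp): the check only has to cover the
+ -- stack growth beyond Sp as it stands on entry.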
+
+ (updfr, cont_info) = collectContInfo blocks
+
+ init_stackmap = mapSingleton entry StackMap{ sm_sp = entry_args
+ , sm_args = entry_args
+ , sm_ret_off = updfr
+ , sm_regs = emptyUFM
+ }
+
+ go [] acc_stackmaps acc_hwm acc_blocks
+ = return (acc_stackmaps, acc_hwm, acc_blocks)
+
+ go (b0 : bs) acc_stackmaps acc_hwm acc_blocks
+ = do
+ let (entry0@(CmmEntry entry_lbl), middle0, last0) = blockSplit b0
+
+ pprTrace "layout" (ppr entry_lbl <+> ppr acc_stackmaps) $ return ()
+
+ let stack0@StackMap { sm_sp = sp0 }
+ = mapFindWithDefault
+ (pprPanic "no stack map for" (ppr entry_lbl))
+ entry_lbl acc_stackmaps
+
+ -- update the stack map to include the effects of assignments
+ -- in this block
+ let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0
+
+ -- insert reloads if necessary
+ let middle1 = if entry_lbl `setMember` procpoints
+ then foldr blockCons middle0 (insertReloads stack0)
+ else middle0
+
+ (saves, out, sp_off, last1, fixup_blocks)
+ <- handleLastNode procpoints liveness cont_info
+ acc_stackmaps stack1 last0
+
+ let hwm' = maximum (acc_hwm : map sm_sp (mapElems out))
+ middle2 = maybeAddSpAdj sp_off $ foldl blockSnoc middle1 saves
+
+ fiddle_middle = mapExpDeep (areaToSp sp0 sp_high final_stackmaps)
+ fiddle_last = mapExpDeep (areaToSp (sp0 - sp_off) sp_high
+ final_stackmaps)
+
+ stackmaps' = mapUnion acc_stackmaps out
+ newblock = blockJoin entry0 middle2 last1
+ newblock' = blockMapNodes3 (id, fiddle_middle, fiddle_last) newblock
+ fixup_blocks' = map (blockMapNodes3 (id, fiddle_middle, id))
+ fixup_blocks
+
+ pprTrace "layout2" (ppr out) $ return ()
+
+ go bs stackmaps' hwm' (newblock' : fixup_blocks' ++ acc_blocks)
+
+
+-- This doesn't seem right somehow. We need to find out whether this
+-- proc will push some update frame material at some point, so that we
+-- can avoid using that area of the stack for spilling. The
+-- updfr_space field of the CmmProc *should* tell us, but it doesn't
+-- (I think maybe it gets filled in later when we do proc-point
+-- splitting).
+--
+-- So we'll just take the max of all the cml_ret_offs. This could be
+-- unnecessarily pessimistic, but probably not in the code we
+-- generate.
+
+collectContInfo :: [CmmBlock] -> (ByteOff, BlockEnv ByteOff)
+collectContInfo blocks
+ = (maximum ret_offs, mapFromList (catMaybes mb_argss))
+ where
+ (mb_argss, ret_offs) = mapAndUnzip get_cont blocks
+
+ get_cont b =
+ case lastNode b of
+ CmmCall { cml_cont = Just l, .. }
+ -> (Just (l, cml_ret_args), cml_ret_off)
+ CmmForeignCall { .. }
+ -> (Just (succ, 0), updfr) -- ??
+ _other -> (Nothing, 0)
+
+
+maybeAddSpAdj :: ByteOff -> Block CmmNode O O -> Block CmmNode O O
+maybeAddSpAdj 0 block = block
+maybeAddSpAdj sp_off block
+ = block `blockSnoc` CmmAssign spReg (cmmOffset (CmmReg spReg) sp_off)
+
+
+procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap
+procMiddle stackmaps node sm
+ = case node of
+ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) t)
+ -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) }
+ where loc = getStackLoc area off stackmaps
+ CmmAssign (CmmLocal r) _other
+ -> sm { sm_regs = delFromUFM (sm_regs sm) r }
+ _other
+ -> sm
+
+getStackLoc :: Area -> ByteOff -> BlockEnv StackMap -> StackLoc
+getStackLoc Old n _ = n
+getStackLoc (Young l) n stackmaps =
+ case mapLookup l stackmaps of
+ Nothing -> pprPanic "getStackLoc" (ppr l)
+ Just sm -> sm_sp sm - sm_args sm + n
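+-- For example (made-up numbers): if block l was laid out with
+-- sm_sp = 32 and sm_args = 8, its young area begins at StackLoc
+-- 32 - 8 = 24, so slot n of Young l is at StackLoc 24 + n.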
+
+-- -----------------------------------------------------------------------------
+-- Handling stack allocation for a last node
+
+handleLastNode
+ :: ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff
+ -> BlockEnv StackMap -> StackMap
+ -> CmmNode O C
+ -> UniqSM
+ ( [CmmNode O O] -- assignments to save live variables
+ , BlockEnv StackMap -- stackmaps for the continuations
+ , ByteOff -- amount to adjust Sp before the jump
+ , CmmNode O C -- new last node
+ , [CmmBlock] -- new blocks
+ )
+
+handleLastNode procpoints liveness cont_info stackmaps
+ stack0@StackMap { sm_sp = sp0 } last
+ = case last of
+ -- At each return / tail call,
+ -- adjust Sp to point to the last argument pushed, which
+ -- is cml_args, after popping any other junk from the stack.
+ CmmCall{ cml_cont = Nothing, .. } -> do
+ let sp_off = sp0 - cml_args
+ return ([], mapEmpty, sp_off, last, [])
+
+ -- At each CmmCall with a continuation:
+ CmmCall{ cml_cont = Just cont_lbl, .. }
+ -- If we have already seen this continuation before, then
+ -- we just have to make the stack look the same:
+ | Just cont_stack <- mapLookup cont_lbl stackmaps
+ ->
+ return ( fixupStack stack0 cont_stack
+ , stackmaps
+ , sp0 - sm_sp cont_stack
+ , last
+ , [] )
+
+ -- a continuation we haven't seen before:
+ -- allocate the stack frame for it.
+ | otherwise -> do
+
+ -- get the set of LocalRegs live in the continuation
+ let target_live = mapFindWithDefault Set.empty cont_lbl
+ liveness
+
+ -- the stack from the base to cml_ret_off is off-limits.
+ -- our new stack frame contains:
+ -- * saved live variables
+ -- * the return address [young(C) + 8]
+ -- * the args for the call,
+ -- which are replaced by the return values at the return
+ -- point.
+
+ -- everything up to cml_ret_off is off-limits: mark it Occupied
+ -- stack2 contains cml_ret_off, plus everything we need to save
+ (stack2, assigs) = allocate cml_ret_off target_live stack0
+
+ -- Sp is currently pointing to sp0,
+ -- we want it to point to (sm_sp stack2 + cml_args)
+ -- so the difference is sp0 - (sm_sp stack2 + cml_args)
+ sp_off = sp0 - (sm_sp stack2 + cml_args)
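+ -- (e.g., made-up numbers: sp0 = 16, sm_sp stack2 = 32 and
+ -- cml_args = 8 give sp_off = -24, i.e. Sp moves 24 bytes
+ -- further from the base to make room for the new frame)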
+
+ -- And the Sp at the continuation is:
+ -- sm_sp stack2 + cml_ret_args
+ cont_stack = stack2{ sm_sp = sm_sp stack2 + cml_ret_args
+ , sm_args = cml_ret_args
+ , sm_ret_off = cml_ret_off
+ }
+
+ -- emit the necessary assignments of LocalRegs to stack slots
+ -- emit an Sp adjustment, taking into account the call area
+ --
+ return ( assigs
+ , mapSingleton cont_lbl cont_stack
+ , sp_off
+ , last
+ , [] -- no new blocks
+ )
+
+ CmmBranch{..} -> handleProcPoints
+ CmmCondBranch{..} -> handleProcPoints
+ CmmSwitch{..} -> handleProcPoints
+
+ where
+ handleProcPoints :: UniqSM ( [CmmNode O O]
+ , BlockEnv StackMap
+ , ByteOff
+ , CmmNode O C
+ , [CmmBlock] )
+
+ handleProcPoints = do
+ pps <- mapM handleProcPoint (successors last)
+ let lbl_map :: LabelMap Label
+ lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ]
+ fix_lbl l = mapLookup l lbl_map `orElse` l
+ return ( []
+ , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ]
+ , 0
+ , mapSuccessors fix_lbl last
+ , concat [ blk | (_,_,_,blk) <- pps ] )
+
+ -- For each proc point that is a successor of this block, we need to
+ -- (a) if the proc point already has a stackmap, we need to
+ -- shuffle the current stack to make it look the same.
+ -- We have to insert a new block to make this happen.
+ -- (b) otherwise, call "allocate live stack0" to make the
+ -- stack map for the proc point
+ handleProcPoint :: BlockId
+ -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock])
+ handleProcPoint l
+ | not (l `setMember` procpoints) = return (l, l, stack0, [])
+ | otherwise = do
+ tmp <- getUniqueM
+ let tmp_lbl = mkBlockId tmp
+ (assigs, stack3) = case mapLookup l stackmaps of
+ Just pp_sm -> (fixupStack stack0 pp_sm, pp_sm)
+ Nothing -> pprTrace "first visit to proc point" (ppr l <+> ppr live $$ ppr stack1) $ (assigs, stack2)
+ where
+ live = mapFindWithDefault Set.empty l liveness
+ (stack1, assigs) = allocate (sm_ret_off stack0) live stack0
+ cont_args = mapFindWithDefault 0 l cont_info
+ stack2 = stack1 { sm_sp = sm_sp stack1 + cont_args
+ , sm_args = cont_args
+ }
+
+ sp_off = sp0 - sm_sp stack3
+
+ block = blockJoin
+ (CmmEntry tmp_lbl)
+ (maybeAddSpAdj sp_off (blockFromList assigs))
+ (CmmBranch l)
+ --
+ return (l, tmp_lbl, stack3, [block])
+
+
+ passthrough :: BlockEnv StackMap
+ passthrough = mapFromList (zip (successors last) (repeat stack0))
+
+
+-- | create a sequence of assignments to establish the new StackMap,
+-- given the old StackMap.
+fixupStack :: StackMap -> StackMap -> [CmmNode O O]
+fixupStack old_stack new_stack = concatMap move new_locs
+ where
+ old_map :: Map LocalReg ByteOff
+ old_map = Map.fromList (stackSlotRegs old_stack)
+ new_locs = stackSlotRegs new_stack
+
+ move (r,n)
+ | Just m <- Map.lookup r old_map, n == m = []
+ | otherwise = [CmmStore (CmmStackSlot Old n)
+ (CmmReg (CmmLocal r))]
+
+-- -----------------------------------------------------------------------------
+-- Updating references to CallAreas
+
+{-
+After running layout, we need to update all the references to stack areas.
+
+Sp(L) is the Sp offset on entry to block L relative to the base of the
+OLD area.
+
+SpArgs(L) is the size of the young area for L, i.e. the number of
+arguments.
+
+ - in block L, each reference to (OldArea[N]) turns into
+ [Sp + Sp(L) - N]
+
+ - in block L, each reference to (Young(L')[N]) turns into
+ [Sp + Sp(L) - Sp(L') + SpArgs(L') - N]
+
+ - be careful with the last node of each block: Sp has already been adjusted
+ to be Sp + Sp(L) - Sp(L')
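+
+A worked example (made-up numbers): with Sp(L) = 24, Sp(L') = 16, and
+SpArgs(L') = 8, a reference to Young(L')[8] inside block L becomes
+[Sp + 24 - 16 + 8 - 8] = [Sp + 8].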
+-}
+
+areaToSp :: ByteOff -> ByteOff -> BlockEnv StackMap -> CmmExpr -> CmmExpr
+areaToSp sp_old _sp_hwm stackmaps (CmmStackSlot area n) =
+ cmmOffset (CmmReg spReg) (sp_old - area_off - n)
+ where
+ area_off = case area of
+ Old -> 0
+ Young l ->
+ case mapLookup l stackmaps of
+ Just sm -> sm_sp sm - sm_args sm
+ Nothing -> pprPanic "areaToSp(2)" (ppr l)
+areaToSp _ sp_hwm _ (CmmLit CmmHighStackMark) = CmmLit (mkIntCLit sp_hwm)
+areaToSp _ _ _ other = other
+
+
+-- -----------------------------------------------------------------------------
+-- Saving live registers
+
+-- | Given a set of live registers and a StackMap, save all the registers
+-- on the stack and return the new StackMap and the assignments to do
+-- the saving.
+--
+allocate :: ByteOff -> RegSet -> StackMap -> (StackMap, [CmmNode O O])
+allocate ret_off live stackmap@StackMap{ sm_sp = sp0
+ , sm_regs = regs0 }
+ =
+ pprTrace "allocate" (ppr live $$ ppr stackmap) $
+
+ -- we only have to save regs that are not already in a slot
+ let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live)
+ regs1 = filterUFM (\(r,_) -> elemRegSet r live) regs0
+ in
+
+ -- make a map of the stack
+ let stack = reverse $ Array.elems $
+ accumArray (\_ x -> x) Empty (1, toWords (max sp0 ret_off)) $
+ ret_words ++ live_words
+ where ret_words =
+ [ (x, Occupied)
+ | x <- [ 1 .. toWords ret_off] ]
+ live_words =
+ [ (toWords x, Occupied)
+ | (r,off) <- eltsUFM regs1,
+ let w = localRegBytes r,
+ x <- [ off, off-wORD_SIZE .. off - w + 1] ]
+ in
+
+ -- Pass over the stack: find slots to save all the new live variables,
+ -- choosing the oldest slots first (hence a foldr).
+ let
+ save slot ([], stack, n, assigs, regs) -- no more regs to save
+ = ([], slot:stack, n `plusW` 1, assigs, regs)
+ save slot (to_save, stack, n, assigs, regs)
+ = case slot of
+ Occupied -> (to_save, Occupied:stack, n `plusW` 1, assigs, regs)
+ Empty
+ | Just (stack', r, to_save') <-
+ select_save to_save (slot:stack)
+ -> let assig = CmmStore (CmmStackSlot Old n')
+ (CmmReg (CmmLocal r))
+ n' = n `plusW` 1
+ in
+ (to_save', stack', n', assig : assigs, (r,(r,n')):regs)
+
+ | otherwise
+ -> (to_save, slot:stack, n `plusW` 1, assigs, regs)
+
+ -- we should do better here: right now we'll fit the smallest first,
+ -- but it would make more sense to fit the biggest first.
+ select_save :: [LocalReg] -> [StackSlot]
+ -> Maybe ([StackSlot], LocalReg, [LocalReg])
+ select_save regs stack = go regs []
+ where go [] no_fit = Nothing
+ go (r:rs) no_fit
+ | Just rest <- dropEmpty words stack
+ = Just (replicate words Occupied ++ rest, r, rs++no_fit)
+ | otherwise
+ = go rs (r:no_fit)
+ where words = localRegWords r
+
+ -- fill in empty slots as much as possible
+ (still_to_save, save_stack, n, save_assigs, save_regs)
+ = foldr save (to_save, [], 0, [], []) stack
+
+ -- push any remaining live vars on the stack
+ (push_sp, push_assigs, push_regs)
+ = foldr push (n, [], []) still_to_save
+ where
+ push r (n, assigs, regs)
+ = (n', assig : assigs, (r,(r,n')) : regs)
+ where
+ w = typeWidth (localRegType r)
+ n' = n + widthInBytes w
+ assig = CmmStore (CmmStackSlot Old n')
+ (CmmReg (CmmLocal r))
+
+ trim_sp
+ | not (null push_regs) = push_sp
+ | otherwise
+ = case break notEmpty save_stack of
+ (empties, rest) -> n `plusW` (- length empties)
+
+ final_regs = regs1 `addListToUFM` push_regs
+ `addListToUFM` save_regs
+
+ in
+ -- XXX should be an assert
+ if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else
+
+ ( stackmap { sm_regs = final_regs , sm_sp = trim_sp }
+ , push_assigs ++ save_assigs )
+
+
+-- -----------------------------------------------------------------------------
+
+plusW :: ByteOff -> WordOff -> ByteOff
+plusW b w = b + w * wORD_SIZE
+
+dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot]
+dropEmpty 0 ss = Just ss
+dropEmpty n (Empty : ss) = dropEmpty (n-1) ss
+dropEmpty n _ = Nothing
+
+pushEmpty :: ByteOff -> [StackSlot] -> [StackSlot]
+pushEmpty n stack = replicate (toWords n) Empty ++ stack
+
+notEmpty :: StackSlot -> Bool
+notEmpty Empty = False
+notEmpty _ = True
+
+localRegBytes :: LocalReg -> ByteOff
+localRegBytes r = widthInBytes (typeWidth (localRegType r))
+
+localRegWords :: LocalReg -> WordOff
+localRegWords = toWords . localRegBytes
+
+toWords :: ByteOff -> WordOff
+toWords x = x `quot` wORD_SIZE
+
+
+insertReloads :: StackMap -> [CmmNode O O]
+insertReloads stackmap =
+ [ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot Old sp)
+ (localRegType r))
+ | (r,sp) <- stackSlotRegs stackmap
+ ]
+
+
+stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)]
+stackSlotRegs sm = eltsUFM (sm_regs sm)
diff --git a/compiler/cmm/CmmLive.hs b/compiler/cmm/CmmLive.hs
index 9258d986ad..d5170b66b7 100644
--- a/compiler/cmm/CmmLive.hs
+++ b/compiler/cmm/CmmLive.hs
@@ -21,7 +21,6 @@ import PprCmmExpr ()
import Hoopl
import Maybes
import Outputable
-import UniqSet
-----------------------------------------------------------------------------
-- Calculating what variables are live on entry to a basic block
@@ -77,11 +76,7 @@ xferLive = mkBTransfer3 fst mid lst
mid :: CmmNode O O -> CmmLive -> CmmLive
mid n f = gen_kill n f
lst :: CmmNode O C -> FactBase CmmLive -> CmmLive
- -- slightly inefficient: kill is unnecessary for emptyRegSet
- lst n f = gen_kill n
- $ case n of CmmCall{} -> emptyRegSet
- CmmForeignCall{} -> emptyRegSet
- _ -> joinOutFacts liveLattice n f
+ lst n f = gen_kill n $ joinOutFacts liveLattice n f
-----------------------------------------------------------------------------
-- Removing assignments to dead variables
diff --git a/compiler/cmm/CmmNode.hs b/compiler/cmm/CmmNode.hs
index 5a07bad7d7..971b351320 100644
--- a/compiler/cmm/CmmNode.hs
+++ b/compiler/cmm/CmmNode.hs
@@ -310,14 +310,14 @@ instance UserOfSlots ForeignTarget where
instance DefinerOfSlots (CmmNode e x) where
foldSlotsDefd f z n = case n of
CmmStore (CmmStackSlot a i) expr -> f z (a, i, widthInBytes $ typeWidth $ cmmExprType expr)
- CmmForeignCall {res=res} -> fold f z $ map foreign_call_slot res
+ -- CmmForeignCall {res=res} -> fold f z $ map foreign_call_slot res
_ -> z
where
fold :: forall a b.
DefinerOfSlots a =>
(b -> SubArea -> b) -> b -> a -> b
fold f z n = foldSlotsDefd f z n
- foreign_call_slot r = case widthInBytes $ typeWidth $ localRegType r of w -> (RegSlot r, w, w)
+ -- foreign_call_slot r = case widthInBytes $ typeWidth $ localRegType r of w -> (RegSlot r, w, w)
-----------------------------------
-- mapping Expr in CmmNode
diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y
index 240dab92d1..f3ac1ed1e7 100644
--- a/compiler/cmm/CmmParse.y
+++ b/compiler/cmm/CmmParse.y
@@ -230,35 +230,31 @@ lits :: { [ExtFCode CmmExpr] }
cmmproc :: { ExtCode }
-- TODO: add real SRT/info tables to parsed Cmm
- : info maybe_formals_without_hints maybe_gc_block maybe_frame '{' body '}'
- { do ((entry_ret_label, info, live, formals, gc_block, frame), stmts) <-
+ : info maybe_formals_without_hints '{' body '}'
+ { do ((entry_ret_label, info, live, formals), stmts) <-
getCgStmtsEC' $ loopDecls $ do {
(entry_ret_label, info, live) <- $1;
formals <- sequence $2;
- gc_block <- $3;
- frame <- $4;
- $6;
- return (entry_ret_label, info, live, formals, gc_block, frame) }
+ $4;
+ return (entry_ret_label, info, live, formals) }
blks <- code (cgStmtsToBlocks stmts)
- code (emitInfoTableAndCode entry_ret_label (CmmInfo gc_block frame info) formals blks) }
+ code (emitInfoTableAndCode entry_ret_label info formals blks) }
| info maybe_formals_without_hints ';'
{ do (entry_ret_label, info, live) <- $1;
formals <- sequence $2;
- code (emitInfoTableAndCode entry_ret_label (CmmInfo Nothing Nothing info) formals []) }
+ code (emitInfoTableAndCode entry_ret_label info formals []) }
- | NAME maybe_formals_without_hints maybe_gc_block maybe_frame '{' body '}'
+ | NAME maybe_formals_without_hints '{' body '}'
{% withThisPackage $ \pkg ->
do newFunctionName $1 pkg
- ((formals, gc_block, frame), stmts) <-
+ (formals, stmts) <-
getCgStmtsEC' $ loopDecls $ do {
formals <- sequence $2;
- gc_block <- $3;
- frame <- $4;
- $6;
- return (formals, gc_block, frame) }
+ $4;
+ return formals }
blks <- code (cgStmtsToBlocks stmts)
- code (emitProc (CmmInfo gc_block frame CmmNonInfoTable) (mkCmmCodeLabel pkg $1) formals blks) }
+ code (emitProc CmmNonInfoTable (mkCmmCodeLabel pkg $1) formals blks) }
info :: { ExtFCode (CLabel, CmmInfoTable, [Maybe LocalReg]) }
: 'INFO_TABLE' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ')'
@@ -599,18 +595,7 @@ formals_without_hints :: { [ExtFCode LocalReg] }
formal_without_hint :: { ExtFCode LocalReg }
: type NAME { newLocal $1 $2 }
-maybe_frame :: { ExtFCode (Maybe UpdateFrame) }
- : {- empty -} { return Nothing }
- | 'jump' expr '(' exprs0 ')' { do { target <- $2;
- args <- sequence $4;
- return $ Just (UpdateFrame target args) } }
-
-maybe_gc_block :: { ExtFCode (Maybe BlockId) }
- : {- empty -} { return Nothing }
- | 'goto' NAME
- { do l <- lookupLabel $2; return (Just l) }
-
-type :: { CmmType }
+type :: { CmmType }
: 'bits8' { b8 }
| typenot8 { $1 }
diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs
index 8bccc9d103..023907f245 100644
--- a/compiler/cmm/CmmPipeline.hs
+++ b/compiler/cmm/CmmPipeline.hs
@@ -21,6 +21,7 @@ import CmmRewriteAssignments
import CmmStackLayout
import CmmContFlowOpt
import OptimizationFuel
+import CmmLayoutStack
import DynFlags
import ErrUtils
@@ -110,40 +111,45 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}})
----------- Proc points -------------------
let callPPs = {-# SCC "callProcPoints" #-} callProcPoints g
procPoints <- {-# SCC "minimalProcPointSet" #-} run $ minimalProcPointSet (targetPlatform dflags) callPPs g
- g <- {-# SCC "addProcPointProtocols" #-} run $ addProcPointProtocols callPPs procPoints g
- dump Opt_D_dump_cmmz_proc "Post Proc Points Added" g
- ----------- Spills and reloads -------------------
- g <- {-# SCC "dualLivenessWithInsertion" #-} run $ dualLivenessWithInsertion procPoints g
- dump Opt_D_dump_cmmz_spills "Post spills and reloads" g
-
- ----------- Sink and inline assignments -------------------
- g <- {-# SCC "rewriteAssignments" #-} runOptimization $ rewriteAssignments platform g
- dump Opt_D_dump_cmmz_rewrite "Post rewrite assignments" g
+ g <- {-# SCC "layoutStack" #-} run $ cmmLayoutStack procPoints entry_off g
+ dump Opt_D_dump_cmmz_sp "Layout Stack" g
+
+-- g <- {-# SCC "addProcPointProtocols" #-} run $ addProcPointProtocols callPPs procPoints g
+-- dump Opt_D_dump_cmmz_proc "Post Proc Points Added" g
+--
+-- ----------- Spills and reloads -------------------
+-- g <- {-# SCC "dualLivenessWithInsertion" #-} run $ dualLivenessWithInsertion procPoints g
+-- dump Opt_D_dump_cmmz_spills "Post spills and reloads" g
+--
+-- ----------- Sink and inline assignments -------------------
+-- g <- {-# SCC "rewriteAssignments" #-} runOptimization $ rewriteAssignments platform g
+-- dump Opt_D_dump_cmmz_rewrite "Post rewrite assignments" g
+--
----------- Eliminate dead assignments -------------------
g <- {-# SCC "removeDeadAssignments" #-} runOptimization $ removeDeadAssignments g
dump Opt_D_dump_cmmz_dead "Post remove dead assignments" g
- ----------- Zero dead stack slots (Debug only) ---------------
- -- Debugging: stubbing slots on death can cause crashes early
- g <- if opt_StubDeadValues
- then {-# SCC "stubSlotsOnDeath" #-} run $ stubSlotsOnDeath g
- else return g
- dump Opt_D_dump_cmmz_stub "Post stub dead stack slots" g
-
- --------------- Stack layout ----------------
- slotEnv <- {-# SCC "liveSlotAnal" #-} run $ liveSlotAnal g
- let spEntryMap = getSpEntryMap entry_off g
- mbpprTrace "live slot analysis results: " (ppr slotEnv) $ return ()
- let areaMap = {-# SCC "layout" #-} layout procPoints spEntryMap slotEnv entry_off g
- mbpprTrace "areaMap" (ppr areaMap) $ return ()
-
- ------------ Manifest the stack pointer --------
- g <- {-# SCC "manifestSP" #-} run $ manifestSP spEntryMap areaMap entry_off g
- dump Opt_D_dump_cmmz_sp "Post manifestSP" g
- -- UGH... manifestSP can require updates to the procPointMap.
- -- We can probably do something quicker here for the update...
+-- ----------- Zero dead stack slots (Debug only) ---------------
+-- -- Debugging: stubbing slots on death can cause crashes early
+-- g <- if opt_StubDeadValues
+-- then {-# SCC "stubSlotsOnDeath" #-} run $ stubSlotsOnDeath g
+-- else return g
+-- dump Opt_D_dump_cmmz_stub "Post stub dead stack slots" g
+--
+-- --------------- Stack layout ----------------
+-- slotEnv <- {-# SCC "liveSlotAnal" #-} run $ liveSlotAnal g
+-- let spEntryMap = getSpEntryMap entry_off g
+-- mbpprTrace "live slot analysis results: " (ppr slotEnv) $ return ()
+-- let areaMap = {-# SCC "layout" #-} layout procPoints spEntryMap slotEnv entry_off g
+-- mbpprTrace "areaMap" (ppr areaMap) $ return ()
+--
+-- ------------ Manifest the stack pointer --------
+-- g <- {-# SCC "manifestSP" #-} run $ manifestSP spEntryMap areaMap entry_off g
+-- dump Opt_D_dump_cmmz_sp "Post manifestSP" g
+-- -- UGH... manifestSP can require updates to the procPointMap.
+-- -- We can probably do something quicker here for the update...
------------- Split into separate procedures ------------
procPointMap <- {-# SCC "procPointAnalysis" #-} run $ procPointAnalysis procPoints g
@@ -157,12 +163,12 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}})
let localCAFs = catMaybes $ map (localCAFInfo platform cafEnv) gs
mbpprTrace "localCAFs" (pprPlatform platform localCAFs) $ return ()
- gs <- {-# SCC "lowerSafeForeignCalls" #-} run $ mapM (lowerSafeForeignCalls areaMap) gs
- dumps Opt_D_dump_cmmz_lower "Post lowerSafeForeignCalls" gs
+-- gs <- {-# SCC "lowerSafeForeignCalls" #-} run $ mapM (lowerSafeForeignCalls areaMap) gs
+-- dumps Opt_D_dump_cmmz_lower "Post lowerSafeForeignCalls" gs
-- NO MORE GRAPH TRANSFORMATION AFTER HERE -- JUST MAKING INFOTABLES
- gs <- {-# SCC "setInfoTableStackMap" #-} return $ map (setInfoTableStackMap slotEnv areaMap) gs
- dumps Opt_D_dump_cmmz_info "after setInfoTableStackMap" gs
+-- gs <- {-# SCC "setInfoTableStackMap" #-} return $ map (setInfoTableStackMap slotEnv areaMap) gs
+-- dumps Opt_D_dump_cmmz_info "after setInfoTableStackMap" gs
----------- Control-flow optimisations ---------------
gs <- {-# SCC "cmmCfgOpts(2)" #-} return $ map cmmCfgOptsProc gs
diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs
index cd0a1f0ded..77ffb4df56 100644
--- a/compiler/cmm/CmmProcPoint.hs
+++ b/compiler/cmm/CmmProcPoint.hs
@@ -4,7 +4,7 @@
module CmmProcPoint
( ProcPointSet, Status(..)
, callProcPoints, minimalProcPointSet
- , addProcPointProtocols, splitAtProcPoints, procPointAnalysis
+ , splitAtProcPoints, procPointAnalysis
)
where
@@ -248,6 +248,8 @@ algorithm would be just as good, so that's what we do.
-}
+{-
+
data Protocol = Protocol Convention [CmmFormal] Area
deriving Eq
instance Outputable Protocol where
@@ -371,6 +373,8 @@ add_CopyOuts protos procPoints g = foldGraphBlocks mb_copy_out (return mapEmpty)
return $ (b, foldl (flip insertBlock) bmap bs)
finish (b, bmap) = return $ insertBlock b bmap
skip b bs = insertBlock b `liftM` bs
+-}
+
-- At this point, we have found a set of procpoints, each of which should be
-- the entry point of a procedure.
diff --git a/compiler/cmm/CmmRewriteAssignments.hs b/compiler/cmm/CmmRewriteAssignments.hs
index 45eb89f9fd..2c33b7b5ac 100644
--- a/compiler/cmm/CmmRewriteAssignments.hs
+++ b/compiler/cmm/CmmRewriteAssignments.hs
@@ -404,8 +404,8 @@ clobbers (CmmReg (CmmGlobal Hp), _) (_, _) = False
-- ToDo: Also catch MachOp case
clobbers (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (u, CmmLoad (ss'@CmmStackSlot{}) _)
| getUnique r == u, ss == ss' = False -- No-op on the stack slot (XXX: Do we need this special case?)
-clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr
- where f (CmmLoad (CmmStackSlot (CallArea a') o') t)
+clobbers (CmmStackSlot a o, rhs) (_, expr) = f expr
+ where f (CmmLoad (CmmStackSlot a' o') t)
= (a, o, widthInBytes (cmmExprWidth rhs)) `overlaps` (a', o', widthInBytes (typeWidth t))
f (CmmLoad e _) = containsStackSlot e
f (CmmMachOp _ es) = or (map f es)
@@ -416,9 +416,6 @@ clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr
containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es)
containsStackSlot (CmmStackSlot{}) = True
containsStackSlot _ = False
-clobbers (CmmStackSlot (RegSlot l) _, _) (_, expr) = f expr
- where f (CmmLoad (CmmStackSlot (RegSlot l') _) _) = l == l'
- f _ = False
clobbers _ (_, e) = f e
where f (CmmLoad (CmmStackSlot _ _) _) = False
f (CmmLoad{}) = True -- conservative
@@ -432,7 +429,7 @@ clobbers _ (_, e) = f e
-- [ I32 ]
-- [ F64 ]
-- s' -w'- o'
-type CallSubArea = (AreaId, Int, Int) -- area, offset, width
+type CallSubArea = (Area, Int, Int) -- area, offset, width
overlaps :: CallSubArea -> CallSubArea -> Bool
overlaps (a, _, _) (a', _, _) | a /= a' = False
overlaps (_, o, w) (_, o', w') =
@@ -457,7 +454,7 @@ invalidateVolatile :: BlockId -> AssignmentMap -> AssignmentMap
invalidateVolatile k m = mapUFM p m
where p (AlwaysInline e) = if exp e then AlwaysInline e else NeverOptimize
where exp CmmLit{} = True
- exp (CmmLoad (CmmStackSlot (CallArea (Young k')) _) _)
+ exp (CmmLoad (CmmStackSlot (Young k') _) _)
| k' == k = False
exp (CmmLoad (CmmStackSlot _ _) _) = True
exp (CmmMachOp _ es) = and (map exp es)
@@ -596,10 +593,6 @@ assignmentRewrite = mkFRewrite3 first middle last
where rep = typeWidth (localRegType r)
_ -> old
-- See Note [Soundness of store rewriting]
- inlineExp assign old@(CmmLoad (CmmStackSlot (RegSlot r) _) _)
- = case lookupUFM assign r of
- Just (AlwaysInline x) -> x
- _ -> old
inlineExp _ old = old
inlinable :: CmmNode e x -> Bool
diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs
index fbe4db0333..35f0429a7f 100644
--- a/compiler/cmm/CmmSpillReload.hs
+++ b/compiler/cmm/CmmSpillReload.hs
@@ -12,6 +12,10 @@ module CmmSpillReload
)
where
+import Outputable
+dualLivenessWithInsertion = panic "BANG BANG BANG BANG BANG BANG CLICK CLICK"
+
+{-
import BlockId
import Cmm
import CmmUtils
@@ -164,3 +168,4 @@ instance Outputable DualLive where
else (ppr_regs "live in regs =" regs),
if nullRegSet stack then PP.empty
else (ppr_regs "live on stack =" stack)]
+-}
diff --git a/compiler/cmm/CmmStackLayout.hs b/compiler/cmm/CmmStackLayout.hs
index dad684bf31..b75572d6c5 100644
--- a/compiler/cmm/CmmStackLayout.hs
+++ b/compiler/cmm/CmmStackLayout.hs
@@ -1,3 +1,8 @@
+module CmmStackLayout () where
+
+#if 0
+
+
{-# OPTIONS_GHC -XGADTs -XNoMonoLocalBinds #-}
-- Norman likes local bindings
-- If this module lives on I'd like to get rid of the -XNoMonoLocalBinds
@@ -589,3 +594,4 @@ stubSlotsOnDeath g = liftM fst $ dataflowPassBwd g [] $ analRewBwd slotLattice
(stackStubExpr (widthFromBytes w))
in case rst of Nothing -> Just (mkMiddle m <*> store)
Just g -> Just (g <*> store)
+#endif
diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs
index 0d75235a52..922f31e45a 100644
--- a/compiler/cmm/MkGraph.hs
+++ b/compiler/cmm/MkGraph.hs
@@ -3,14 +3,15 @@
module MkGraph
( CmmAGraph, CgStmt(..)
, (<*>), catAGraphs
- , mkLabel, mkMiddle, mkLast
+ , mkLabel, mkMiddle, mkLast, outOfLine
, lgraphOfAGraph, labelAGraph
, stackStubExpr
- , mkNop, mkAssign, mkStore, mkUnsafeCall, mkFinalCall, lastWithArgs
- , mkJump, mkDirectJump, mkForeignJump, mkJumpGC, mkCbranch, mkSwitch
+ , mkNop, mkAssign, mkStore, mkUnsafeCall, mkFinalCall, mkCallReturnsTo
+ , mkJump, mkDirectJump, mkForeignJump, mkForeignJumpExtra, mkJumpGC
+ , mkCbranch, mkSwitch
, mkReturn, mkReturnSimple, mkComment, mkCallEntry, mkBranch
- , copyInOflow, copyInSlot, copyOutOflow, copyOutSlot
+ , copyInOflow, copyOutOflow
, toCall, Transfer(..)
)
where
@@ -136,6 +137,9 @@ mkMiddle middle = unitOL (CgStmt middle)
mkLast :: CmmNode O C -> CmmAGraph
mkLast last = unitOL (CgLast last)
+-- | A labelled code block; should end in a last node
+outOfLine :: BlockId -> CmmAGraph -> CmmAGraph
+outOfLine l g = unitOL (CgFork l g)
-- | allocate a fresh label for the entry point
lgraphOfAGraph :: CmmAGraph -> UniqSM CmmGraph
@@ -168,23 +172,30 @@ mkStore l r = mkMiddle $ CmmStore l r
---------- Control transfer
mkJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
mkJump e actuals updfr_off =
- lastWithArgs Jump old NativeNodeCall actuals updfr_off $
+ lastWithArgs Jump Old NativeNodeCall actuals updfr_off $
toCall e Nothing updfr_off 0
mkDirectJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
mkDirectJump e actuals updfr_off =
- lastWithArgs Jump old NativeDirectCall actuals updfr_off $
+ lastWithArgs Jump Old NativeDirectCall actuals updfr_off $
toCall e Nothing updfr_off 0
mkJumpGC :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
mkJumpGC e actuals updfr_off =
- lastWithArgs Jump old GC actuals updfr_off $
+ lastWithArgs Jump Old GC actuals updfr_off $
toCall e Nothing updfr_off 0
mkForeignJump :: Convention -> CmmExpr -> [CmmActual] -> UpdFrameOffset
-> CmmAGraph
mkForeignJump conv e actuals updfr_off =
- lastWithArgs Jump old conv actuals updfr_off $
+ lastWithArgs Jump Old conv actuals updfr_off $
+ toCall e Nothing updfr_off 0
+
+mkForeignJumpExtra :: Convention -> CmmExpr -> [CmmActual]
+ -> UpdFrameOffset -> (ByteOff, [(CmmExpr, ByteOff)])
+ -> CmmAGraph
+mkForeignJumpExtra conv e actuals updfr_off extra_stack =
+ lastWithArgsAndExtraStack Jump Old conv actuals updfr_off extra_stack $
toCall e Nothing updfr_off 0
mkCbranch :: CmmExpr -> BlockId -> BlockId -> CmmAGraph
@@ -195,15 +206,15 @@ mkSwitch e tbl = mkLast $ CmmSwitch e tbl
mkReturn :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
mkReturn e actuals updfr_off =
- lastWithArgs Ret old NativeReturn actuals updfr_off $
+ lastWithArgs Ret Old NativeReturn actuals updfr_off $
toCall e Nothing updfr_off 0
- -- where e = CmmLoad (CmmStackSlot (CallArea Old) updfr_off) gcWord
+ -- where e = CmmLoad (CmmStackSlot Old updfr_off) gcWord
mkReturnSimple :: [CmmActual] -> UpdFrameOffset -> CmmAGraph
mkReturnSimple actuals updfr_off =
- lastWithArgs Ret old NativeReturn actuals updfr_off $
+ lastWithArgs Ret Old NativeReturn actuals updfr_off $
toCall e Nothing updfr_off 0
- where e = CmmLoad (CmmStackSlot (CallArea Old) updfr_off) gcWord
+ where e = CmmLoad (CmmStackSlot Old updfr_off) gcWord
mkBranch :: BlockId -> CmmAGraph
mkBranch bid = mkLast (CmmBranch bid)
@@ -211,9 +222,20 @@ mkBranch bid = mkLast (CmmBranch bid)
mkFinalCall :: CmmExpr -> CCallConv -> [CmmActual] -> UpdFrameOffset
-> CmmAGraph
mkFinalCall f _ actuals updfr_off =
- lastWithArgs Call old NativeDirectCall actuals updfr_off $
+ lastWithArgs Call Old NativeDirectCall actuals updfr_off $
toCall f Nothing updfr_off 0
+mkCallReturnsTo :: CmmExpr -> Convention -> [CmmActual]
+ -> BlockId
+ -> ByteOff
+ -> UpdFrameOffset
+ -> (ByteOff, [(CmmExpr,ByteOff)])
+ -> CmmAGraph
+mkCallReturnsTo f callConv actuals ret_lbl ret_off updfr_off extra_stack = do
+ lastWithArgsAndExtraStack Call (Young ret_lbl) callConv actuals
+ updfr_off extra_stack $
+ toCall f (Just ret_lbl) updfr_off ret_off
+
mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph
mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as
@@ -238,12 +260,9 @@ stackStubExpr w = CmmLit (CmmInt 0 w)
-- Therefore, for copying arguments and results, we provide different
-- functions to pass the arguments in an overflow area and to pass them in spill slots.
copyInOflow :: Convention -> Area -> [CmmFormal] -> (Int, CmmAGraph)
-copyInSlot :: Convention -> [CmmFormal] -> [CmmNode O O]
-copyOutSlot :: Convention -> [LocalReg] -> [CmmNode O O]
copyInOflow conv area formals = (offset, catAGraphs $ map mkMiddle nodes)
where (offset, nodes) = copyIn oneCopyOflowI conv area formals
-copyInSlot c f = snd $ copyIn oneCopySlotI c (panic "no area for copying to slots") f
type SlotCopier = Area -> (LocalReg, ByteOff) -> (ByteOff, [CmmNode O O]) ->
(ByteOff, [CmmNode O O])
@@ -264,26 +283,20 @@ copyIn oflow conv area formals =
adjust rst x@(_, RegisterParam _) = x : rst
-- Copy-in one arg, using overflow space if needed.
-oneCopyOflowI, oneCopySlotI :: SlotCopier
+oneCopyOflowI :: SlotCopier
oneCopyOflowI area (reg, off) (n, ms) =
(max n off, CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty) : ms)
where ty = localRegType reg
--- Copy-in one arg, using spill slots if needed -- used for calling conventions at
--- a procpoint that is not a return point. The offset is irrelevant here...
-oneCopySlotI _ (reg, _) (n, ms) =
- (n, CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot (RegSlot reg) w) ty) : ms)
- where ty = localRegType reg
- w = widthInBytes (typeWidth ty)
-
-
-- Factoring out the common parts of the copyout functions yielded something
-- more complicated:
data Transfer = Call | Jump | Ret deriving Eq
-copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual] -> UpdFrameOffset ->
- (Int, CmmAGraph)
+copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual]
+ -> UpdFrameOffset
+ -> (ByteOff, [(CmmExpr,ByteOff)]) -- extra stack stuff
+ -> (Int, CmmAGraph)
-- Generate code to move the actual parameters into the locations
-- required by the calling convention. This includes a store for the
@@ -294,51 +307,61 @@ copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual] -> UpdFrameOffset
-- the info table for return and adjust the offsets of the other
-- parameters. If this is a call instruction, we adjust the offsets
-- of the other parameters.
-copyOutOflow conv transfer area@(CallArea a) actuals updfr_off
- = foldr co (init_offset, mkNop) args'
+copyOutOflow conv transfer area actuals updfr_off
+ (extra_stack_off, extra_stack_stuff)
+ = foldr co (init_offset, mkNop) (args' ++ stack_params)
where
co (v, RegisterParam r) (n, ms) = (n, mkAssign (CmmGlobal r) v <*> ms)
co (v, StackParam off) (n, ms) = (max n off, mkStore (CmmStackSlot area off) v <*> ms)
+ stack_params = [ (e, StackParam (off + init_offset))
+ | (e,off) <- extra_stack_stuff ]
+
(setRA, init_offset) =
- case a of Young id -> id `seq` -- Generate a store instruction for
+ case area of
+ Young id -> id `seq` -- Generate a store instruction for
-- the return address if making a call
if transfer == Call then
([(CmmLit (CmmBlock id), StackParam init_offset)],
widthInBytes wordWidth)
else ([], 0)
- Old -> ([], updfr_off)
+ Old -> ([], updfr_off)
+
+ arg_offset = init_offset + extra_stack_off
args :: [(CmmExpr, ParamLocation)] -- The argument and where to put it
args = assignArgumentsPos conv cmmExprType actuals
args' = foldl adjust setRA args
- where adjust rst (v, StackParam off) = (v, StackParam (off + init_offset)) : rst
+ where adjust rst (v, StackParam off) = (v, StackParam (off + arg_offset)) : rst
adjust rst x@(_, RegisterParam _) = x : rst
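+ -- For example (made-up numbers): for a Call into a Young area on a
+ -- 64-bit target, init_offset is 8 (the return-address slot), so with
+ -- extra_stack_off = 16 an argument assigned StackParam off is stored
+ -- at StackParam (off + 24).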
-copyOutOflow _ _ (RegSlot _) _ _ = panic "cannot copy arguments into a register slot"
--- Args passed only in registers and stack slots; no overflow space.
--- No return address may apply!
-copyOutSlot conv actuals = foldr co [] args
- where co (v, RegisterParam r) ms = CmmAssign (CmmGlobal r) (toExp v) : ms
- co (v, StackParam off) ms = CmmStore (CmmStackSlot (RegSlot v) off) (toExp v) : ms
- toExp r = CmmReg (CmmLocal r)
- args = assignArgumentsPos conv localRegType actuals
mkCallEntry :: Convention -> [CmmFormal] -> (Int, CmmAGraph)
-mkCallEntry conv formals = copyInOflow conv (CallArea Old) formals
+mkCallEntry conv formals = copyInOflow conv Old formals
-lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual] -> UpdFrameOffset ->
- (ByteOff -> CmmAGraph) -> CmmAGraph
+lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual]
+ -> UpdFrameOffset
+ -> (ByteOff -> CmmAGraph)
+ -> CmmAGraph
lastWithArgs transfer area conv actuals updfr_off last =
- let (outArgs, copies) = copyOutOflow conv transfer area actuals updfr_off in
+ let (outArgs, copies) = copyOutOflow conv transfer area actuals
+ updfr_off noExtraStack in
+ copies <*> last outArgs
+
+lastWithArgsAndExtraStack :: Transfer -> Area -> Convention -> [CmmActual]
+ -> UpdFrameOffset -> (ByteOff, [(CmmExpr,ByteOff)])
+ -> (ByteOff -> CmmAGraph)
+ -> CmmAGraph
+lastWithArgsAndExtraStack transfer area conv actuals updfr_off
+ extra_stack last =
+ let (outArgs, copies) = copyOutOflow conv transfer area actuals
+ updfr_off extra_stack in
copies <*> last outArgs
--- The area created for the jump and return arguments is the same area as the
--- procedure entry.
-old :: Area
-old = CallArea Old
+noExtraStack :: (ByteOff, [(CmmExpr,ByteOff)])
+noExtraStack = (0,[])
toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff -> ByteOff
-> CmmAGraph
diff --git a/compiler/cmm/OldCmm.hs b/compiler/cmm/OldCmm.hs
index a8a9d5dde0..b9d1b9d1a4 100644
--- a/compiler/cmm/OldCmm.hs
+++ b/compiler/cmm/OldCmm.hs
@@ -16,7 +16,7 @@
module OldCmm (
CmmGroup, GenCmmGroup, RawCmmGroup, CmmDecl, RawCmmDecl,
ListGraph(..),
- CmmInfo(..), UpdateFrame(..), CmmInfoTable(..), ClosureTypeInfo(..),
+ UpdateFrame(..), CmmInfoTable(..), ClosureTypeInfo(..),
CmmStatic(..), CmmStatics(..), CmmFormal, CmmActual,
cmmMapGraph, cmmTopMapGraph,
GenBasicBlock(..), CmmBasicBlock, blockId, blockStmts, mapBlockStmts,
@@ -53,13 +53,6 @@ import FastString
-- Info Tables
-----------------------------------------------------------------------------
-data CmmInfo
- = CmmInfo
- (Maybe BlockId) -- GC target. Nothing <=> CPS won't do stack check
- -- JD: NOT USED BY NEW CODE GEN
- (Maybe UpdateFrame) -- Update frame
- CmmInfoTable -- Info table
-
-- | A frame that is to be pushed before entry to the function.
-- Used to handle 'update' frames.
data UpdateFrame =
@@ -85,8 +78,8 @@ newtype ListGraph i = ListGraph [GenBasicBlock i]
-- across a whole compilation unit.
-- | Cmm with the info table as a data type
-type CmmGroup = GenCmmGroup CmmStatics CmmInfo (ListGraph CmmStmt)
-type CmmDecl = GenCmmDecl CmmStatics CmmInfo (ListGraph CmmStmt)
+type CmmGroup = GenCmmGroup CmmStatics CmmInfoTable (ListGraph CmmStmt)
+type CmmDecl = GenCmmDecl CmmStatics CmmInfoTable (ListGraph CmmStmt)
-- | Cmm with the info tables converted to a list of 'CmmStatic' along with the info
-- table label. If we are building without tables-next-to-code there will be no statics
diff --git a/compiler/cmm/OldPprCmm.hs b/compiler/cmm/OldPprCmm.hs
index 07dfbf63bf..6a8fab48e8 100644
--- a/compiler/cmm/OldPprCmm.hs
+++ b/compiler/cmm/OldPprCmm.hs
@@ -66,9 +66,6 @@ instance PlatformOutputable instr => PlatformOutputable (GenBasicBlock instr) wh
instance PlatformOutputable CmmStmt where
pprPlatform = pprStmt
-instance PlatformOutputable CmmInfo where
- pprPlatform = pprInfo
-
-- --------------------------------------------------------------------------
instance PlatformOutputable CmmSafety where
@@ -76,22 +73,6 @@ instance PlatformOutputable CmmSafety where
pprPlatform _ CmmInterruptible = ptext (sLit "_interruptible_call_")
pprPlatform platform (CmmSafe srt) = pprPlatform platform srt
--- --------------------------------------------------------------------------
--- Info tables. The current pretty printer needs refinement
--- but will work for now.
---
--- For ideas on how to refine it, they used to be printed in the
--- style of C--'s 'stackdata' declaration, just inside the proc body,
--- and were labelled with the procedure name ++ "_info".
-pprInfo :: Platform -> CmmInfo -> SDoc
-pprInfo platform (CmmInfo _gc_target update_frame info_table) =
- vcat [{-ptext (sLit "gc_target: ") <>
- maybe (ptext (sLit "<none>")) ppr gc_target,-}
- ptext (sLit "update_frame: ") <>
- maybe (ptext (sLit "<none>"))
- (pprUpdateFrame platform)
- update_frame,
- pprPlatform platform info_table]
-- --------------------------------------------------------------------------
-- Basic blocks look like assembly blocks.
diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs
index 81ce84c264..5c8a9cf5ce 100644
--- a/compiler/cmm/PprCmmExpr.hs
+++ b/compiler/cmm/PprCmmExpr.hs
@@ -248,12 +248,8 @@ pprLocalReg (LocalReg uniq rep)
-- Stack areas
pprArea :: Area -> SDoc
-pprArea (RegSlot r) = hcat [ text "slot<", ppr r, text ">" ]
-pprArea (CallArea id) = pprAreaId id
-
-pprAreaId :: AreaId -> SDoc
-pprAreaId Old = text "old"
-pprAreaId (Young id) = hcat [ text "young<", ppr id, text ">" ]
+pprArea Old = text "old"
+pprArea (Young id) = hcat [ text "young<", ppr id, text ">" ]
-- needs to be kept in syn with CmmExpr.hs.GlobalReg
--
diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs
index 25ba154d12..ed5c5261d7 100644
--- a/compiler/codeGen/CgInfoTbls.hs
+++ b/compiler/codeGen/CgInfoTbls.hs
@@ -67,10 +67,9 @@ emitClosureCodeAndInfoTable cl_info args body
-- Convert from 'ClosureInfo' to 'CmmInfo'.
-- Not used for return points. (The 'smRepClosureTypeInt' call would panic.)
-mkCmmInfo :: ClosureInfo -> FCode CmmInfo
+mkCmmInfo :: ClosureInfo -> FCode CmmInfoTable
mkCmmInfo cl_info
- = return (CmmInfo gc_target Nothing $
- CmmInfoTable { cit_lbl = infoTableLabelFromCI cl_info,
+ = return (CmmInfoTable { cit_lbl = infoTableLabelFromCI cl_info,
cit_rep = closureSMRep cl_info,
cit_prof = prof,
cit_srt = closureSRT cl_info })
@@ -80,14 +79,6 @@ mkCmmInfo cl_info
ty_descr_w8 = stringToWord8s (closureTypeDescr cl_info)
val_descr_w8 = stringToWord8s (closureValDescr cl_info)
- -- The gc_target is to inform the CPS pass when it inserts a stack check.
- -- Since that pass isn't used yet we'll punt for now.
- -- When the CPS pass is fully integrated, this should
- -- be replaced by the label that any heap check jumped to,
- -- so that branch can be shared by both the heap (from codeGen)
- -- and stack checks (from the CPS pass).
- gc_target = panic "TODO: gc_target"
-
-------------------------------------------------------------------------
--
-- Generating the info table and code for a return point
@@ -106,8 +97,7 @@ emitReturnTarget name stmts
; blks <- cgStmtsToBlocks stmts
; frame <- mkStackLayout
; let smrep = mkStackRep (mkLiveness frame)
- info = CmmInfo gc_target Nothing info_tbl
- info_tbl = CmmInfoTable { cit_lbl = info_lbl
+ info = CmmInfoTable { cit_lbl = info_lbl
, cit_prof = NoProfilingInfo
, cit_rep = smrep
, cit_srt = srt_info }
@@ -119,14 +109,6 @@ emitReturnTarget name stmts
info_lbl = mkReturnInfoLabel uniq
entry_lbl = mkReturnPtLabel uniq
- -- The gc_target is to inform the CPS pass when it inserts a stack check.
- -- Since that pass isn't used yet we'll punt for now.
- -- When the CPS pass is fully integrated, this should
- -- be replaced by the label that any heap check jumped to,
- -- so that branch can be shared by both the heap (from codeGen)
- -- and stack checks (from the CPS pass).
- gc_target = panic "TODO: gc_target"
-
-- Build stack layout information from the state of the 'FCode' monad.
-- Should go away once 'codeGen' starts using the CPS conversion
-- pass to handle the stack. Until then, this is really just
@@ -378,8 +360,8 @@ funInfoTable info_ptr
emitInfoTableAndCode
:: CLabel -- Label of entry or ret
- -> CmmInfo -- ...the info table
- -> [CmmFormal] -- ...args
+ -> CmmInfoTable -- ...the info table
+ -> [CmmFormal] -- ...args
-> [CmmBasicBlock] -- ...and body
-> Code
diff --git a/compiler/codeGen/CgMonad.lhs b/compiler/codeGen/CgMonad.lhs
index 59f6accf9d..6e164ce9ee 100644
--- a/compiler/codeGen/CgMonad.lhs
+++ b/compiler/codeGen/CgMonad.lhs
@@ -728,7 +728,7 @@ emitDecl decl
= do { state <- getState
; setState $ state { cgs_tops = cgs_tops state `snocOL` decl } }
-emitProc :: CmmInfo -> CLabel -> [CmmFormal] -> [CmmBasicBlock] -> Code
+emitProc :: CmmInfoTable -> CLabel -> [CmmFormal] -> [CmmBasicBlock] -> Code
emitProc info lbl [] blocks
= do { let proc_block = CmmProc info lbl (ListGraph blocks)
; state <- getState
@@ -740,7 +740,7 @@ emitSimpleProc :: CLabel -> Code -> Code
emitSimpleProc lbl code
= do { stmts <- getCgStmts code
; blks <- cgStmtsToBlocks stmts
- ; emitProc (CmmInfo Nothing Nothing CmmNonInfoTable) lbl [] blks }
+ ; emitProc CmmNonInfoTable lbl [] blks }
-- Get all the CmmTops (there should be no stmts)
-- Return a single Cmm which may be split from other Cmms by
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
index 724f28d142..0222299ff2 100644
--- a/compiler/codeGen/StgCmmBind.hs
+++ b/compiler/codeGen/StgCmmBind.hs
@@ -596,7 +596,7 @@ pushUpdateFrame es body
offset <- foldM push updfr es
withUpdFrameOff offset body
where push off e =
- do emitStore (CmmStackSlot (CallArea Old) base) e
+ do emitStore (CmmStackSlot Old base) e
return base
where base = off + widthInBytes (cmmExprWidth e)
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
index 3b12b2a4b7..fe41de83fa 100644
--- a/compiler/codeGen/StgCmmExpr.hs
+++ b/compiler/codeGen/StgCmmExpr.hs
@@ -532,16 +532,9 @@ cgTailCall fun_id fun_info args = do
ReturnIt -> emitReturn [fun] -- ToDo: does ReturnIt guarantee tagged?
EnterIt -> ASSERT( null args ) -- Discarding arguments
- do { let entry = entryCode (closureInfoPtr fun)
- ; [ret,call] <- forkAlts [
- getCode $
- emitReturn [fun], -- Is tagged; no need to untag
- getCode $ do -- Not tagged
- emitCall (NativeNodeCall, NativeReturn) entry [fun]
- ]
- ; emit =<< mkCmmIfThenElse (cmmIsTagged fun) ret call }
-
- SlowCall -> do -- A slow function call via the RTS apply routines
+ emitEnter fun
+
+ SlowCall -> do -- A slow function call via the RTS apply routines
{ tickySlowCall lf_info args
; emitComment $ mkFastString "slowCall"
; slowCall fun args }
@@ -565,6 +558,66 @@ cgTailCall fun_id fun_info args = do
node_points = nodeMustPointToIt lf_info
+emitEnter :: CmmExpr -> FCode ()
+emitEnter fun = do
+ { adjustHpBackwards
+ ; sequel <- getSequel
+ ; updfr_off <- getUpdFrameOff
+ ; case sequel of
+ -- For a return, we have the option of generating a tag-test or
+ -- not. If the value is tagged, we can return directly, which
+ -- is quicker than entering the value. This is a code
+ -- size/speed trade-off: when optimising for speed rather than
+ -- size we could generate the tag test.
+ --
+ -- Right now, we do what the old codegen did, and omit the tag
+ -- test, just generating an enter.
+ Return _ -> do
+ { let entry = entryCode $ closureInfoPtr $ CmmReg nodeReg
+ ; emit $ mkForeignJump NativeNodeCall entry
+ [cmmUntag fun] updfr_off
+ }
+
+ -- The result will be scrutinised in the sequel. This is where
+ -- we generate a tag-test to avoid entering the closure if
+ -- possible.
+ --
+ -- The generated code will be something like this:
+ --
+ -- R1 = fun -- copyout
+ -- if (fun & 7 != 0) goto Lcall else goto Lret
+ -- Lcall:
+ -- call [fun] returns to Lret
+ -- Lret:
+ -- fun' = R1 -- copyin
+ -- ...
+ --
+ -- Note in particular that the label Lret is used as a
+ -- destination by both the tag-test and the call. This is
+ -- because Lret will necessarily be a proc-point, and we want to
+ -- ensure that we generate only one proc-point for this
+ -- sequence.
+ --
+ AssignTo res_regs _ -> do
+ { lret <- newLabelC
+ ; lcall <- newLabelC
+ ; let area = Young lret
+ ; let (off, copyin) = copyInOflow NativeReturn area res_regs
+ (outArgs, copyout) = copyOutOflow NativeNodeCall Call area
+ [fun] updfr_off (0,[])
+ ; let entry = entryCode (closureInfoPtr fun)
+ the_call = toCall entry (Just lret) updfr_off off outArgs
+ ; emit $
+ copyout <*>
+ mkCbranch (cmmIsTagged fun) lret lcall <*>
+ outOfLine lcall the_call <*>
+ mkLabel lret <*>
+ copyin
+ }
+ }
+
+
+
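
A minimal standalone sketch of the tag test this branch relies on (a toy
model, not the compiler's own code: a Word stands in for the closure
pointer, and the tag mask is assumed to be 7 for a 64-bit target):

    import Data.Bits ((.&.))

    tagMask :: Word
    tagMask = 7   -- assumption: 64-bit target; 3 on 32-bit

    -- Non-zero tag bits mean the closure is already evaluated.
    isTagged :: Word -> Bool
    isTagged ptr = ptr .&. tagMask /= 0

    -- Models mkCbranch (cmmIsTagged fun) lret lcall above: tagged values
    -- fall straight through to Lret; untagged ones enter the closure at
    -- Lcall, which returns to Lret, keeping Lret the single proc-point.
    branchTarget :: Word -> String
    branchTarget fun
      | isTagged fun = "Lret"
      | otherwise    = "Lcall"
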
{- Note [case on Bool]
~~~~~~~~~~~~~~~~~~~
A case on a Boolean value does two things:
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index f4be622092..3580481043 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -184,7 +184,7 @@ emitSaveThreadState :: BlockId -> FCode ()
emitSaveThreadState bid = do
-- CurrentTSO->stackobj->sp = Sp;
emitStore (cmmOffset (CmmLoad (cmmOffset stgCurrentTSO tso_stackobj) bWord) stack_SP)
- (CmmStackSlot (CallArea (Young bid)) (widthInBytes (typeWidth gcWord)))
+ (CmmStackSlot (Young bid) (widthInBytes (typeWidth gcWord)))
emit closeNursery
-- and save the current cost centre stack in the TSO when profiling:
when opt_SccProfilingOn $
diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
index 2b0b6f895e..68d078fb28 100644
--- a/compiler/codeGen/StgCmmHeap.hs
+++ b/compiler/codeGen/StgCmmHeap.hs
@@ -418,8 +418,8 @@ altHeapCheck regs code
gc_call sp =
case rts_label regs of
- Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp
- Nothing -> mkCall generic_gc (GC, GC) [] [] sp
+ Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp (0,[])
+ Nothing -> mkCall generic_gc (GC, GC) [] [] sp (0,[])
rts_label [reg]
| isGcPtrType ty = Just (mkGcLabel "stg_gc_unpt_r1")
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 0299bc0f96..9ee9192794 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -15,7 +15,7 @@
module StgCmmLayout (
mkArgDescr,
- emitCall, emitReturn,
+ emitCall, emitReturn, adjustHpBackwards,
emitClosureProcAndInfoTable,
emitClosureAndInfoTable,
@@ -41,10 +41,12 @@ import StgCmmEnv
import StgCmmTicky
import StgCmmMonad
import StgCmmUtils
+import StgCmmProf
import MkGraph
import SMRep
import Cmm
+import CmmUtils
import CLabel
import StgSyn
import Id
@@ -53,6 +55,7 @@ import TyCon ( PrimRep(..) )
import BasicTypes ( Arity )
import DynFlags
import StaticFlags
+import Module
import Constants
import Util
@@ -93,10 +96,31 @@ emitCall convs@(callConv, _) fun args
; updfr_off <- getUpdFrameOff
; emitComment $ mkFastString ("emitCall: " ++ show sequel)
; case sequel of
- Return _ -> emit (mkForeignJump callConv fun args updfr_off)
- AssignTo res_regs _ -> emit =<< mkCall fun convs res_regs args updfr_off
+ Return _ ->
+ emit $ mkForeignJump callConv fun args updfr_off
+ AssignTo res_regs _ ->
+ emit =<< mkCall fun convs res_regs args updfr_off (0,[])
}
+emitCallWithExtraStack
+ :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
+ -> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode ()
+-- (emitCallWithExtraStack fun args stk) makes a call to the entry-code
+-- of 'fun', passing 'args' plus the given extra stack, and returning
+-- the results to the current sequel
+emitCallWithExtraStack convs@(callConv, _) fun args extra_stack
+ = do { adjustHpBackwards
+ ; sequel <- getSequel
+ ; updfr_off <- getUpdFrameOff
+ ; emitComment $ mkFastString ("emitCallWithExtraStack: " ++ show sequel)
+ ; case sequel of
+ Return _ ->
+ emit $ mkForeignJumpExtra callConv fun args updfr_off extra_stack
+ AssignTo res_regs _ -> do
+ emit =<< mkCall fun convs res_regs args updfr_off extra_stack
+ }
+
+
+
adjustHpBackwards :: FCode ()
-- This function adjusts the heap pointer just before a tail call or
-- return. At a call or return, the virtual heap pointer may be less
@@ -128,6 +152,19 @@ adjustHpBackwards
-- Making calls: directCall and slowCall
-------------------------------------------------------------------------
+-- General plan is:
+-- - we'll make *one* fast call, either to the function itself
+-- (directCall) or to stg_ap_<pat>_fast (slowCall)
+-- Any left-over arguments will be pushed on the stack,
+--
+-- e.g. Sp[old+8] = arg1
+-- Sp[old+16] = arg2
+-- Sp[old+32] = stg_ap_pp_info
+-- R2 = arg3
+-- R3 = arg4
+-- call f() return to Nothing updfr_off: 32
+
+
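
As a rough standalone model of that plan (hypothetical simplified types;
in the real code the reps come from argsReps, and void reps consume no
actual argument):

    data ArgRep = P | N | V deriving (Eq, Show)

    -- Toy version of the split direct_call performs below for an
    -- over-saturated call: the first 'arity' reps travel in the fast
    -- call, everything after them is destined for the stack.
    splitOversat :: Int -> [ArgRep] -> [arg] -> (([arg], [arg]), [ArgRep])
    splitOversat arity reps args = ((fast_args, rest_args), rest_reps)
      where
        (initial_reps, rest_reps) = splitAt arity reps
        arg_arity                 = length (filter (/= V) initial_reps)
        (fast_args, rest_args)    = splitAt arg_arity args

    -- e.g. splitOversat 2 [P,P,P,P] "abcd" == (("ab","cd"),[P,P])
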
directCall :: CLabel -> Arity -> [StgArg] -> FCode ()
-- (directCall f n args)
-- calls f(arg1, ..., argn), and applies the result to the remaining args
@@ -140,8 +177,19 @@ directCall lbl arity stg_args
slowCall :: CmmExpr -> [StgArg] -> FCode ()
-- (slowCall fun args) applies fun to args, returning the results to Sequel
slowCall fun stg_args
- = do { cmm_args <- getNonVoidArgAmodes stg_args
- ; slow_call fun cmm_args (argsReps stg_args) }
+ = do { dflags <- getDynFlags
+ ; cmm_args <- getNonVoidArgAmodes stg_args
+ ; let platform = targetPlatform dflags
+ ; call <- getCode $ direct_call "slow_call"
+ (mkRtsApFastLabel rts_fun) arity cmm_args reps
+ ; emitComment $ mkFastString ("slow_call for " ++
+ showSDoc (pprPlatform platform fun) ++
+ " with pat " ++ showSDoc (ftext rts_fun))
+ ; emit (mkAssign nodeReg fun <*> call)
+ }
+ where
+ reps = argsReps stg_args
+ (rts_fun, arity) = slowCallPattern reps
--------------
direct_call :: String -> CLabel -> Arity -> [CmmExpr] -> [ArgRep] -> FCode ()
@@ -149,7 +197,7 @@ direct_call :: String -> CLabel -> Arity -> [CmmExpr] -> [ArgRep] -> FCode ()
-- the args exclude the void ones
-- NB2: 'arity' refers to the *reps*
direct_call caller lbl arity args reps
- | debugIsOn && arity > length reps -- Too few args
+ | debugIsOn && arity > length reps -- Too few args
= do -- Caller should ensure that there are enough args!
dflags <- getDynFlags
let platform = targetPlatform dflags
@@ -157,33 +205,77 @@ direct_call caller lbl arity args reps
<+> pprPlatform platform lbl <+> ppr (length reps)
<+> pprPlatform platform args <+> ppr reps )
- | null rest_reps -- Precisely the right number of arguments
+
+ | null rest_reps -- Precisely the right number of arguments
= emitCall (NativeDirectCall, NativeReturn) target args
- | otherwise -- Over-saturated call
+ | otherwise -- Note [over-saturated calls]
= ASSERT( arity == length initial_reps )
- do { pap_id <- newTemp gcWord
- ; withSequel (AssignTo [pap_id] True)
- (emitCall (NativeDirectCall, NativeReturn) target fast_args)
- ; slow_call (CmmReg (CmmLocal pap_id))
- rest_args rest_reps }
+ emitCallWithExtraStack (NativeDirectCall, NativeReturn)
+ target fast_args (mkStkOffsets stack_args)
where
target = CmmLit (CmmLabel lbl)
(initial_reps, rest_reps) = splitAt arity reps
arg_arity = count isNonV initial_reps
(fast_args, rest_args) = splitAt arg_arity args
-
---------------
-slow_call :: CmmExpr -> [CmmExpr] -> [ArgRep] -> FCode ()
-slow_call fun args reps
- = do dflags <- getDynFlags
- let platform = targetPlatform dflags
- call <- getCode $ direct_call "slow_call" (mkRtsApFastLabel rts_fun) arity args reps
- emitComment $ mkFastString ("slow_call for " ++ showSDoc (pprPlatform platform fun) ++
- " with pat " ++ showSDoc (ftext rts_fun))
- emit (mkAssign nodeReg fun <*> call)
+ stack_args = slowArgs (zip rest_reps rest_args)
+
+
+{-
+Note [over-saturated calls]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The natural thing to do for an over-saturated call would be to call
+the function with the correct number of arguments, and then apply the
+remaining arguments to the value returned, e.g.
+
+ f a b c d (where f has arity 2)
+ -->
+ r = call f(a,b)
+ call r(c,d)
+
+but this entails
+ - saving c and d on the stack
+ - making a continuation info table
+ - at the continuation, loading c and d off the stack into regs
+ - finally, call r
+
+Note that since there are a fixed number of different r's
+(e.g. stg_ap_pp_fast), we can also pre-compile continuations
+that correspond to each of them, rather than generating a fresh
+one for each over-saturated call.
+
+Not only does this generate much less code, it is faster too. We will
+generate something like:
+
+Sp[old+16] = c
+Sp[old+24] = d
+Sp[old+32] = stg_ap_pp_info
+call f(a,b) -- usual calling convention
+
+For the purposes of the CmmCall node, we count this extra stack as
+just more arguments that we are passing on the stack (cml_args).
+-}
+
+-- | 'slowArgs' takes a list of function arguments and prepares them for
+-- pushing on the stack as the "extra" arguments of a call to a function
+-- that requires fewer arguments than we currently have.
+slowArgs :: [(ArgRep,CmmExpr)] -> [(ArgRep,CmmExpr)]
+slowArgs [] = []
+slowArgs amodes
+ | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest
+ | otherwise = this_pat ++ slowArgs rest
where
- (rts_fun, arity) = slowCallPattern reps
+ (arg_pat, args, rest) = matchSlowPattern amodes
+ stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat
+ this_pat = (N, mkLblExpr stg_ap_pat) : args
+ save_cccs = [(N, mkLblExpr save_cccs_lbl), (N, curCCS)]
+ save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs")
+
+matchSlowPattern :: [(ArgRep,CmmExpr)]
+ -> (FastString, [(ArgRep,CmmExpr)], [(ArgRep,CmmExpr)])
+matchSlowPattern amodes = (arg_pat, these, rest)
+ where (arg_pat, n) = slowCallPattern (map fst amodes)
+ (these, rest) = splitAt n amodes
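
A standalone sketch of this decoration step (simplified, hypothetical
types: amodes are just names, only two patterns are wired in, and the
profiling case is omitted):

    data ArgRep = P | N | V deriving (Eq, Show)

    -- Stand-in for slowCallPattern with just enough cases to terminate.
    slowPat :: [ArgRep] -> (String, Int)
    slowPat (P:P:_) = ("stg_ap_pp", 2)
    slowPat (_:_)   = ("stg_ap_n",  1)
    slowPat []      = ("stg_ap_0",  0)

    -- Each matched group of args is prefixed with its stg_ap_<pat> info
    -- label, mirroring slowArgs/matchSlowPattern above.
    slowArgs' :: [(ArgRep, String)] -> [(ArgRep, String)]
    slowArgs' [] = []
    slowArgs' amodes = (N, pat ++ "_info") : these ++ slowArgs' rest
      where
        (pat, n)      = slowPat (map fst amodes)
        (these, rest) = splitAt n amodes

    -- e.g. slowArgs' [(P,"c"),(P,"d")]
    --        == [(N,"stg_ap_pp_info"),(P,"c"),(P,"d")]
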
-- These cases were found to cover about 99% of all slow calls:
slowCallPattern :: [ArgRep] -> (FastString, Arity)
@@ -206,6 +298,30 @@ slowCallPattern [] = (fsLit "stg_ap_0", 0)
-------------------------------------------------------------------------
+-- Fix the byte-offsets of a bunch of things to push on the stack
+
+-- This is used for pushing slow-call continuations.
+-- See Note [over-saturated calls].
+
+mkStkOffsets
+ :: [(ArgRep,CmmExpr)] -- things to make offsets for
+ -> ( ByteOff -- OUTPUTS: Topmost allocated word
+ , [(CmmExpr, ByteOff)] ) -- things with offsets (voids filtered out)
+mkStkOffsets things
+ = loop 0 [] (reverse things)
+ where
+ loop offset offs [] = (offset,offs)
+ loop offset offs ((V,_):things) = loop offset offs things
+ -- ignore Void arguments
+ loop offset offs ((rep,thing):things)
+ = loop thing_off ((thing, thing_off):offs) things
+ where
+ thing_off = offset + argRepSizeW rep * wORD_SIZE
+ -- offset of thing is offset+size, because we're
+ -- growing the stack *downwards* as the offsets increase.
+
+
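
A standalone, runnable model of this function with a worked example
(assumptions: 8-byte words, strings standing in for CmmExprs):

    data ArgRep = P | N | V deriving (Eq, Show)

    argRepSizeW :: ArgRep -> Int
    argRepSizeW V = 0
    argRepSizeW _ = 1

    wordSize :: Int
    wordSize = 8   -- assumption: 64-bit target

    mkStkOffsets' :: [(ArgRep, String)] -> (Int, [(String, Int)])
    mkStkOffsets' things = loop 0 [] (reverse things)
      where
        loop offset offs []              = (offset, offs)
        loop offset offs ((V, _) : ts)   = loop offset offs ts  -- skip voids
        loop offset offs ((rep, t) : ts) = loop t_off ((t, t_off) : offs) ts
          where t_off = offset + argRepSizeW rep * wordSize

    -- With the slow-call continuation from the Note pushed after two
    -- leftover pointer args:
    -- ghci> mkStkOffsets' [(N,"stg_ap_pp_info"),(P,"c"),(P,"d")]
    -- (24,[("stg_ap_pp_info",24),("c",16),("d",8)])
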
+-------------------------------------------------------------------------
-- Classifying arguments: ArgRep
-------------------------------------------------------------------------
diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs
index 6c5ab4c692..c64df7ecc5 100644
--- a/compiler/codeGen/StgCmmMonad.hs
+++ b/compiler/codeGen/StgCmmMonad.hs
@@ -611,7 +611,7 @@ emitLabel :: BlockId -> FCode ()
emitLabel id = emitCgStmt (CgLabel id)
emitComment :: FastString -> FCode ()
-#ifdef DEBUG
+#if 0 /* def DEBUG */
emitComment s = emitCgStmt (CgStmt (CmmComment s))
#else
emitComment s = return ()
@@ -688,20 +688,18 @@ mkCmmIfThen e tbranch = do
mkCall :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmActual]
- -> UpdFrameOffset -> FCode CmmAGraph
-mkCall f (callConv, retConv) results actuals updfr_off = do
+ -> UpdFrameOffset -> (ByteOff,[(CmmExpr,ByteOff)]) -> FCode CmmAGraph
+mkCall f (callConv, retConv) results actuals updfr_off extra_stack = do
k <- newLabelC
- let area = CallArea $ Young k
+ let area = Young k
(off, copyin) = copyInOflow retConv area results
- copyout = lastWithArgs Call area callConv actuals updfr_off
- (toCall f (Just k) updfr_off off)
+ copyout = mkCallReturnsTo f callConv actuals k off updfr_off extra_stack
return (copyout <*> mkLabel k <*> copyin)
-
mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmActual] -> UpdFrameOffset
-> FCode CmmAGraph
-mkCmmCall f results actuals
- = mkCall f (NativeDirectCall, NativeReturn) results actuals
+mkCmmCall f results actuals updfr_off
+ = mkCall f (NativeDirectCall, NativeReturn) results actuals updfr_off (0,[])
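
For reference, the new trailing argument threaded through mkCall and its
callers is the extra-stack pair: (byte offset of the topmost allocated
word, [(thing, byte offset)]). A small hedged illustration (hypothetical
names; the populated pair reuses the offsets from the mkStkOffsets sketch
above):

    type ByteOff = Int

    -- The common case, as in mkCmmCall above: no extra stack at all.
    noExtraStack :: (ByteOff, [(expr, ByteOff)])
    noExtraStack = (0, [])

    -- What an over-saturated call might pass instead:
    --   (24, [("stg_ap_pp_info", 24), ("c", 16), ("d", 8)])
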
mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual]
@@ -710,7 +708,7 @@ mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual]
mkSafeCall t fs as upd i = do
k <- newLabelC
return
- ( mkStore (CmmStackSlot (CallArea (Young k)) (widthInBytes wordWidth))
+ ( mkStore (CmmStackSlot (Young k) (widthInBytes wordWidth))
(CmmLit (CmmBlock k))
<*> mkLast (CmmForeignCall {tgt=t, res=fs, args=as, succ=k, updfr=upd, intrbl=i})
<*> mkLabel k)
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index b0333be379..bb50fffa12 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -94,7 +94,7 @@ Library
ForeignFunctionInterface, EmptyDataDecls,
TypeSynonymInstances, MultiParamTypeClasses,
FlexibleInstances, Rank2Types, ScopedTypeVariables,
- DeriveDataTypeable, BangPatterns
+ DeriveDataTypeable, BangPatterns, GADTs
if impl(ghc >= 7.1)
Extensions: NondecreasingIndentation
@@ -203,6 +203,7 @@ Library
CmmStackLayout
CmmType
CmmUtils
+ CmmLayoutStack
MkGraph
OldCmm
OldCmmLint