author    Simon Marlow <marlowsd@gmail.com>  2012-03-06 12:24:40 +0000
committer Simon Marlow <marlowsd@gmail.com>  2012-03-06 13:28:32 +0000
commit    93e42a6895d2172f40d37fd13cb7405243dc4d0f (patch)
tree      61e620bdba7d5b0e37348d68ba6ab2a60959a21e
parent    9a32e71d912985a6fd8e3491518ac357f2e8686b (diff)
download  haskell-93e42a6895d2172f40d37fd13cb7405243dc4d0f.tar.gz
Lower safe foreign calls in the new CmmLayoutStack
We also generate much better code for safe foreign calls (and maybe
also unsafe foreign calls) than previously.  See the two new Notes:

  Note [lower safe foreign calls]
  Note [safe foreign call convention]
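For context (not part of the patch), here is a minimal sketch of the kind of
source-level call this lowering handles: a *safe* foreign import, whose call
must be bracketed by suspendThread/resumeThread so the RTS can run other
Haskell threads (and GC) while the C call is in flight. Module and binding
names below are illustrative.

    {-# LANGUAGE ForeignFunctionInterface #-}
    module Example where

    import Foreign.C.Types (CUInt (..))

    -- A 'safe' call: the generated code must suspend the calling
    -- Haskell thread around the C call -- exactly the
    -- suspendThread/resumeThread bracketing this patch now inserts
    -- during CmmLayoutStack.
    foreign import ccall safe "unistd.h sleep"
      c_sleep :: CUInt -> IO CUInt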
-rw-r--r--  compiler/cmm/CmmBuildInfoTables.hs  |  89
-rw-r--r--  compiler/cmm/CmmLayoutStack.hs      | 233
-rw-r--r--  compiler/cmm/MkGraph.hs             |  15
-rw-r--r--  compiler/codeGen/StgCmmEnv.hs       |   3
-rw-r--r--  compiler/codeGen/StgCmmForeign.hs   | 111
-rw-r--r--  compiler/codeGen/StgCmmLayout.hs    | 142
-rw-r--r--  compiler/codeGen/StgCmmMonad.hs     |  15
-rw-r--r--  compiler/codeGen/StgCmmPrim.hs      |  11
8 files changed, 390 insertions, 229 deletions
diff --git a/compiler/cmm/CmmBuildInfoTables.hs b/compiler/cmm/CmmBuildInfoTables.hs
index 43ff2b0758..7a396ee37b 100644
--- a/compiler/cmm/CmmBuildInfoTables.hs
+++ b/compiler/cmm/CmmBuildInfoTables.hs
@@ -17,7 +17,6 @@ module CmmBuildInfoTables
, setInfoTableSRT
, TopSRT, emptySRT, srtToData
, bundleCAFs
- , lowerSafeForeignCalls
, cafTransfers )
where
@@ -315,91 +314,3 @@ updInfoTbl toVars toSrt info_tbl@(CmmInfoTable {})
StackRep ls -> StackRep (toVars ls)
other -> other }
updInfoTbl _ _ t@CmmNonInfoTable = t
-
-----------------------------------------------------------------
--- Safe foreign calls: We need to insert the code that suspends and resumes
--- the thread before and after a safe foreign call.
--- Why do we do this so late in the pipeline?
--- Because we need this code to appear without interruption: you can't rely on the
--- value of the stack pointer between the call and resetting the thread state;
--- you need to have an infotable on the young end of the stack both when
--- suspending the thread and making the foreign call.
--- All of this is much easier if we insert the suspend and resume calls here.
-
--- At the same time, we prepare for the stages of the compiler that
--- build the proc points. We have to do this at the same time because
--- the safe foreign calls need special treatment with respect to infotables.
--- A safe foreign call needs an infotable even though it isn't
--- a procpoint. The following datatype captures the information
--- needed to generate the infotables along with the Cmm data and procedures.
-
--- JD: Why not do this while splitting procedures?
-lowerSafeForeignCalls :: AreaMap -> CmmDecl -> FuelUniqSM CmmDecl
-lowerSafeForeignCalls _ t@(CmmData _ _) = return t
-lowerSafeForeignCalls areaMap (CmmProc info l g@(CmmGraph {g_entry=entry})) = do
- let block b mblocks = mblocks >>= lowerSafeCallBlock entry areaMap b
- blocks <- foldGraphBlocks block (return mapEmpty) g
- return $ CmmProc info l (ofBlockMap entry blocks)
-
--- If the block ends with a safe call in the block, lower it to an unsafe
--- call (with appropriate saves and restores before and after).
-lowerSafeCallBlock :: BlockId -> AreaMap -> CmmBlock -> BlockEnv CmmBlock
- -> FuelUniqSM (BlockEnv CmmBlock)
-lowerSafeCallBlock entry areaMap b blocks =
- case blockToNodeList b of
- (JustC (CmmEntry id), m, JustC l@(CmmForeignCall {})) -> lowerSafeForeignCall entry areaMap blocks id m l
- _ -> return $ insertBlock b blocks
-
--- Late in the code generator, we want to insert the code necessary
--- to lower a safe foreign call to a sequence of unsafe calls.
-lowerSafeForeignCall :: BlockId -> AreaMap -> BlockEnv CmmBlock -> BlockId -> [CmmNode O O] -> CmmNode O C
- -> FuelUniqSM (BlockEnv CmmBlock)
-lowerSafeForeignCall entry areaMap blocks bid m
- (CmmForeignCall {tgt=tgt, res=rs, args=as, succ=succ, updfr = updfr_off, intrbl = intrbl}) =
- do let newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep)
- -- Both 'id' and 'new_base' are KindNonPtr because they're
- -- RTS-only objects and are not subject to garbage collection
- id <- newTemp bWord
- new_base <- newTemp (cmmRegType (CmmGlobal BaseReg))
- let (caller_save, caller_load) = callerSaveVolatileRegs
- load_tso <- newTemp gcWord -- TODO FIXME NOW
- load_stack <- newTemp gcWord -- TODO FIXME NOW
- let (<**>) = (M.<*>)
- let suspendThread = foreignLbl "suspendThread"
- resumeThread = foreignLbl "resumeThread"
- foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId (fsLit name)))
- suspend = saveThreadState <**>
- caller_save <**>
- mkUnsafeCall (ForeignTarget suspendThread
- (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint]))
- [id] [CmmReg (CmmGlobal BaseReg), CmmLit (mkIntCLit (fromEnum intrbl))]
- midCall = mkUnsafeCall tgt rs as
- resume = mkUnsafeCall (ForeignTarget resumeThread
- (ForeignConvention CCallConv [AddrHint] [AddrHint]))
- [new_base] [CmmReg (CmmLocal id)] <**>
- -- Assign the result to BaseReg: we
- -- might now have a different Capability!
- mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <**>
- caller_load <**>
- loadThreadState load_tso load_stack
- -- We have to save the return value on the stack because its next use
- -- may appear in a different procedure due to procpoint splitting...
- saveRetVals = foldl (<**>) mkNop $ map (M.mkMiddle . spill) rs
- spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r)
- regSlot r@(LocalReg _ _) = CmmRegOff (CmmGlobal Sp) (sp_off - offset)
- where offset = w + expectJust "lowerForeign" Nothing -- XXX need to fix this: (Map.lookup (RegSlot r) areaMap)
- sp_off = wORD_SIZE + expectJust "lowerForeign" (Map.lookup area areaMap)
- area = if succ == entry then Old else Young succ
- w = widthInBytes $ typeWidth $ localRegType r
- -- Note: The successor must be a procpoint, and we have already split,
- -- so we use a jump, not a branch.
- succLbl = CmmLit (CmmLabel (infoTblLbl succ))
- jump = CmmCall { cml_target = succLbl, cml_cont = Nothing
- , cml_args = widthInBytes wordWidth ,cml_ret_args = 0
- , cml_ret_off = updfr_off}
- graph' <- liftUniq $ labelAGraph bid $ catAGraphs (map M.mkMiddle m) <**>
- suspend <**> midCall <**>
- resume <**> saveRetVals <**> M.mkLast jump
- return $ blocks `mapUnion` toBlockMap graph'
-lowerSafeForeignCall _ _ _ _ _ _ = panic "lowerSafeForeignCall was passed something else"
-
diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs
index 9a382c0557..87f495aa72 100644
--- a/compiler/cmm/CmmLayoutStack.hs
+++ b/compiler/cmm/CmmLayoutStack.hs
@@ -3,13 +3,20 @@ module CmmLayoutStack (
cmmLayoutStack, setInfoTableStackMap
) where
+import StgCmmUtils ( callerSaveVolatileRegs ) -- XXX
+import StgCmmForeign ( saveThreadState, loadThreadState ) -- XXX
+
import Cmm
import BlockId
+import CLabel
import CmmUtils
+import MkGraph
+import Module
+import ForeignCall
import CmmLive
import CmmProcPoint
import SMRep
-import Hoopl
+import Hoopl hiding ((<*>), mkLast, mkMiddle)
import OptimizationFuel
import Constants
import UniqSupply
@@ -177,31 +184,49 @@ layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks
-- a proc point, we must save the live variables, adjust Sp, and
-- construct the StackMaps for each of the successor blocks.
-- See handleLastNode for details.
- (saves, out, sp_off, last1, fixup_blocks)
+ (middle2, sp_off, middle3, last1, fixup_blocks, out)
<- handleLastNode procpoints liveness cont_info
acc_stackmaps stack1 last0
- let hwm' = maximum (acc_hwm : map sm_sp (mapElems out))
- middle2 = maybeAddSpAdj sp_off $ foldl blockSnoc middle1 saves
-
- area_off = getAreaOff final_stackmaps
-
- -- manifest Sp: turn all CmmStackSlots into actual loads
- adj_middle = mapExpDeep (areaToSp sp0 sp_high area_off)
- adj_last = optStackCheck .
- mapExpDeep (areaToSp (sp0 - sp_off) sp_high area_off)
-
- middle3 = blockFromList $
- map adj_middle $
- elimStackStores stack0 final_stackmaps area_off $
- blockToList middle2
-
- newblock = blockJoin entry0 middle3 (adj_last last1)
-
- fixup_blocks' = map (blockMapNodes3 (id, adj_middle, id)) fixup_blocks
+ -- our block:
+ -- middle1 -- the original middle nodes
+ -- middle2 -- live variable saves from handleLastNode
+ -- Sp = Sp + sp_off -- Sp adjustment goes here
+ -- middle3 -- some more middle nodes from handleLastNode
+ -- last1 -- the last node
+ --
+ -- The next step is to manifest Sp: turn all the CmmStackSlots
+ -- into CmmLoads from Sp. The adjustment for middle1/middle2
+ -- will be different from that for middle3/last1, because the
+ -- Sp adjustment intervenes.
+ --
+ let area_off = getAreaOff final_stackmaps
+
+ adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x
+ adj_pre_sp = mapExpDeep (areaToSp sp0 sp_high area_off)
+ adj_post_sp = mapExpDeep (areaToSp (sp0 - sp_off) sp_high area_off)
+
+ middle_pre = maybeAddSpAdj sp_off $
+ blockFromList $
+ map adj_pre_sp $
+ elimStackStores stack0 final_stackmaps area_off $
+ blockToList $
+ foldl blockSnoc middle1 middle2
+
+ middle_post = map adj_post_sp middle3
+
+ final_middle = foldl blockSnoc middle_pre middle_post
+ final_last = optStackCheck (adj_post_sp last1)
+
+ newblock = blockJoin entry0 final_middle final_last
+
+ fixup_blocks' = map (blockMapNodes3 (id, adj_post_sp, id))
+ fixup_blocks
stackmaps' = mapUnion acc_stackmaps out
+ hwm' = maximum (acc_hwm : map sm_sp (mapElems out))
+
pprTrace "layout(out)" (ppr out) $ return ()
go bs stackmaps' hwm' (newblock : fixup_blocks' ++ acc_blocks)
@@ -292,16 +317,33 @@ getStackLoc (Young l) n stackmaps =
-- -----------------------------------------------------------------------------
-- Handling stack allocation for a last node
+-- We take a single last node and turn it into:
+--
+-- C1 (some statements)
+-- Sp = Sp + N
+-- C2 (some more statements)
+-- call f() -- the actual last node
+--
+-- plus possibly some more blocks (we may have to add some fixup code
+-- between the last node and the continuation).
+--
+-- C1: is the code for saving the variables across this last node onto
+-- the stack, if the continuation is a call or jumps to a proc point.
+--
+-- C2: if the last node is a safe foreign call, we have to inject some
+-- extra code that goes *after* the Sp adjustment.
+
handleLastNode
:: ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff
-> BlockEnv StackMap -> StackMap
-> CmmNode O C
-> UniqSM
- ( [CmmNode O O] -- assignments to save live variables
- , BlockEnv StackMap -- stackmaps for the continuations
- , ByteOff -- amount to adjust Sp before the jump
+ ( [CmmNode O O] -- nodes to go *before* the Sp adjustment
+ , ByteOff -- amount to adjust Sp
+ , [CmmNode O O] -- nodes to go *after* the Sp adjustment
, CmmNode O C -- new last node
, [CmmBlock] -- new blocks
+ , BlockEnv StackMap -- stackmaps for the continuations
)
handleLastNode procpoints liveness cont_info stackmaps
@@ -312,39 +354,45 @@ handleLastNode procpoints liveness cont_info stackmaps
-- is cml_args, after popping any other junk from the stack.
CmmCall{ cml_cont = Nothing, .. } -> do
let sp_off = sp0 - cml_args
- return ([], mapEmpty, sp_off, last, [])
+ return ([], sp_off, [], last, [], mapEmpty)
-- At each CmmCall with a continuation:
CmmCall{ cml_cont = Just cont_lbl, .. } ->
- lastCall cont_lbl cml_args cml_ret_args cml_ret_off
+ lastCall cont_lbl [] cml_args cml_ret_args cml_ret_off
- CmmForeignCall{ succ = cont_lbl, .. } ->
- lastCall cont_lbl 0{-no args-} 0{-no results-} (sm_ret_off stack0)
+ CmmForeignCall{ succ = cont_lbl, .. } -> do
+ (mids, spoff, _, last', blocks, stackmap') <-
+ lastCall cont_lbl res wORD_SIZE wORD_SIZE (sm_ret_off stack0)
+ -- one word each for args and results: the return address
+ (extra_mids, last'') <- lowerSafeForeignCall last'
+ return (mids, spoff, extra_mids, last'', blocks, stackmap')
CmmBranch{..} -> handleProcPoints
CmmCondBranch{..} -> handleProcPoints
CmmSwitch{..} -> handleProcPoints
where
- lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff
+ lastCall :: BlockId -> [LocalReg] -> ByteOff -> ByteOff -> ByteOff
-> UniqSM
( [CmmNode O O]
- , BlockEnv StackMap
, ByteOff
+ , [CmmNode O O]
, CmmNode O C
, [CmmBlock]
+ , BlockEnv StackMap
)
- lastCall cont_lbl cml_args cml_ret_args cml_ret_off
+ lastCall cont_lbl res_regs cml_args cml_ret_args cml_ret_off
-- If we have already seen this continuation before, then
-- we just have to make the stack look the same:
| Just cont_stack <- mapLookup cont_lbl stackmaps
=
return ( fixupStack stack0 cont_stack
- , stackmaps
, sp0 - sm_sp cont_stack
+ , []
, last
- , [] )
+ , []
+ , stackmaps )
-- a continuation we haven't seen before:
-- allocate the stack frame for it.
@@ -353,6 +401,7 @@ handleLastNode procpoints liveness cont_info stackmaps
-- get the set of LocalRegs live in the continuation
let target_live = mapFindWithDefault Set.empty cont_lbl
liveness
+ `Set.difference` Set.fromList res_regs
-- the stack from the base to cml_ret_off is off-limits.
-- our new stack frame contains:
@@ -382,18 +431,19 @@ handleLastNode procpoints liveness cont_info stackmaps
-- emit an Sp adjustment, taking into account the call area
--
return ( assigs
- , mapSingleton cont_lbl cont_stack
, sp_off
+ , []
, last
, [] -- no new blocks
- )
+ , mapSingleton cont_lbl cont_stack )
handleProcPoints :: UniqSM ( [CmmNode O O]
- , BlockEnv StackMap
, ByteOff
+ , [CmmNode O O]
, CmmNode O C
- , [CmmBlock] )
+ , [CmmBlock]
+ , BlockEnv StackMap )
handleProcPoints = do
pps <- mapM handleProcPoint (successors last)
@@ -401,10 +451,11 @@ handleLastNode procpoints liveness cont_info stackmaps
lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ]
fix_lbl l = mapLookup l lbl_map `orElse` l
return ( []
- , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ]
, 0
+ , []
, mapSuccessors fix_lbl last
- , concat [ blk | (_,_,_,blk) <- pps ] )
+ , concat [ blk | (_,_,_,blk) <- pps ]
+ , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] )
-- For each proc point that is a successor of this block
-- (a) if the proc point already has a stackmap, we need to
@@ -642,6 +693,112 @@ stackMapToLiveness StackMap{..} =
-- -----------------------------------------------------------------------------
+-- Lowering safe foreign calls
+
+{-
+Note [lower safe foreign calls]
+
+We start with
+
+ Sp[young(L1)] = L1
+ ,-----------------------
+ | r1 = foo(x,y,z) returns to L1
+ '-----------------------
+ L1:
+ R1 = r1 -- copyIn, inserted by mkSafeCall
+ ...
+
+the stack layout algorithm will arrange to save and reload everything
+live across the call. Our job now is to expand the call so we get
+
+ Sp[young(L1)] = L1
+ ,-----------------------
+ | SAVE_THREAD_STATE()
+ | token = suspendThread(BaseReg, interruptible)
+ | r = foo(x,y,z)
+ | BaseReg = resumeThread(token)
+ | LOAD_THREAD_STATE()
+ | R1 = r -- copyOut
+ | jump L1
+ '-----------------------
+ L1:
+ r = R1 -- copyIn, inserted by mkSafeCall
+ ...
+
+Note the copyOut, which saves the results in the places that L1 is
+expecting them (see Note [safe foreign call convention]).
+-}
+
+
+lowerSafeForeignCall :: CmmNode O C -> UniqSM ([CmmNode O O], CmmNode O C)
+lowerSafeForeignCall CmmForeignCall { .. } =
+ do let
+ -- Both 'id' and 'new_base' are KindNonPtr because they're
+ -- RTS-only objects and are not subject to garbage collection
+ id <- newTemp bWord
+ new_base <- newTemp (cmmRegType (CmmGlobal BaseReg))
+ let (caller_save, caller_load) = callerSaveVolatileRegs
+ load_tso <- newTemp gcWord
+ load_stack <- newTemp gcWord
+ let suspend = saveThreadState <*>
+ caller_save <*>
+ mkMiddle (callSuspendThread id intrbl)
+ midCall = mkUnsafeCall tgt res args
+ resume = mkMiddle (callResumeThread new_base id) <*>
+ -- Assign the result to BaseReg: we
+ -- might now have a different Capability!
+ mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*>
+ caller_load <*>
+ loadThreadState load_tso load_stack
+ -- Note: The successor must be a procpoint, and we have already split,
+ -- so we use a jump, not a branch.
+ succLbl = CmmLit (CmmLabel (infoTblLbl succ))
+
+ (ret_args, copyout) = copyOutOflow NativeReturn Jump (Young succ)
+ (map (CmmReg . CmmLocal) res)
+ updfr (0, [])
+
+ jump = CmmCall { cml_target = succLbl
+ , cml_cont = Just succ
+ , cml_args = widthInBytes wordWidth
+ , cml_ret_args = ret_args
+ , cml_ret_off = updfr }
+
+ graph' <- lgraphOfAGraph $ suspend <*>
+ midCall <*>
+ resume <*>
+ copyout <*>
+ mkLast jump
+
+ case toBlockList graph' of
+ [one] -> let (_, middle, last) = blockSplit one
+ in return (blockToList middle, last)
+ _ -> panic "lowerSafeForeignCall0"
+
+lowerSafeForeignCall _ = panic "lowerSafeForeignCall1"
+
+
+foreignLbl :: FastString -> CmmExpr
+foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId name))
+
+newTemp :: CmmType -> UniqSM LocalReg
+newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep)
+
+callSuspendThread :: LocalReg -> Bool -> CmmNode O O
+callSuspendThread id intrbl =
+ CmmUnsafeForeignCall
+ (ForeignTarget (foreignLbl (fsLit "suspendThread"))
+ (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint]))
+ [id] [CmmReg (CmmGlobal BaseReg), CmmLit (mkIntCLit (fromEnum intrbl))]
+
+callResumeThread :: LocalReg -> LocalReg -> CmmNode O O
+callResumeThread new_base id =
+ CmmUnsafeForeignCall
+ (ForeignTarget (foreignLbl (fsLit "resumeThread"))
+ (ForeignConvention CCallConv [AddrHint] [AddrHint]))
+ [new_base] [CmmReg (CmmLocal id)]
+
+-- -----------------------------------------------------------------------------
plusW :: ByteOff -> WordOff -> ByteOff
plusW b w = b + w * wORD_SIZE
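
Putting the pieces of CmmLayoutStack together: after this patch, a block that
ends in a safe foreign call is laid out roughly as follows. This is pseudocode
in the style of the Notes above; the names are illustrative, not taken
verbatim from the compiler's output.

    middle1                        -- the original middle nodes
    middle2                        -- saves of variables live across the call
    Sp = Sp + sp_off               -- the single Sp adjustment
    SAVE_THREAD_STATE()
    token = suspendThread(BaseReg, interruptible)
    r = foo(x,y,z)                 -- the call, now lowered to an unsafe call
    BaseReg = resumeThread(token)
    LOAD_THREAD_STATE()
    R1 = r                         -- copyOut (NativeReturn convention)
    jump L1                        -- the continuation is a proc point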
diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs
index 922f31e45a..797b785de2 100644
--- a/compiler/cmm/MkGraph.hs
+++ b/compiler/cmm/MkGraph.hs
@@ -12,6 +12,7 @@ module MkGraph
, mkCbranch, mkSwitch
, mkReturn, mkReturnSimple, mkComment, mkCallEntry, mkBranch
, copyInOflow, copyOutOflow
+ , noExtraStack
, toCall, Transfer(..)
)
where
@@ -188,8 +189,7 @@ mkJumpGC e actuals updfr_off =
mkForeignJump :: Convention -> CmmExpr -> [CmmActual] -> UpdFrameOffset
-> CmmAGraph
mkForeignJump conv e actuals updfr_off =
- lastWithArgs Jump Old conv actuals updfr_off $
- toCall e Nothing updfr_off 0
+ mkForeignJumpExtra conv e actuals updfr_off noExtraStack
mkForeignJumpExtra :: Convention -> CmmExpr -> [CmmActual]
-> UpdFrameOffset -> (ByteOff, [(CmmExpr, ByteOff)])
@@ -208,13 +208,11 @@ mkReturn :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
mkReturn e actuals updfr_off =
lastWithArgs Ret Old NativeReturn actuals updfr_off $
toCall e Nothing updfr_off 0
- -- where e = CmmLoad (CmmStackSlot Old updfr_off) gcWord
mkReturnSimple :: [CmmActual] -> UpdFrameOffset -> CmmAGraph
mkReturnSimple actuals updfr_off =
- lastWithArgs Ret Old NativeReturn actuals updfr_off $
- toCall e Nothing updfr_off 0
- where e = CmmLoad (CmmStackSlot Old updfr_off) gcWord
+ mkReturn e actuals updfr_off
+ where e = CmmLoad (CmmStackSlot Old updfr_off) gcWord
mkBranch :: BlockId -> CmmAGraph
mkBranch bid = mkLast (CmmBranch bid)
@@ -346,9 +344,8 @@ lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual]
-> (ByteOff -> CmmAGraph)
-> CmmAGraph
lastWithArgs transfer area conv actuals updfr_off last =
- let (outArgs, copies) = copyOutOflow conv transfer area actuals
- updfr_off noExtraStack in
- copies <*> last outArgs
+ lastWithArgsAndExtraStack transfer area conv actuals
+ updfr_off noExtraStack last
lastWithArgsAndExtraStack :: Transfer -> Area -> Convention -> [CmmActual]
-> UpdFrameOffset -> (ByteOff, [(CmmExpr,ByteOff)])
diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs
index d8a7061eec..3b56e2feb6 100644
--- a/compiler/codeGen/StgCmmEnv.hs
+++ b/compiler/codeGen/StgCmmEnv.hs
@@ -27,7 +27,7 @@ module StgCmmEnv (
bindArgsToRegs, bindToReg, rebindToReg,
bindArgToReg, idToReg,
- getArgAmode, getNonVoidArgAmodes,
+ getArgAmode, getNonVoidArgAmodes,
getCgIdInfo,
maybeLetNoEscape,
) where
@@ -213,7 +213,6 @@ getNonVoidArgAmodes (arg:args)
; amodes <- getNonVoidArgAmodes args
; return ( amode : amodes ) }
-
------------------------------------------------------------------------
-- Interface functions for binding and re-binding names
------------------------------------------------------------------------
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index f1a522b37d..d5c9600b38 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -22,6 +22,7 @@ import StgCmmEnv
import StgCmmMonad
import StgCmmUtils
import StgCmmClosure
+import StgCmmLayout
import BlockId
import Cmm
@@ -45,15 +46,16 @@ import Control.Monad
-- Code generation for Foreign Calls
-----------------------------------------------------------------------------
-cgForeignCall :: [LocalReg] -- r1,r2 where to put the results
- -> [ForeignHint]
- -> ForeignCall -- the op
+-- | emit code for a foreign call, and return the results to the sequel.
+--
+cgForeignCall :: ForeignCall -- the op
-> [StgArg] -- x,y arguments
+ -> Type -- result type
-> FCode ()
--- Emits code for an unsafe foreign call: r1, r2 = foo( x, y, z )
-cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_args
+cgForeignCall (CCall (CCallSpec target cconv safety)) stg_args res_ty
= do { cmm_args <- getFCallArgs stg_args
+ ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
; let ((call_args, arg_hints), cmm_target)
= case target of
StaticTarget lbl mPkgId
@@ -61,7 +63,7 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a
= case mPkgId of
Nothing -> ForeignLabelInThisPackage
Just pkgId -> ForeignLabelInPackage pkgId
- size = call_size cmm_args
+ size = call_size cmm_args
in ( unzip cmm_args
, CmmLit (CmmLabel
(mkForeignLabel lbl size labelSource IsFunction)))
@@ -69,10 +71,31 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a
DynamicTarget -> case cmm_args of
(fn,_):rest -> (unzip rest, fn)
[] -> panic "cgForeignCall []"
- fc = ForeignConvention cconv arg_hints result_hints
+ fc = ForeignConvention cconv arg_hints res_hints
call_target = ForeignTarget cmm_target fc
- ; emitForeignCall safety results call_target call_args CmmMayReturn }
+ -- we want to emit code for the call, and then emitReturn.
+ -- However, if the sequel is AssignTo, we shortcut a little
+ -- and generate a foreign call that assigns the results
+ -- directly. Otherwise we end up generating a bunch of
+ -- useless "r = r" assignments, which are not merely annoying:
+ -- they prevent the common block elimination from working correctly
+ -- in the case of a safe foreign call.
+ -- See Note [safe foreign call convention]
+ --
+ ; sequel <- getSequel
+ ; case sequel of
+ AssignTo assign_to_these _ ->
+ do { emitForeignCall safety assign_to_these call_target
+ call_args CmmMayReturn
+ }
+
+ _something_else ->
+ do { emitForeignCall safety res_regs call_target
+ call_args CmmMayReturn
+ ; emitReturn (map (CmmReg . CmmLocal) res_regs)
+ }
+ }
where
-- in the stdcall calling convention, the symbol needs @size appended
-- to it, where size is the total number of bytes of arguments. We
@@ -83,7 +106,76 @@ cgForeignCall results result_hints (CCall (CCallSpec target cconv safety)) stg_a
| otherwise = Nothing
-- ToDo: this might not be correct for 64-bit API
- arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg) wORD_SIZE
+ arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType arg)
+ wORD_SIZE
+
+{- Note [safe foreign call convention]
+
+The simple thing to do for a safe foreign call would be the same as an
+unsafe one: just
+
+ emitForeignCall ...
+ emitReturn ...
+
+but consider what happens in this case
+
+ case foo x y z of
+ (# s, r #) -> ...
+
+The sequel is AssignTo [r]. The call to newUnboxedTupleRegs picks [r]
+as the result reg, and we generate
+
+ r = foo(x,y,z) returns to L1 -- emitForeignCall
+ L1:
+ r = r -- emitReturn
+ goto L2
+ L2:
+ ...
+
+Now L1 is a proc point (by definition, it is the continuation of the
+safe foreign call). If L2 does a heap check, then L2 will also be a
+proc point.
+
+Furthermore, the stack layout algorithm has to arrange to save r
+somewhere between the call and the jump to L1, which is annoying: we
+would have to treat r differently from the other live variables, which
+have to be saved *before* the call.
+
+So we adopt a special convention for safe foreign calls: the results
+are copied out according to the NativeReturn convention by the call,
+and the continuation of the call should copyIn the results. (The
+copyOut code is actually inserted when the safe foreign call is
+lowered later). The result regs attached to the safe foreign call are
+only used temporarily to hold the results before they are copied out.
+
+We will now generate this:
+
+ r = foo(x,y,z) returns to L1
+ L1:
+ r = R1 -- copyIn, inserted by mkSafeCall
+ goto L2
+ L2:
+ ... r ...
+
+And when the safe foreign call is lowered later (see Note [lower safe
+foreign calls]) we get this:
+
+ suspendThread()
+ r = foo(x,y,z)
+ resumeThread()
+ R1 = r -- copyOut, inserted by lowerSafeForeignCall
+ jump L1
+ L1:
+ r = R1 -- copyIn, inserted by mkSafeCall
+ goto L2
+ L2:
+ ... r ...
+
+Now consider what happens if L2 does a heap check: the Adams
+optimisation kicks in and commons up L1 with the heap-check
+continuation, resulting in just one proc point instead of two. Yay!
+-}
+
emitCCall :: [(CmmFormal,ForeignHint)]
-> CmmExpr
@@ -125,6 +217,7 @@ emitForeignCall safety results target args _ret
(playInterruptible safety)
+
{-
-- THINK ABOUT THIS (used to happen)
-- we might need to load arguments into temporaries before
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 9ee9192794..16b33d1faf 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -67,13 +67,17 @@ import FastString ( mkFastString, FastString, fsLit )
-- Call and return sequences
------------------------------------------------------------------------
-emitReturn :: [CmmExpr] -> FCode ()
--- Return multiple values to the sequel
+-- | Return multiple values to the sequel
+--
+-- If the sequel is @Return@
+--
+-- > return (x,y)
--
--- If the sequel is Return
--- return (x,y)
--- If the sequel is AssignTo [p,q]
--- p=x; q=y;
+-- If the sequel is @AssignTo [p,q]@
+--
+-- > p=x; q=y;
+--
+emitReturn :: [CmmExpr] -> FCode ()
emitReturn results
= do { sequel <- getSequel;
; updfr_off <- getUpdFrameOff
@@ -87,26 +91,24 @@ emitReturn results
; emitMultiAssign regs results }
}
+
+-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@,
+-- using the call/return convention @conv@, passing @args@, and
+-- returning the results to the current sequel.
+--
emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ()
--- (cgCall fun args) makes a call to the entry-code of 'fun',
--- passing 'args', and returning the results to the current sequel
-emitCall convs@(callConv, _) fun args
- = do { adjustHpBackwards
- ; sequel <- getSequel
- ; updfr_off <- getUpdFrameOff
- ; emitComment $ mkFastString ("emitCall: " ++ show sequel)
- ; case sequel of
- Return _ ->
- emit $ mkForeignJump callConv fun args updfr_off
- AssignTo res_regs _ ->
- emit =<< mkCall fun convs res_regs args updfr_off (0,[])
- }
+emitCall convs fun args
+ = emitCallWithExtraStack convs fun args noExtraStack
+
+-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the
+-- entry-code of @fun@, using the call/return convention @conv@,
+-- passing @args@, pushing some extra stack frames described by
+-- @stack@, and returning the results to the current sequel.
+--
emitCallWithExtraStack
:: (Convention, Convention) -> CmmExpr -> [CmmExpr]
-> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode ()
--- (cgCall fun args) makes a call to the entry-code of 'fun',
--- passing 'args', and returning the results to the current sequel
emitCallWithExtraStack convs@(callConv, _) fun args extra_stack
= do { adjustHpBackwards
; sequel <- getSequel
@@ -120,7 +122,6 @@ emitCallWithExtraStack convs@(callConv, _) fun args extra_stack
}
-
adjustHpBackwards :: FCode ()
-- This function adjusts the heap pointer just before a tail call or
-- return. At a call or return, the virtual heap pointer may be less
@@ -171,55 +172,67 @@ directCall :: CLabel -> Arity -> [StgArg] -> FCode ()
-- The function f has arity n, and there are guaranteed at least n args
-- Both arity and args include void args
directCall lbl arity stg_args
- = do { cmm_args <- getNonVoidArgAmodes stg_args
- ; direct_call "directCall" lbl arity cmm_args (argsReps stg_args) }
+ = do { argreps <- getArgRepsAmodes stg_args
+ ; direct_call "directCall" lbl arity argreps }
+
slowCall :: CmmExpr -> [StgArg] -> FCode ()
-- (slowCall fun args) applies fun to args, returning the results to Sequel
slowCall fun stg_args
= do { dflags <- getDynFlags
- ; cmm_args <- getNonVoidArgAmodes stg_args
+ ; argsreps <- getArgRepsAmodes stg_args
+ ; let (rts_fun, arity) = slowCallPattern (map fst argsreps)
; let platform = targetPlatform dflags
; call <- getCode $ direct_call "slow_call"
- (mkRtsApFastLabel rts_fun) arity cmm_args reps
+ (mkRtsApFastLabel rts_fun) arity argsreps
; emitComment $ mkFastString ("slow_call for " ++
showSDoc (pprPlatform platform fun) ++
" with pat " ++ showSDoc (ftext rts_fun))
; emit (mkAssign nodeReg fun <*> call)
}
- where
- reps = argsReps stg_args
- (rts_fun, arity) = slowCallPattern reps
+
--------------
-direct_call :: String -> CLabel -> Arity -> [CmmExpr] -> [ArgRep] -> FCode ()
--- NB1: (length args) may be less than (length reps), because
--- the args exclude the void ones
--- NB2: 'arity' refers to the *reps*
-direct_call caller lbl arity args reps
- | debugIsOn && arity > length reps -- Too few args
+direct_call :: String -> CLabel -> Arity -> [(ArgRep,Maybe CmmExpr)] -> FCode ()
+direct_call caller lbl arity args
+ | debugIsOn && arity > length args -- Too few args
= do -- Caller should ensure that there are enough args!
dflags <- getDynFlags
let platform = targetPlatform dflags
- pprPanic "direct_call" (text caller <+> ppr arity
- <+> pprPlatform platform lbl <+> ppr (length reps)
- <+> pprPlatform platform args <+> ppr reps )
+ pprPanic "direct_call" $
+ text caller <+> ppr arity <+>
+ pprPlatform platform lbl <+> ppr (length args) <+>
+ pprPlatform platform (map snd args) <+> ppr (map fst args)
-
- | null rest_reps -- Precisely the right number of arguments
- = emitCall (NativeDirectCall, NativeReturn) target args
+ | null rest_args -- Precisely the right number of arguments
+ = emitCall (NativeDirectCall, NativeReturn) target (nonVArgs args)
| otherwise -- Note [over-saturated calls]
- = ASSERT( arity == length initial_reps )
- emitCallWithExtraStack (NativeDirectCall, NativeReturn)
- target fast_args (mkStkOffsets stack_args)
+ = emitCallWithExtraStack (NativeDirectCall, NativeReturn)
+ target (nonVArgs fast_args) (mkStkOffsets stack_args)
where
target = CmmLit (CmmLabel lbl)
- (initial_reps, rest_reps) = splitAt arity reps
- arg_arity = count isNonV initial_reps
- (fast_args, rest_args) = splitAt arg_arity args
- stack_args = slowArgs (zip rest_reps rest_args)
+ (fast_args, rest_args) = splitAt arity args
+ stack_args = slowArgs rest_args
+
+-- When constructing calls, it is easier to keep the ArgReps and the
+-- CmmExprs zipped together. However, a void argument has no
+-- representation, so we need to use Maybe CmmExpr (the alternative of
+-- using zeroCLit or even undefined would work, but would be ugly).
+--
+getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)]
+getArgRepsAmodes = mapM getArgRepAmode
+ where getArgRepAmode arg
+ | V <- rep = return (V, Nothing)
+ | otherwise = do expr <- getArgAmode (NonVoid arg)
+ return (rep, Just expr)
+ where rep = toArgRep (argPrimRep arg)
+
+nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr]
+nonVArgs [] = []
+nonVArgs ((_,Nothing) : args) = nonVArgs args
+nonVArgs ((_,Just arg) : args) = arg : nonVArgs args
{-
Note [over-saturated calls]
@@ -259,23 +272,21 @@ just more arguments that we are passing on the stack (cml_args).
-- | 'slowArgs' takes a list of function arguments and prepares them for
-- pushing on the stack for "extra" arguments to a function which requires
-- fewer arguments than we currently have.
-slowArgs :: [(ArgRep,CmmExpr)] -> [(ArgRep,CmmExpr)]
+slowArgs :: [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)]
slowArgs [] = []
-slowArgs amodes
- | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest
- | otherwise = this_pat ++ slowArgs rest
+slowArgs args -- careful: reps contains voids (V), but args does not
+ | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest_args
+ | otherwise = this_pat ++ slowArgs rest_args
where
- (arg_pat, args, rest) = matchSlowPattern amodes
+ (arg_pat, n) = slowCallPattern (map fst args)
+ (call_args, rest_args) = splitAt n args
+
stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat
- this_pat = (N, mkLblExpr stg_ap_pat) : args
- save_cccs = [(N, mkLblExpr save_cccs_lbl), (N, curCCS)]
+ this_pat = (N, Just (mkLblExpr stg_ap_pat)) : call_args
+ save_cccs = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just curCCS)]
save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs")
-matchSlowPattern :: [(ArgRep,CmmExpr)]
- -> (FastString, [(ArgRep,CmmExpr)], [(ArgRep,CmmExpr)])
-matchSlowPattern amodes = (arg_pat, these, rest)
- where (arg_pat, n) = slowCallPattern (map fst amodes)
- (these, rest) = splitAt n amodes
+
-- These cases were found to cover about 99% of all slow calls:
slowCallPattern :: [ArgRep] -> (FastString, Arity)
@@ -304,16 +315,16 @@ slowCallPattern [] = (fsLit "stg_ap_0", 0)
-- See Note [over-saturated calls].
mkStkOffsets
- :: [(ArgRep,CmmExpr)] -- things to make offsets for
+ :: [(ArgRep, Maybe CmmExpr)] -- things to make offsets for
-> ( ByteOff -- OUTPUTS: Topmost allocated word
, [(CmmExpr, ByteOff)] ) -- things with offsets (voids filtered out)
mkStkOffsets things
= loop 0 [] (reverse things)
where
loop offset offs [] = (offset,offs)
- loop offset offs ((V,_):things) = loop offset offs things
+ loop offset offs ((_,Nothing):things) = loop offset offs things
-- ignore Void arguments
- loop offset offs ((rep,thing):things)
+ loop offset offs ((rep,Just thing):things)
= loop thing_off ((thing, thing_off):offs) things
where
thing_off = offset + argRepSizeW rep * wORD_SIZE
@@ -357,10 +368,7 @@ isNonV :: ArgRep -> Bool
isNonV V = False
isNonV _ = True
-argsReps :: [StgArg] -> [ArgRep]
-argsReps = map (toArgRep . argPrimRep)
-
-argRepSizeW :: ArgRep -> WordOff -- Size in words
+argRepSizeW :: ArgRep -> WordOff -- Size in words
argRepSizeW N = 1
argRepSizeW P = 1
argRepSizeW F = 1
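
As a worked example of the over-saturated call handling above (hypothetical
values, following Note [over-saturated calls] and the definitions of
direct_call, slowArgs and slowCallPattern): a known function of arity 1
applied to three pointer arguments splits into one fast argument plus a
stack-allocated stg_ap_pp application.

    -- arity      = 1
    -- args       = [(P, Just x), (P, Just y), (P, Just z)]
    -- fast_args  = [(P, Just x)]
    -- stack_args = slowArgs [(P, Just y), (P, Just z)]
    --            = [(N, Just stg_ap_pp), (P, Just y), (P, Just z)]
    --   where stg_ap_pp is the return-info label picked by
    --   slowCallPattern [P,P] (arity 2); with profiling on,
    --   save_cccs entries are prepended.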
diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs
index ccf0777906..240469c3f2 100644
--- a/compiler/codeGen/StgCmmMonad.hs
+++ b/compiler/codeGen/StgCmmMonad.hs
@@ -77,9 +77,6 @@ import Unique
import UniqSupply
import FastString
import Outputable
-import Util
-
-import Compiler.Hoopl hiding (Unique, (<*>), mkLabel, mkBranch, mkLast)
import Control.Monad
import Data.List
@@ -614,7 +611,7 @@ emitComment :: FastString -> FCode ()
#if 0 /* def DEBUG */
emitComment s = emitCgStmt (CgStmt (CmmComment s))
#else
-emitComment s = return ()
+emitComment _ = return ()
#endif
emitAssign :: CmmReg -> CmmExpr -> FCode ()
@@ -707,12 +704,16 @@ mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual]
-> FCode CmmAGraph
mkSafeCall t fs as upd i = do
k <- newLabelC
+ let (_off, copyout) = copyInOflow NativeReturn (Young k) fs
+ -- see Note [safe foreign call convention]
return
( mkStore (CmmStackSlot (Young k) (widthInBytes wordWidth))
(CmmLit (CmmBlock k))
- <*> mkLast (CmmForeignCall {tgt=t, res=fs, args=as, succ=k, updfr=upd, intrbl=i})
- <*> mkLabel k)
-
+ <*> mkLast (CmmForeignCall { tgt=t, res=fs, args=as, succ=k
+ , updfr=upd, intrbl=i })
+ <*> mkLabel k
+ <*> copyout
+ )
-- ----------------------------------------------------------------------------
-- CgStmts
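The net effect of the mkSafeCall change, in the pseudocode style of Note
[safe foreign call convention] (labels illustrative): the continuation block
now begins with a copyIn of the results, pairing up with the copyOut that
lowerSafeForeignCall inserts later.

    Sp[young(k)] = k
    r = foo(x,y,z) returns to k    -- the CmmForeignCall last node
    k:
    r = R1                         -- copyIn, now emitted by mkSafeCall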
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index c95b1f02ff..9f87271fba 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -68,14 +68,9 @@ cgOpApp :: StgOp -- The op
-- Foreign calls
cgOpApp (StgFCallOp fcall _) stg_args res_ty
- = do { (res_regs, res_hints) <- newUnboxedTupleRegs res_ty
- -- Choose result regs r1, r2
- -- Note [Foreign call results]
- ; cgForeignCall res_regs res_hints fcall stg_args
- -- r1, r2 = foo( x, y )
- ; emitReturn (map (CmmReg . CmmLocal) res_regs) }
- -- return (r1, r2)
-
+ = cgForeignCall fcall stg_args res_ty
+ -- Note [Foreign call results]
+
-- tagToEnum# is special: we need to pull the constructor
-- out of the table, and perform an appropriate return.