summaryrefslogtreecommitdiff
path: root/compiler/codeGen/CgTailCall.lhs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/codeGen/CgTailCall.lhs')
-rw-r--r--compiler/codeGen/CgTailCall.lhs455
1 files changed, 455 insertions, 0 deletions
diff --git a/compiler/codeGen/CgTailCall.lhs b/compiler/codeGen/CgTailCall.lhs
new file mode 100644
index 0000000000..dd7327b745
--- /dev/null
+++ b/compiler/codeGen/CgTailCall.lhs
@@ -0,0 +1,455 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+% $Id: CgTailCall.lhs,v 1.43 2005/06/21 10:44:41 simonmar Exp $
+%
+%********************************************************
+%* *
+\section[CgTailCall]{Tail calls: converting @StgApps@}
+%* *
+%********************************************************
+
+\begin{code}
+module CgTailCall (
+ cgTailCall, performTailCall,
+ performReturn, performPrimReturn,
+ emitKnownConReturnCode, emitAlgReturnCode,
+ returnUnboxedTuple, ccallReturnUnboxedTuple,
+ pushUnboxedTuple,
+ tailCallPrimOp,
+
+ pushReturnAddress
+ ) where
+
+#include "HsVersions.h"
+
+import CgMonad
+import CgBindery ( getArgAmodes, getCgIdInfo, CgIdInfo, maybeLetNoEscape,
+ idInfoToAmode, cgIdInfoId, cgIdInfoLF,
+ cgIdInfoArgRep )
+import CgInfoTbls ( entryCode, emitDirectReturnInstr, dataConTagZ,
+ emitVectoredReturnInstr, closureInfoPtr )
+import CgCallConv
+import CgStackery ( setRealSp, mkStkAmodes, adjustStackHW,
+ getSpRelOffset )
+import CgHeapery ( setRealHp, getHpRelOffset )
+import CgUtils ( emitSimultaneously )
+import CgTicky
+import ClosureInfo
+import SMRep ( CgRep, isVoidArg, separateByPtrFollowness )
+import Cmm
+import CmmUtils
+import CLabel ( CLabel, mkRtsPrimOpLabel, mkSeqInfoLabel )
+import Type ( isUnLiftedType )
+import Id ( Id, idName, idUnique, idType )
+import DataCon ( DataCon, dataConTyCon )
+import StgSyn ( StgArg )
+import TyCon ( TyCon )
+import PrimOp ( PrimOp )
+import Outputable
+
+import Monad ( when )
+
+-----------------------------------------------------------------------------
+-- Tail Calls
+
+cgTailCall :: Id -> [StgArg] -> Code
+
+-- Here's the code we generate for a tail call. (NB there may be no
+-- arguments, in which case this boils down to just entering a variable.)
+--
+-- * Put args in the top locations of the stack.
+-- * Adjust the stack ptr
+-- * Make R1 point to the function closure if necessary.
+-- * Perform the call.
+--
+-- Things to be careful about:
+--
+-- * Don't overwrite stack locations before you have finished with
+-- them (remember you need the function and the as-yet-unmoved
+-- arguments).
+-- * Preferably, generate no code to replace x by x on the stack (a
+-- common situation in tail-recursion).
+-- * Adjust the stack high water mark appropriately.
+--
+-- Treat unboxed locals exactly like literals (above) except use the addr
+-- mode for the local instead of (CLit lit) in the assignment.
+
+cgTailCall fun args
+ = do { fun_info <- getCgIdInfo fun
+
+ ; if isUnLiftedType (idType fun)
+ then -- Primitive return
+ ASSERT( null args )
+ do { fun_amode <- idInfoToAmode fun_info
+ ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode }
+
+ else -- Normal case, fun is boxed
+ do { arg_amodes <- getArgAmodes args
+ ; performTailCall fun_info arg_amodes noStmts }
+ }
+
+
+-- -----------------------------------------------------------------------------
+-- The guts of a tail-call
+
+performTailCall
+ :: CgIdInfo -- The function
+ -> [(CgRep,CmmExpr)] -- Args
+ -> CmmStmts -- Pending simultaneous assignments
+ -- *** GUARANTEED to contain only stack assignments.
+ -> Code
+
+performTailCall fun_info arg_amodes pending_assts
+ | Just join_sp <- maybeLetNoEscape fun_info
+ = -- A let-no-escape is slightly different, because we
+ -- arrange the stack arguments into pointers and non-pointers
+ -- to make the heap check easier. The tail-call sequence
+ -- is very similar to returning an unboxed tuple, so we
+ -- share some code.
+ do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
+ ; emitSimultaneously (pending_assts `plusStmts` arg_assts)
+ ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
+ ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }
+
+ | otherwise
+ = do { fun_amode <- idInfoToAmode fun_info
+ ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode)
+ opt_node_asst | nodeMustPointToIt lf_info = node_asst
+ | otherwise = noStmts
+ ; EndOfBlockInfo sp _ <- getEndOfBlockInfo
+ ; hmods <- getHomeModules
+
+ ; case (getCallMethod hmods fun_name lf_info (length arg_amodes)) of
+
+ -- Node must always point to things we enter
+ EnterIt -> do
+ { emitSimultaneously (node_asst `plusStmts` pending_assts)
+ ; let target = entryCode (closureInfoPtr (CmmReg nodeReg))
+ ; doFinalJump sp False (stmtC (CmmJump target [])) }
+
+ -- A function, but we have zero arguments. It is already in WHNF,
+ -- so we can just return it.
+ -- As with any return, Node must point to it.
+ ReturnIt -> do
+ { emitSimultaneously (node_asst `plusStmts` pending_assts)
+ ; doFinalJump sp False emitDirectReturnInstr }
+
+ -- A real constructor. Don't bother entering it,
+ -- just do the right sort of return instead.
+ -- As with any return, Node must point to it.
+ ReturnCon con -> do
+ { emitSimultaneously (node_asst `plusStmts` pending_assts)
+ ; doFinalJump sp False (emitKnownConReturnCode con) }
+
+ JumpToIt lbl -> do
+ { emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
+ ; doFinalJump sp False (jumpToLbl lbl) }
+
+ -- A slow function call via the RTS apply routines
+ -- Node must definitely point to the thing
+ SlowCall -> do
+ { when (not (null arg_amodes)) $ do
+ { if (isKnownFun lf_info)
+ then tickyKnownCallTooFewArgs
+ else tickyUnknownCall
+ ; tickySlowCallPat (map fst arg_amodes)
+ }
+
+ ; let (apply_lbl, args, extra_args)
+ = constructSlowCall arg_amodes
+
+ ; directCall sp apply_lbl args extra_args
+ (node_asst `plusStmts` pending_assts)
+ }
+
+ -- A direct function call (possibly with some left-over arguments)
+ DirectEntry lbl arity -> do
+ { if arity == length arg_amodes
+ then tickyKnownCallExact
+ else do tickyKnownCallExtraArgs
+ tickySlowCallPat (map fst (drop arity arg_amodes))
+
+ ; let
+ -- The args beyond the arity go straight on the stack
+ (arity_args, extra_args) = splitAt arity arg_amodes
+
+ ; directCall sp lbl arity_args extra_args
+ (opt_node_asst `plusStmts` pending_assts)
+ }
+ }
+ where
+ fun_name = idName (cgIdInfoId fun_info)
+ lf_info = cgIdInfoLF fun_info
+
+
+
+directCall sp lbl args extra_args assts = do
+ let
+ -- First chunk of args go in registers
+ (reg_arg_amodes, stk_args) = assignCallRegs args
+
+ -- Any "extra" arguments are placed in frames on the
+ -- stack after the other arguments.
+ slow_stk_args = slowArgs extra_args
+
+ reg_assts = assignToRegs reg_arg_amodes
+ --
+ (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)
+
+ emitSimultaneously (reg_assts `plusStmts`
+ stk_assts `plusStmts`
+ assts)
+
+ doFinalJump final_sp False (jumpToLbl lbl)
+
+-- -----------------------------------------------------------------------------
+-- The final clean-up before we do a jump at the end of a basic block.
+-- This code is shared by tail-calls and returns.
+
+doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code
+doFinalJump final_sp is_let_no_escape jump_code
+ = do { -- Adjust the high-water mark if necessary
+ adjustStackHW final_sp
+
+ -- Push a return address if necessary (after the assignments
+ -- above, in case we clobber a live stack location)
+ --
+ -- DONT push the return address when we're about to jump to a
+ -- let-no-escape: the final tail call in the let-no-escape
+ -- will do this.
+ ; eob <- getEndOfBlockInfo
+ ; whenC (not is_let_no_escape) (pushReturnAddress eob)
+
+ -- Final adjustment of Sp/Hp
+ ; adjustSpAndHp final_sp
+
+ -- and do the jump
+ ; jump_code }
+
+-- -----------------------------------------------------------------------------
+-- A general return (just a special case of doFinalJump, above)
+
+performReturn :: Code -- The code to execute to actually do the return
+ -> Code
+
+performReturn finish_code
+ = do { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo
+ ; doFinalJump args_sp False{-not a LNE-} finish_code }
+
+-- -----------------------------------------------------------------------------
+-- Primitive Returns
+-- Just load the return value into the right register, and return.
+
+performPrimReturn :: CgRep -> CmmExpr -- The thing to return
+ -> Code
+performPrimReturn rep amode
+ = do { whenC (not (isVoidArg rep))
+ (stmtC (CmmAssign ret_reg amode))
+ ; performReturn emitDirectReturnInstr }
+ where
+ ret_reg = dataReturnConvPrim rep
+
+-- -----------------------------------------------------------------------------
+-- Algebraic constructor returns
+
+-- Constructor is built on the heap; Node is set.
+-- All that remains is to do the right sort of jump.
+
+emitKnownConReturnCode :: DataCon -> Code
+emitKnownConReturnCode con
+ = emitAlgReturnCode (dataConTyCon con)
+ (CmmLit (mkIntCLit (dataConTagZ con)))
+ -- emitAlgReturnCode requires zero-indexed tag
+
+emitAlgReturnCode :: TyCon -> CmmExpr -> Code
+-- emitAlgReturnCode is used both by emitKnownConReturnCode,
+-- and by by PrimOps that return enumerated types (i.e.
+-- all the comparison operators).
+emitAlgReturnCode tycon tag
+ = do { case ctrlReturnConvAlg tycon of
+ VectoredReturn fam_sz -> do { tickyVectoredReturn fam_sz
+ ; emitVectoredReturnInstr tag }
+ UnvectoredReturn _ -> emitDirectReturnInstr
+ }
+
+
+-- ---------------------------------------------------------------------------
+-- Unboxed tuple returns
+
+-- These are a bit like a normal tail call, except that:
+--
+-- - The tail-call target is an info table on the stack
+--
+-- - We separate stack arguments into pointers and non-pointers,
+-- to make it easier to leave things in a sane state for a heap check.
+-- This is OK because we can never partially-apply an unboxed tuple,
+-- unlike a function. The same technique is used when calling
+-- let-no-escape functions, because they also can't be partially
+-- applied.
+
+returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
+returnUnboxedTuple amodes
+ = do { eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo
+ ; tickyUnboxedTupleReturn (length amodes)
+ ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
+ ; emitSimultaneously assts
+ ; doFinalJump final_sp False{-not a LNE-} emitDirectReturnInstr }
+
+pushUnboxedTuple :: VirtualSpOffset -- Sp at which to start pushing
+ -> [(CgRep, CmmExpr)] -- amodes of the components
+ -> FCode (VirtualSpOffset, -- final Sp
+ CmmStmts) -- assignments (regs+stack)
+
+pushUnboxedTuple sp []
+ = return (sp, noStmts)
+pushUnboxedTuple sp amodes
+ = do { let (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
+
+ -- separate the rest of the args into pointers and non-pointers
+ (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
+ reg_arg_assts = assignToRegs reg_arg_amodes
+
+ -- push ptrs, then nonptrs, on the stack
+ ; (ptr_sp, ptr_assts) <- mkStkAmodes sp ptr_args
+ ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
+
+ ; returnFC (final_sp,
+ reg_arg_assts `plusStmts`
+ ptr_assts `plusStmts` nptr_assts) }
+
+
+-- -----------------------------------------------------------------------------
+-- Returning unboxed tuples. This is mainly to support _ccall_GC_, where
+-- we want to do things in a slightly different order to normal:
+--
+-- - push return address
+-- - adjust stack pointer
+-- - r = call(args...)
+-- - assign regs for unboxed tuple (usually just R1 = r)
+-- - return to continuation
+--
+-- The return address (i.e. stack frame) must be on the stack before
+-- doing the call in case the call ends up in the garbage collector.
+--
+-- Sadly, the information about the continuation is lost after we push it
+-- (in order to avoid pushing it again), so we end up doing a needless
+-- indirect jump (ToDo).
+
+ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
+ccallReturnUnboxedTuple amodes before_jump
+ = do { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo
+
+ -- Push a return address if necessary
+ ; pushReturnAddress eob
+ ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
+ (do { adjustSpAndHp args_sp
+ ; before_jump
+ ; returnUnboxedTuple amodes })
+ }
+
+-- -----------------------------------------------------------------------------
+-- Calling an out-of-line primop
+
+tailCallPrimOp :: PrimOp -> [StgArg] -> Code
+tailCallPrimOp op args
+ = do { -- We're going to perform a normal-looking tail call,
+ -- except that *all* the arguments will be in registers.
+ -- Hence the ASSERT( null leftovers )
+ arg_amodes <- getArgAmodes args
+ ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
+ jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op)
+
+ ; ASSERT(null leftovers) -- no stack-resident args
+ emitSimultaneously (assignToRegs arg_regs)
+
+ ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
+ ; doFinalJump args_sp False{-not a LNE-} jump_to_primop }
+
+-- -----------------------------------------------------------------------------
+-- Return Addresses
+
+-- We always push the return address just before performing a tail call
+-- or return. The reason we leave it until then is because the stack
+-- slot that the return address is to go into might contain something
+-- useful.
+--
+-- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
+-- case expression and the return address is still to be pushed.
+--
+-- There are cases where it doesn't look necessary to push the return
+-- address: for example, just before doing a return to a known
+-- continuation. However, the continuation will expect to find the
+-- return address on the stack in case it needs to do a heap check.
+
+pushReturnAddress :: EndOfBlockInfo -> Code
+
+pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ False))
+ = do { sp_rel <- getSpRelOffset args_sp
+ ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }
+
+-- For a polymorphic case, we have two return addresses to push: the case
+-- return, and stg_seq_frame_info which turns a possible vectored return
+-- into a direct one.
+pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ True))
+ = do { sp_rel <- getSpRelOffset (args_sp-1)
+ ; stmtC (CmmStore sp_rel (mkLblExpr lbl))
+ ; sp_rel <- getSpRelOffset args_sp
+ ; stmtC (CmmStore sp_rel (CmmLit (CmmLabel mkSeqInfoLabel))) }
+
+pushReturnAddress _ = nopC
+
+-- -----------------------------------------------------------------------------
+-- Misc.
+
+jumpToLbl :: CLabel -> Code
+-- Passes no argument to the destination procedure
+jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}])
+
+assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
+assignToRegs reg_args
+ = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
+ | (expr, reg_id) <- reg_args ]
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection[CgStackery-adjust]{Adjusting the stack pointers}
+%* *
+%************************************************************************
+
+This function adjusts the stack and heap pointers just before a tail
+call or return. The stack pointer is adjusted to its final position
+(i.e. to point to the last argument for a tail call, or the activation
+record for a return). The heap pointer may be moved backwards, in
+cases where we overallocated at the beginning of the basic block (see
+CgCase.lhs for discussion).
+
+These functions {\em do not} deal with high-water-mark adjustment.
+That's done by functions which allocate stack space.
+
+\begin{code}
+adjustSpAndHp :: VirtualSpOffset -- New offset for Arg stack ptr
+ -> Code
+adjustSpAndHp newRealSp
+ = do { -- Adjust stack, if necessary.
+ -- NB: the conditional on the monad-carried realSp
+ -- is out of line (via codeOnly), to avoid a black hole
+ ; new_sp <- getSpRelOffset newRealSp
+ ; checkedAbsC (CmmAssign spReg new_sp) -- Will generate no code in the case
+ ; setRealSp newRealSp -- where realSp==newRealSp
+
+ -- Adjust heap. The virtual heap pointer may be less than the real Hp
+ -- because the latter was advanced to deal with the worst-case branch
+ -- of the code, and we may be in a better-case branch. In that case,
+ -- move the real Hp *back* and retract some ticky allocation count.
+ ; hp_usg <- getHpUsage
+ ; let rHp = realHp hp_usg
+ vHp = virtHp hp_usg
+ ; new_hp <- getHpRelOffset vHp
+ ; checkedAbsC (CmmAssign hpReg new_hp) -- Generates nothing when vHp==rHp
+ ; tickyAllocHeap (vHp - rHp) -- ...ditto
+ ; setRealHp vHp
+ }
+\end{code}