1 files changed, 455 insertions, 0 deletions
diff --git a/compiler/codeGen/CgTailCall.lhs b/compiler/codeGen/CgTailCall.lhs
new file mode 100644
index 0000000000..dd7327b745
--- /dev/null
+++ b/compiler/codeGen/CgTailCall.lhs
@@ -0,0 +1,455 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+% $Id: CgTailCall.lhs,v 1.43 2005/06/21 10:44:41 simonmar Exp $
+%
+%********************************************************
+%*							*
+\section[CgTailCall]{Tail calls: converting @StgApps@}
+%*							*
+%********************************************************
+
+\begin{code}
+module CgTailCall (
+	cgTailCall, performTailCall,
+	performReturn, performPrimReturn,
+	emitKnownConReturnCode, emitAlgReturnCode,
+	returnUnboxedTuple, ccallReturnUnboxedTuple,
+	pushUnboxedTuple,
+	tailCallPrimOp,
+
+	pushReturnAddress
+    ) where
+
+#include "HsVersions.h"
+
+import CgMonad
+import CgBindery	( getArgAmodes, getCgIdInfo, CgIdInfo, maybeLetNoEscape,
+			  idInfoToAmode, cgIdInfoId, cgIdInfoLF,
+			  cgIdInfoArgRep )
+import CgInfoTbls	( entryCode, emitDirectReturnInstr, dataConTagZ,
+			  emitVectoredReturnInstr, closureInfoPtr )
+import CgCallConv
+import CgStackery	( setRealSp, mkStkAmodes, adjustStackHW,
+			  getSpRelOffset )
+import CgHeapery	( setRealHp, getHpRelOffset )
+import CgUtils		( emitSimultaneously )
+import CgTicky
+import ClosureInfo
+import SMRep		( CgRep, isVoidArg, separateByPtrFollowness )
+import Cmm	
+import CmmUtils
+import CLabel		( CLabel, mkRtsPrimOpLabel, mkSeqInfoLabel )
+import Type		( isUnLiftedType )
+import Id		( Id, idName, idUnique, idType )
+import DataCon		( DataCon, dataConTyCon )
+import StgSyn		( StgArg )
+import TyCon            ( TyCon )
+import PrimOp		( PrimOp )
+import Outputable
+
+import Monad		( when )
+
+-----------------------------------------------------------------------------
+-- Tail Calls
+
+cgTailCall :: Id -> [StgArg] -> Code
+
+-- Here's the code we generate for a tail call.  (NB there may be no
+-- arguments, in which case this boils down to just entering a variable.)
+-- 
+--    *	Put args in the top locations of the stack.
+--    *	Adjust the stack ptr
+--    *	Make R1 point to the function closure if necessary.
+--    *	Perform the call.
+--
+-- Things to be careful about:
+--
+--    *	Don't overwrite stack locations before you have finished with
+-- 	them (remember you need the function and the as-yet-unmoved
+-- 	arguments).
+--    *	Preferably, generate no code to replace x by x on the stack (a
+-- 	common situation in tail-recursion).
+--    *	Adjust the stack high water mark appropriately.
+-- 
+-- Treat unboxed locals exactly like literals (above) except use the addr
+-- mode for the local instead of (CLit lit) in the assignment.
+
+cgTailCall fun args
+  = do	{ fun_info <- getCgIdInfo fun
+
+	; if isUnLiftedType (idType fun)
+	  then 	-- Primitive return
+		ASSERT( null args )
+	    do	{ fun_amode <- idInfoToAmode fun_info
+		; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode } 
+
+	  else -- Normal case, fun is boxed
+	    do  { arg_amodes <- getArgAmodes args
+		; performTailCall fun_info arg_amodes noStmts }
+	}
+		
+
+-- -----------------------------------------------------------------------------
+-- The guts of a tail-call
+
+performTailCall 
+	:: CgIdInfo		-- The function
+	-> [(CgRep,CmmExpr)]	-- Args
+	-> CmmStmts		-- Pending simultaneous assignments
+				--  *** GUARANTEED to contain only stack assignments.
+	-> Code
+
+performTailCall fun_info arg_amodes pending_assts
+  | Just join_sp <- maybeLetNoEscape fun_info
+  = 	   -- A let-no-escape is slightly different, because we
+	   -- arrange the stack arguments into pointers and non-pointers
+	   -- to make the heap check easier.  The tail-call sequence
+	   -- is very similar to returning an unboxed tuple, so we
+	   -- share some code.
+     do	{ (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
+	; emitSimultaneously (pending_assts `plusStmts` arg_assts)
+	; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
+	; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }
+
+  | otherwise
+  = do 	{ fun_amode <- idInfoToAmode fun_info
+	; let node_asst = oneStmt (CmmAssign nodeReg fun_amode)
+	      opt_node_asst | nodeMustPointToIt lf_info = node_asst
+			    | otherwise		        = noStmts
+	; EndOfBlockInfo sp _ <- getEndOfBlockInfo
+	; hmods <- getHomeModules
+
+	; case (getCallMethod hmods fun_name lf_info (length arg_amodes)) of
+
+	    -- Node must always point to things we enter
+	    EnterIt -> do
+		{ emitSimultaneously (node_asst `plusStmts` pending_assts) 
+		; let target = entryCode (closureInfoPtr (CmmReg nodeReg))
+		; doFinalJump sp False (stmtC (CmmJump target [])) }
+    
+	    -- A function, but we have zero arguments.  It is already in WHNF,
+	    -- so we can just return it.  
+	    -- As with any return, Node must point to it.
+	    ReturnIt -> do
+		{ emitSimultaneously (node_asst `plusStmts` pending_assts)
+		; doFinalJump sp False emitDirectReturnInstr }
+    
+	    -- A real constructor.  Don't bother entering it, 
+	    -- just do the right sort of return instead.
+	    -- As with any return, Node must point to it.
+	    ReturnCon con -> do
+		{ emitSimultaneously (node_asst `plusStmts` pending_assts)
+		; doFinalJump sp False (emitKnownConReturnCode con) }
+
+	    JumpToIt lbl -> do
+		{ emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
+		; doFinalJump sp False (jumpToLbl lbl) }
+    
+	    -- A slow function call via the RTS apply routines
+	    -- Node must definitely point to the thing
+	    SlowCall -> do 
+		{  when (not (null arg_amodes)) $ do
+		   { if (isKnownFun lf_info) 
+			then tickyKnownCallTooFewArgs
+			else tickyUnknownCall
+		   ; tickySlowCallPat (map fst arg_amodes) 
+		   }
+
+		; let (apply_lbl, args, extra_args) 
+			= constructSlowCall arg_amodes
+
+		; directCall sp apply_lbl args extra_args 
+			(node_asst `plusStmts` pending_assts)
+		}
+    
+	    -- A direct function call (possibly with some left-over arguments)
+	    DirectEntry lbl arity -> do
+		{ if arity == length arg_amodes
+			then tickyKnownCallExact
+			else do tickyKnownCallExtraArgs
+				tickySlowCallPat (map fst (drop arity arg_amodes))
+
+ 		; let
+		     -- The args beyond the arity go straight on the stack
+		     (arity_args, extra_args) = splitAt arity arg_amodes
+     
+		; directCall sp lbl arity_args extra_args
+			(opt_node_asst `plusStmts` pending_assts)
+	        }
+	}
+  where
+    fun_name  = idName (cgIdInfoId fun_info)
+    lf_info   = cgIdInfoLF fun_info
+
+
+
+directCall sp lbl args extra_args assts = do
+  let
+	-- First chunk of args go in registers
+	(reg_arg_amodes, stk_args) = assignCallRegs args
+     
+	-- Any "extra" arguments are placed in frames on the
+	-- stack after the other arguments.
+	slow_stk_args = slowArgs extra_args
+
+	reg_assts = assignToRegs reg_arg_amodes
+  --
+  (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)
+
+  emitSimultaneously (reg_assts     `plusStmts`
+		      stk_assts     `plusStmts`
+		      assts)
+
+  doFinalJump final_sp False (jumpToLbl lbl)
+
+-- -----------------------------------------------------------------------------
+-- The final clean-up before we do a jump at the end of a basic block.
+-- This code is shared by tail-calls and returns.
+
+doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code 
+doFinalJump final_sp is_let_no_escape jump_code
+  = do	{ -- Adjust the high-water mark if necessary
+	  adjustStackHW final_sp
+
+	-- Push a return address if necessary (after the assignments
+	-- above, in case we clobber a live stack location)
+	--
+	-- DONT push the return address when we're about to jump to a
+	-- let-no-escape: the final tail call in the let-no-escape
+	-- will do this.
+	; eob <- getEndOfBlockInfo
+	; whenC (not is_let_no_escape) (pushReturnAddress eob)
+
+	    -- Final adjustment of Sp/Hp
+	; adjustSpAndHp final_sp
+
+	    -- and do the jump
+	; jump_code }
+
+-- -----------------------------------------------------------------------------
+-- A general return (just a special case of doFinalJump, above)
+
+performReturn :: Code		-- The code to execute to actually do the return
+	      -> Code
+
+performReturn finish_code
+  = do  { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo
+	; doFinalJump args_sp False{-not a LNE-} finish_code }
+
+-- -----------------------------------------------------------------------------
+-- Primitive Returns
+-- Just load the return value into the right register, and return.
+
+performPrimReturn :: CgRep -> CmmExpr	-- The thing to return
+		  -> Code
+performPrimReturn rep amode
+  =  do { whenC (not (isVoidArg rep))
+		(stmtC (CmmAssign ret_reg amode))
+	; performReturn emitDirectReturnInstr }
+  where
+    ret_reg = dataReturnConvPrim rep
+
+-- -----------------------------------------------------------------------------
+-- Algebraic constructor returns
+
+-- Constructor is built on the heap; Node is set.
+-- All that remains is to do the right sort of jump.
+
+emitKnownConReturnCode :: DataCon -> Code
+emitKnownConReturnCode con
+  = emitAlgReturnCode (dataConTyCon con)
+		      (CmmLit (mkIntCLit (dataConTagZ con)))
+			-- emitAlgReturnCode requires zero-indexed tag
+
+emitAlgReturnCode :: TyCon -> CmmExpr -> Code
+-- emitAlgReturnCode is used both by emitKnownConReturnCode,
+-- and by by PrimOps that return enumerated types (i.e.
+-- all the comparison operators).
+emitAlgReturnCode tycon tag
+ =  do	{ case ctrlReturnConvAlg tycon of
+	    VectoredReturn fam_sz -> do { tickyVectoredReturn fam_sz
+	 			     	; emitVectoredReturnInstr tag }
+	    UnvectoredReturn _    -> emitDirectReturnInstr 
+	}
+
+
+-- ---------------------------------------------------------------------------
+-- Unboxed tuple returns
+
+-- These are a bit like a normal tail call, except that:
+--
+--   - The tail-call target is an info table on the stack
+--
+--   - We separate stack arguments into pointers and non-pointers,
+--     to make it easier to leave things in a sane state for a heap check.
+--     This is OK because we can never partially-apply an unboxed tuple,
+--     unlike a function.  The same technique is used when calling
+--     let-no-escape functions, because they also can't be partially
+--     applied.
+
+returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
+returnUnboxedTuple amodes
+  = do 	{ eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo
+	; tickyUnboxedTupleReturn (length amodes)
+	; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
+	; emitSimultaneously assts
+	; doFinalJump final_sp False{-not a LNE-} emitDirectReturnInstr }
+
+pushUnboxedTuple :: VirtualSpOffset		-- Sp at which to start pushing
+		 -> [(CgRep, CmmExpr)]		-- amodes of the components
+		 -> FCode (VirtualSpOffset,	-- final Sp
+			   CmmStmts)		-- assignments (regs+stack)
+
+pushUnboxedTuple sp [] 
+  = return (sp, noStmts)
+pushUnboxedTuple sp amodes
+  = do	{ let	(reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
+	
+		-- separate the rest of the args into pointers and non-pointers
+		(ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
+		reg_arg_assts = assignToRegs reg_arg_amodes
+		
+	    -- push ptrs, then nonptrs, on the stack
+	; (ptr_sp,   ptr_assts)  <- mkStkAmodes sp ptr_args
+	; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
+
+	; returnFC (final_sp,
+	  	    reg_arg_assts `plusStmts` 
+		    ptr_assts `plusStmts` nptr_assts) }
+    
+		  
+-- -----------------------------------------------------------------------------
+-- Returning unboxed tuples.  This is mainly to support _ccall_GC_, where
+-- we want to do things in a slightly different order to normal:
+-- 
+-- 		- push return address
+-- 		- adjust stack pointer
+-- 		- r = call(args...)
+-- 		- assign regs for unboxed tuple (usually just R1 = r)
+-- 		- return to continuation
+-- 
+-- The return address (i.e. stack frame) must be on the stack before
+-- doing the call in case the call ends up in the garbage collector.
+-- 
+-- Sadly, the information about the continuation is lost after we push it
+-- (in order to avoid pushing it again), so we end up doing a needless
+-- indirect jump (ToDo).
+
+ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
+ccallReturnUnboxedTuple amodes before_jump
+  = do 	{ eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo
+
+	-- Push a return address if necessary
+	; pushReturnAddress eob
+	; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
+	    (do	{ adjustSpAndHp args_sp
+		; before_jump
+  		; returnUnboxedTuple amodes })
+    }
+
+-- -----------------------------------------------------------------------------
+-- Calling an out-of-line primop
+
+tailCallPrimOp :: PrimOp -> [StgArg] -> Code
+tailCallPrimOp op args
+ = do	{	-- We're going to perform a normal-looking tail call, 
+		-- except that *all* the arguments will be in registers.
+		-- Hence the ASSERT( null leftovers )
+	  arg_amodes <- getArgAmodes args
+	; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
+	      jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op)
+
+	; ASSERT(null leftovers) -- no stack-resident args
+ 	  emitSimultaneously (assignToRegs arg_regs)
+
+	; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
+	; doFinalJump args_sp False{-not a LNE-} jump_to_primop }
+
+-- -----------------------------------------------------------------------------
+-- Return Addresses
+
+-- We always push the return address just before performing a tail call
+-- or return.  The reason we leave it until then is because the stack
+-- slot that the return address is to go into might contain something
+-- useful.
+-- 
+-- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
+-- case expression and the return address is still to be pushed.
+-- 
+-- There are cases where it doesn't look necessary to push the return
+-- address: for example, just before doing a return to a known
+-- continuation.  However, the continuation will expect to find the
+-- return address on the stack in case it needs to do a heap check.
+
+pushReturnAddress :: EndOfBlockInfo -> Code
+
+pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ False))
+  = do	{ sp_rel <- getSpRelOffset args_sp
+	; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }
+
+-- For a polymorphic case, we have two return addresses to push: the case
+-- return, and stg_seq_frame_info which turns a possible vectored return
+-- into a direct one.
+pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ True))
+  = do	{ sp_rel <- getSpRelOffset (args_sp-1)
+	; stmtC (CmmStore sp_rel (mkLblExpr lbl))
+	; sp_rel <- getSpRelOffset args_sp
+	; stmtC (CmmStore sp_rel (CmmLit (CmmLabel mkSeqInfoLabel))) }
+
+pushReturnAddress _ = nopC
+
+-- -----------------------------------------------------------------------------
+-- Misc.
+
+jumpToLbl :: CLabel -> Code
+-- Passes no argument to the destination procedure
+jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}])
+
+assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
+assignToRegs reg_args 
+  = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
+	    | (expr, reg_id) <- reg_args ] 
+\end{code}
+
+
+%************************************************************************
+%*									*
+\subsection[CgStackery-adjust]{Adjusting the stack pointers}
+%*									*
+%************************************************************************
+
+This function adjusts the stack and heap pointers just before a tail
+call or return.  The stack pointer is adjusted to its final position
+(i.e. to point to the last argument for a tail call, or the activation
+record for a return).  The heap pointer may be moved backwards, in
+cases where we overallocated at the beginning of the basic block (see
+CgCase.lhs for discussion).
+
+These functions {\em do not} deal with high-water-mark adjustment.
+That's done by functions which allocate stack space.
+
+\begin{code}
+adjustSpAndHp :: VirtualSpOffset 	-- New offset for Arg stack ptr
+	      -> Code
+adjustSpAndHp newRealSp 
+  = do	{ -- Adjust stack, if necessary.
+	  -- NB: the conditional on the monad-carried realSp
+	  --     is out of line (via codeOnly), to avoid a black hole
+	; new_sp <- getSpRelOffset newRealSp
+	; checkedAbsC (CmmAssign spReg new_sp)	-- Will generate no code in the case
+	; setRealSp newRealSp			-- where realSp==newRealSp
+
+	  -- Adjust heap.  The virtual heap pointer may be less than the real Hp
+	  -- because the latter was advanced to deal with the worst-case branch
+	  -- of the code, and we may be in a better-case branch.  In that case,
+ 	  -- move the real Hp *back* and retract some ticky allocation count.
+	; hp_usg <- getHpUsage
+	; let rHp = realHp hp_usg
+	      vHp = virtHp hp_usg
+	; new_hp <- getHpRelOffset vHp
+	; checkedAbsC (CmmAssign hpReg new_hp)	-- Generates nothing when vHp==rHp
+	; tickyAllocHeap (vHp - rHp)		-- ...ditto
+	; setRealHp vHp
+	}
+\end{code}