/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team, 1998-2004
 *
 * Canned Heap-Check and Stack-Check sequences.
 *
 * This file is written in a subset of C--, extended with various
 * features specific to GHC.  It is compiled by GHC directly.  For the
 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
 *
 * ---------------------------------------------------------------------------*/

#include "Cmm.h"

#ifdef __PIC__
import pthread_mutex_unlock;
#endif
import EnterCriticalSection;
import LeaveCriticalSection;

/* Stack/Heap Check Failure
 * ------------------------
 *
 * On discovering that a stack or heap check has failed, we do the following:
 *
 *    - If HpLim==0, indicating that we should context-switch, we yield
 *      to the scheduler (return ThreadYielding).
 *
 * Note that we must leave no slop in the heap (this is a requirement
 * for LDV profiling, at least), so if we just had a heap-check
 * failure, then we must retract Hp by HpAlloc.  How do we know
 * whether there was a heap-check failure?  HpLim might be zero, and
 * yet we got here as a result of a stack-check failure.  Hence, we
 * require that HpAlloc is only non-zero if there was a heap-check
 * failure, otherwise it is zero, so we can always safely subtract
 * HpAlloc from Hp.
 *
 * Hence, HpAlloc is zeroed in LOAD_THREAD_STATE().
 *
 *    - If the context_switch flag is set (the backup plan if setting HpLim
 *      to 0 didn't trigger a context switch), we yield to the scheduler
 *	(return ThreadYielding).
 *
 *    - If Hp > HpLim, we've had a heap check failure.  This means we've
 *	come to the end of the current heap block, so we try to chain
 *	another block on with ExtendNursery().  
 *
 *	     - If this succeeds, we carry on without returning to the 
 *	       scheduler.  
 *
 *	     - If it fails, we return to the scheduler claiming HeapOverflow
 *	       so that a garbage collection can be performed.
 *
 *    - If Hp <= HpLim, it must have been a stack check that failed.  In
 *	which case, we return to the scheduler claiming StackOverflow, the
 *	scheduler will either increase the size of our stack, or raise
 *	an exception if the stack is already too big.
 *
 * The effect of checking for context switch only in the heap/stack check
 * failure code is that we'll switch threads after the current thread has
 * reached the end of its heap block.  If a thread isn't allocating
 * at all, it won't yield.  Hopefully this won't be a problem in practice.
 */
 
#define PRE_RETURN(why,what_next)			\
  StgTSO_what_next(CurrentTSO) = what_next::I16;	\
  StgRegTable_rRet(BaseReg) = why;           	        \
  R1 = BaseReg;

/* Remember that the return address is *removed* when returning to a
 * ThreadRunGHC thread.
 */

#define GC_GENERIC						\
    DEBUG_ONLY(foreign "C" heapCheckFail());			\
    if (Hp > HpLim) {						\
        Hp = Hp - HpAlloc/*in bytes*/;				\
        if (HpLim == 0) { \
                R1 = ThreadYielding;				\
                goto sched;					\
        }						\
        if (HpAlloc <= BLOCK_SIZE				\
            && bdescr_link(CurrentNursery) != NULL) {		\
            HpAlloc = 0;                                        \
            CLOSE_NURSERY();					\
            CurrentNursery = bdescr_link(CurrentNursery);	\
            OPEN_NURSERY();					\
            if (Capability_context_switch(MyCapability()) != 0 :: CInt) { \
                R1 = ThreadYielding;				\
                goto sched;					\
            } else {						\
                jump %ENTRY_CODE(Sp(0));			\
            }							\
	} else {						\
            R1 = HeapOverflow;					\
            goto sched;						\
        }							\
    } else {							\
        R1 = StackOverflow;					\
    }								\
  sched:							\
    PRE_RETURN(R1,ThreadRunGHC);				\
    jump stg_returnToSched;

#define HP_GENERIC				\
   PRE_RETURN(HeapOverflow, ThreadRunGHC)	\
  jump stg_returnToSched;

#define BLOCK_GENERIC				\
   PRE_RETURN(ThreadBlocked,  ThreadRunGHC)	\
  jump stg_returnToSched;

#define YIELD_GENERIC				\
  PRE_RETURN(ThreadYielding, ThreadRunGHC)	\
  jump stg_returnToSched;

#define BLOCK_BUT_FIRST(c)			\
  PRE_RETURN(ThreadBlocked, ThreadRunGHC)	\
  R2 = c;					\
  jump stg_returnToSchedButFirst;

#define YIELD_TO_INTERPRETER			\
  PRE_RETURN(ThreadYielding, ThreadInterpret)	\
  jump stg_returnToSchedNotPaused;

/* -----------------------------------------------------------------------------
   Heap checks in thunks/functions.

   In these cases, node always points to the function closure.  This gives
   us an easy way to return to the function: just leave R1 on the top of
   the stack, and have the scheduler enter it to return.

   There are canned sequences for 'n' pointer values in registers.
   -------------------------------------------------------------------------- */

INFO_TABLE_RET( stg_enter, RET_SMALL, P_ unused)
{
    R1 = Sp(1);
    Sp_adj(2);
    ENTER();
}

__stg_gc_enter_1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_enter_info;
    GC_GENERIC
}

#if defined(GRAN)
/*
  ToDo: merge the block and yield macros, calling something like BLOCK(N)
        at the end;
*/

/* 
   Should we actually ever do a yield in such a case?? -- HWL
*/
gran_yield_0
{
    SAVE_THREAD_STATE();					
    TSO_what_next(CurrentTSO) = ThreadRunGHC;		
    R1 = ThreadYielding;
    jump StgReturn;
}

gran_yield_1
{
    Sp_adj(-1);
    Sp(0) = R1;
    SAVE_THREAD_STATE();					
    TSO_what_next(CurrentTSO) = ThreadRunGHC;		
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 2 Regs--------------------------------------------------------------------*/

gran_yield_2
{
    Sp_adj(-2);
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();					
    TSO_what_next(CurrentTSO) = ThreadRunGHC;		
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 3 Regs -------------------------------------------------------------------*/

gran_yield_3
{
    Sp_adj(-3);
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();					
    TSO_what_next(CurrentTSO) = ThreadRunGHC;		
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 4 Regs -------------------------------------------------------------------*/

gran_yield_4
{
    Sp_adj(-4);
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();					
    TSO_what_next(CurrentTSO) = ThreadRunGHC;		
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 5 Regs -------------------------------------------------------------------*/

gran_yield_5
{
    Sp_adj(-5);
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();					
    TSO_what_next(CurrentTSO) = ThreadRunGHC;		
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 6 Regs -------------------------------------------------------------------*/

gran_yield_6
{
    Sp_adj(-6);
    Sp(5) = R6;
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();					
    TSO_what_next(CurrentTSO) = ThreadRunGHC;		
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 7 Regs -------------------------------------------------------------------*/

gran_yield_7
{
    Sp_adj(-7);
    Sp(6) = R7;
    Sp(5) = R6;
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();					
    TSO_what_next(CurrentTSO) = ThreadRunGHC;		
    R1 = ThreadYielding;
    jump StgReturn;
}

/*- 8 Regs -------------------------------------------------------------------*/

gran_yield_8
{
    Sp_adj(-8);
    Sp(7) = R8;
    Sp(6) = R7;
    Sp(5) = R6;
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();					
    TSO_what_next(CurrentTSO) = ThreadRunGHC;		
    R1 = ThreadYielding;
    jump StgReturn;
}

// the same routines but with a block rather than a yield

gran_block_1
{
    Sp_adj(-1);
    Sp(0) = R1;
    SAVE_THREAD_STATE();					
    TSO_what_next(CurrentTSO) = ThreadRunGHC;		
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 2 Regs--------------------------------------------------------------------*/

gran_block_2
{
    Sp_adj(-2);
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();					
    TSO_what_next(CurrentTSO) = ThreadRunGHC;		
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 3 Regs -------------------------------------------------------------------*/

gran_block_3
{
    Sp_adj(-3);
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();					
    TSO_what_next(CurrentTSO) = ThreadRunGHC;		
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 4 Regs -------------------------------------------------------------------*/

gran_block_4
{
    Sp_adj(-4);
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();					
    TSO_what_next(CurrentTSO) = ThreadRunGHC;		
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 5 Regs -------------------------------------------------------------------*/

gran_block_5
{
    Sp_adj(-5);
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();					
    TSO_what_next(CurrentTSO) = ThreadRunGHC;		
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 6 Regs -------------------------------------------------------------------*/

gran_block_6
{
    Sp_adj(-6);
    Sp(5) = R6;
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();					
    TSO_what_next(CurrentTSO) = ThreadRunGHC;		
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 7 Regs -------------------------------------------------------------------*/

gran_block_7
{
    Sp_adj(-7);
    Sp(6) = R7;
    Sp(5) = R6;
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();					
    TSO_what_next(CurrentTSO) = ThreadRunGHC;		
    R1 = ThreadBlocked;
    jump StgReturn;
}

/*- 8 Regs -------------------------------------------------------------------*/

gran_block_8
{
    Sp_adj(-8);
    Sp(7) = R8;
    Sp(6) = R7;
    Sp(5) = R6;
    Sp(4) = R5;
    Sp(3) = R4;
    Sp(2) = R3;
    Sp(1) = R2;
    Sp(0) = R1;
    SAVE_THREAD_STATE();					
    TSO_what_next(CurrentTSO) = ThreadRunGHC;		
    R1 = ThreadBlocked;
    jump StgReturn;
}

#endif

#if 0 && defined(PAR)

/*
  Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the
  saving of the thread state from the actual jump via an StgReturn.
  We need this separation because we call RTS routines in blocking entry codes
  before jumping back into the RTS (see parallel/FetchMe.hc).
*/

par_block_1_no_jump
{
    Sp_adj(-1);
    Sp(0) = R1;
    SAVE_THREAD_STATE();					
}

par_jump
{
    TSO_what_next(CurrentTSO) = ThreadRunGHC;		
    R1 = ThreadBlocked;
    jump StgReturn;
}

#endif

/* -----------------------------------------------------------------------------
   Heap checks in Primitive case alternatives

   A primitive case alternative is entered with a value either in 
   R1, FloatReg1 or D1 depending on the return convention.  All the
   cases are covered below.
   -------------------------------------------------------------------------- */

/*-- No Registers live ------------------------------------------------------ */

stg_gc_noregs
{
    GC_GENERIC
}

/*-- void return ------------------------------------------------------------ */

INFO_TABLE_RET( stg_gc_void, RET_SMALL)
{
    Sp_adj(1);
    jump %ENTRY_CODE(Sp(0));
}

/*-- R1 is boxed/unpointed -------------------------------------------------- */

INFO_TABLE_RET( stg_gc_unpt_r1, RET_SMALL, P_ unused)
{
    R1 = Sp(1);
    Sp_adj(2);
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_unpt_r1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_gc_unpt_r1_info;
    GC_GENERIC
}

/*-- R1 is unboxed -------------------------------------------------- */

/* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. */
INFO_TABLE_RET(	stg_gc_unbx_r1, RET_SMALL, W_ unused )
{
    R1 = Sp(1);
    Sp_adj(2);
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_unbx_r1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_gc_unbx_r1_info;
    GC_GENERIC
}

/*-- F1 contains a float ------------------------------------------------- */

INFO_TABLE_RET(	stg_gc_f1, RET_SMALL, F_ unused )
{
    F1 = F_[Sp+WDS(1)];
    Sp_adj(2);
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_f1
{
    Sp_adj(-2);
    F_[Sp + WDS(1)] = F1;
    Sp(0) = stg_gc_f1_info;
    GC_GENERIC
}

/*-- D1 contains a double ------------------------------------------------- */

INFO_TABLE_RET(	stg_gc_d1, RET_SMALL, D_ unused )
{
    D1 = D_[Sp + WDS(1)];
    Sp = Sp + WDS(1) + SIZEOF_StgDouble;
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_d1
{
    Sp = Sp - WDS(1) - SIZEOF_StgDouble;
    D_[Sp + WDS(1)] = D1;
    Sp(0) = stg_gc_d1_info;
    GC_GENERIC
}


/*-- L1 contains an int64 ------------------------------------------------- */

INFO_TABLE_RET( stg_gc_l1, RET_SMALL, L_ unused )
{
    L1 = L_[Sp + WDS(1)];
    Sp_adj(1) + SIZEOF_StgWord64;
    jump %ENTRY_CODE(Sp(0));
}

stg_gc_l1
{
    Sp_adj(-1) - SIZEOF_StgWord64;
    L_[Sp + WDS(1)] = L1;
    Sp(0) = stg_gc_l1_info;
    GC_GENERIC
}

/*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */

INFO_TABLE_RET( stg_ut_1_0_unreg, RET_SMALL, P_ unused )
{
    Sp_adj(1);
    // one ptr is on the stack (Sp(0))
    jump %ENTRY_CODE(Sp(1));
}

/* -----------------------------------------------------------------------------
   Generic function entry heap check code.

   At a function entry point, the arguments are as per the calling convention,
   i.e. some in regs and some on the stack.  There may or may not be 
   a pointer to the function closure in R1 - if there isn't, then the heap
   check failure code in the function will arrange to load it.

   The function's argument types are described in its info table, so we
   can just jump to this bit of generic code to save away all the
   registers and return to the scheduler.

   This code arranges the stack like this:
	 
         |        ....         |
         |        args         |
	 +---------------------+
         |      f_closure      |
	 +---------------------+
         |        size         |
	 +---------------------+
         |   stg_gc_fun_info   |
	 +---------------------+

   The size is the number of words of arguments on the stack, and is cached
   in the frame in order to simplify stack walking: otherwise the size of
   this stack frame would have to be calculated by looking at f's info table.

   -------------------------------------------------------------------------- */

__stg_gc_fun
{
    W_ size;
    W_ info;
    W_ type;

    info = %GET_FUN_INFO(UNTAG(R1));

    // cache the size
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    if (type == ARG_GEN) {
	size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info));
    } else { 
	if (type == ARG_GEN_BIG) {
#ifdef TABLES_NEXT_TO_CODE
            // bitmap field holds an offset
            size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info)
                                        + %GET_ENTRY(UNTAG(R1)) /* ### */ );
#else
	    size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) );
#endif
	} else {
	    size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]);
	}
    }
    
#ifdef NO_ARG_REGS
    // we don't have to save any registers away
    Sp_adj(-3);
    Sp(2) = R1;
    Sp(1) = size;
    Sp(0) = stg_gc_fun_info;
    GC_GENERIC
#else
    W_ type;
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    // cache the size
    if (type == ARG_GEN || type == ARG_GEN_BIG) {
        // regs already saved by the heap check code
        Sp_adj(-3);
        Sp(2) = R1;
        Sp(1) = size;
        Sp(0) = stg_gc_fun_info;
        // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)"););
        GC_GENERIC
    } else { 
	jump W_[stg_stack_save_entries + WDS(type)];
	    // jumps to stg_gc_noregs after saving stuff
    }
#endif /* !NO_ARG_REGS */
}

/* -----------------------------------------------------------------------------
   Generic Apply (return point)

   The dual to stg_fun_gc_gen (above): this fragment returns to the
   function, passing arguments in the stack and in registers
   appropriately.  The stack layout is given above.
   -------------------------------------------------------------------------- */

INFO_TABLE_RET( stg_gc_fun, RET_FUN )
{
    R1 = Sp(2);
    Sp_adj(3);
#ifdef NO_ARG_REGS
    // Minor optimisation: there are no argument registers to load up,
    // so we can just jump straight to the function's entry point.
    jump %GET_ENTRY(UNTAG(R1));
#else
    W_ info;
    W_ type;
    
    info = %GET_FUN_INFO(UNTAG(R1));
    type = TO_W_(StgFunInfoExtra_fun_type(info));
    if (type == ARG_GEN || type == ARG_GEN_BIG) {
	jump StgFunInfoExtra_slow_apply(info);
    } else { 
	if (type == ARG_BCO) {
	    // cover this case just to be on the safe side
	    Sp_adj(-2);
	    Sp(1) = R1;
	    Sp(0) = stg_apply_interp_info;
	    jump stg_yield_to_interpreter;
	} else {
	    jump W_[stg_ap_stack_entries + WDS(type)];
	}
    }
#endif
}

/* -----------------------------------------------------------------------------
   Generic Heap Check Code.

   Called with Liveness mask in R9,  Return address in R10.
   Stack must be consistent (containing all necessary info pointers
   to relevant SRTs).

   See StgMacros.h for a description of the RET_DYN stack frame.

   We also define an stg_gen_yield here, because it's very similar.
   -------------------------------------------------------------------------- */

// For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P
// on a 64-bit machine, we'll end up wasting a couple of words, but
// it's not a big deal.

#define RESTORE_EVERYTHING			\
    L1   = L_[Sp + WDS(19)];			\
    D2   = D_[Sp + WDS(17)];			\
    D1   = D_[Sp + WDS(15)];			\
    F4   = F_[Sp + WDS(14)];			\
    F3   = F_[Sp + WDS(13)];			\
    F2   = F_[Sp + WDS(12)];			\
    F1   = F_[Sp + WDS(11)];			\
    R8 = Sp(10);				\
    R7 = Sp(9);					\
    R6 = Sp(8);					\
    R5 = Sp(7);					\
    R4 = Sp(6);					\
    R3 = Sp(5);					\
    R2 = Sp(4);					\
    R1 = Sp(3);					\
    Sp_adj(21);

#define RET_OFFSET (-19)

#define SAVE_EVERYTHING				\
    Sp_adj(-21);				\
    L_[Sp + WDS(19)] = L1;			\
    D_[Sp + WDS(17)] = D2;			\
    D_[Sp + WDS(15)] = D1;			\
    F_[Sp + WDS(14)] = F4;			\
    F_[Sp + WDS(13)] = F3;			\
    F_[Sp + WDS(12)] = F2;			\
    F_[Sp + WDS(11)] = F1;			\
    Sp(10) = R8;				\
    Sp(9) = R7;					\
    Sp(8) = R6;					\
    Sp(7) = R5;					\
    Sp(6) = R4;					\
    Sp(5) = R3;					\
    Sp(4) = R2;					\
    Sp(3) = R1;					\
    Sp(2) = R10;    /* return address */	\
    Sp(1) = R9;     /* liveness mask  */	\
    Sp(0) = stg_gc_gen_info;

INFO_TABLE_RET( stg_gc_gen, RET_DYN )
/* bitmap in the above info table is unused, the real one is on the stack. */
{
    RESTORE_EVERYTHING;
    jump Sp(RET_OFFSET); /* No %ENTRY_CODE( - this is an actual code ptr */
}

stg_gc_gen
{
    SAVE_EVERYTHING;
    GC_GENERIC
}	  

// A heap check at an unboxed tuple return point.  The return address
// is on the stack, and we can find it by using the offsets given
// to us in the liveness mask.
stg_gc_ut
{
    R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9)));
    SAVE_EVERYTHING;
    GC_GENERIC
}

/*
 * stg_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC
 * because we've just failed doYouWantToGC(), not a standard heap
 * check.  GC_GENERIC would end up returning StackOverflow.
 */
stg_gc_gen_hp
{
    SAVE_EVERYTHING;
    HP_GENERIC
}	  

/* -----------------------------------------------------------------------------
   Yields
   -------------------------------------------------------------------------- */

stg_gen_yield
{
    SAVE_EVERYTHING;
    YIELD_GENERIC
}

stg_yield_noregs
{
    YIELD_GENERIC;
}

/* -----------------------------------------------------------------------------
   Yielding to the interpreter... top of stack says what to do next.
   -------------------------------------------------------------------------- */

stg_yield_to_interpreter
{
    YIELD_TO_INTERPRETER;
}

/* -----------------------------------------------------------------------------
   Blocks
   -------------------------------------------------------------------------- */

stg_gen_block
{
    SAVE_EVERYTHING;
    BLOCK_GENERIC;
}

stg_block_noregs
{
    BLOCK_GENERIC;
}

stg_block_1
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_enter_info;
    BLOCK_GENERIC;
}

/* -----------------------------------------------------------------------------
 * takeMVar/putMVar-specific blocks
 *
 * Stack layout for a thread blocked in takeMVar:
 *      
 *       ret. addr
 *       ptr to MVar   (R1)
 *       stg_block_takemvar_info
 *
 * Stack layout for a thread blocked in putMVar:
 *      
 *       ret. addr
 *       ptr to Value  (R2)
 *       ptr to MVar   (R1)
 *       stg_block_putmvar_info
 *
 * See PrimOps.hc for a description of the workings of take/putMVar.
 * 
 * -------------------------------------------------------------------------- */

INFO_TABLE_RET( stg_block_takemvar, RET_SMALL, P_ unused )
{
    R1 = Sp(1);
    Sp_adj(2);
    jump takeMVarzh_fast;
}

// code fragment executed just before we return to the scheduler
stg_block_takemvar_finally
{
#ifdef THREADED_RTS
    unlockClosure(R3, stg_MVAR_DIRTY_info);
#else
    SET_INFO(R3, stg_MVAR_DIRTY_info);
#endif
    jump StgReturn;
}

stg_block_takemvar
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_block_takemvar_info;
    R3 = R1;
    BLOCK_BUT_FIRST(stg_block_takemvar_finally);
}

INFO_TABLE_RET( stg_block_putmvar, RET_SMALL, P_ unused1, P_ unused2 )
{
    R2 = Sp(2);
    R1 = Sp(1);
    Sp_adj(3);
    jump putMVarzh_fast;
}

// code fragment executed just before we return to the scheduler
stg_block_putmvar_finally
{
#ifdef THREADED_RTS
    unlockClosure(R3, stg_MVAR_DIRTY_info);
#else
    SET_INFO(R3, stg_MVAR_DIRTY_info);
#endif
    jump StgReturn;
}

stg_block_putmvar
{
    Sp_adj(-3);
    Sp(2) = R2;
    Sp(1) = R1;
    Sp(0) = stg_block_putmvar_info;
    R3 = R1;
    BLOCK_BUT_FIRST(stg_block_putmvar_finally);
}

// code fragment executed just before we return to the scheduler
stg_block_blackhole_finally
{
#if defined(THREADED_RTS)
    // The last thing we do is release sched_lock, which is
    // preventing other threads from accessing blackhole_queue and
    // picking up this thread before we are finished with it.
    RELEASE_LOCK(sched_mutex "ptr");
#endif
    jump StgReturn;
}

stg_block_blackhole
{
    Sp_adj(-2);
    Sp(1) = R1;
    Sp(0) = stg_enter_info;
    BLOCK_BUT_FIRST(stg_block_blackhole_finally);
}

INFO_TABLE_RET( stg_block_throwto, RET_SMALL, P_ unused, P_ unused )
{
    R2 = Sp(2);
    R1 = Sp(1);
    Sp_adj(3);
    jump killThreadzh_fast;
}

stg_block_throwto_finally
{
#ifdef THREADED_RTS
    foreign "C" throwToReleaseTarget (R3 "ptr");
#endif
    jump StgReturn;
}

stg_block_throwto
{
    Sp_adj(-3);
    Sp(2) = R2;
    Sp(1) = R1;
    Sp(0) = stg_block_throwto_info;
    BLOCK_BUT_FIRST(stg_block_throwto_finally);
}

#ifdef mingw32_HOST_OS
INFO_TABLE_RET( stg_block_async, RET_SMALL )
{
    W_ ares;
    W_ len, errC;

    ares = StgTSO_block_info(CurrentTSO);
    len = StgAsyncIOResult_len(ares);
    errC = StgAsyncIOResult_errCode(ares);
    StgTSO_block_info(CurrentTSO) = NULL;
    foreign "C" free(ares "ptr");
    R1 = len;
    Sp(0) = errC;
    jump %ENTRY_CODE(Sp(1));
}

stg_block_async
{
    Sp_adj(-1);
    Sp(0) = stg_block_async_info;
    BLOCK_GENERIC;
}

/* Used by threadDelay implementation; it would be desirable to get rid of
 * this free()'ing void return continuation.
 */
INFO_TABLE_RET( stg_block_async_void, RET_SMALL )
{
    W_ ares;

    ares = StgTSO_block_info(CurrentTSO);
    StgTSO_block_info(CurrentTSO) = NULL;
    foreign "C" free(ares "ptr");
    Sp_adj(1);
    jump %ENTRY_CODE(Sp(0));
}

stg_block_async_void
{
    Sp_adj(-1);
    Sp(0) = stg_block_async_void_info;
    BLOCK_GENERIC;
}

#endif

/* -----------------------------------------------------------------------------
   STM-specific waiting
   -------------------------------------------------------------------------- */

stg_block_stmwait_finally
{
    foreign "C" stmWaitUnlock(MyCapability() "ptr", R3 "ptr");
    jump StgReturn;
}

stg_block_stmwait
{
    BLOCK_BUT_FIRST(stg_block_stmwait_finally);
}