Diffstat (limited to 'rts/HeapStackCheck.cmm')
-rw-r--r-- | rts/HeapStackCheck.cmm | 964 |
1 file changed, 964 insertions, 0 deletions
diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm new file mode 100644 index 0000000000..4e5dd24596 --- /dev/null +++ b/rts/HeapStackCheck.cmm @@ -0,0 +1,964 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2004 + * + * Canned Heap-Check and Stack-Check sequences. + * + * This file is written in a subset of C--, extended with various + * features specific to GHC. It is compiled by GHC directly. For the + * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y. + * + * ---------------------------------------------------------------------------*/ + +#include "Cmm.h" + +/* Stack/Heap Check Failure + * ------------------------ + * + * On discovering that a stack or heap check has failed, we do the following: + * + * - If the context_switch flag is set, indicating that there are more + * threads waiting to run, we yield to the scheduler + * (return ThreadYielding). + * + * - If Hp > HpLim, we've had a heap check failure. This means we've + * come to the end of the current heap block, so we try to chain + * another block on with ExtendNursery(). + * + * - If this succeeds, we carry on without returning to the + * scheduler. + * + * - If it fails, we return to the scheduler claiming HeapOverflow + * so that a garbage collection can be performed. + * + * - If Hp <= HpLim, it must have been a stack check that failed. In + * which case, we return to the scheduler claiming StackOverflow, the + * scheduler will either increase the size of our stack, or raise + * an exception if the stack is already too big. + * + * The effect of checking for context switch only in the heap/stack check + * failure code is that we'll switch threads after the current thread has + * reached the end of its heap block. If a thread isn't allocating + * at all, it won't yield. Hopefully this won't be a problem in practice. + */ + +#define PRE_RETURN(why,what_next) \ + StgTSO_what_next(CurrentTSO) = what_next::I16; \ + StgRegTable_rRet(BaseReg) = why; \ + R1 = BaseReg; + +/* Remember that the return address is *removed* when returning to a + * ThreadRunGHC thread. + */ + +#define GC_GENERIC \ + DEBUG_ONLY(foreign "C" heapCheckFail()); \ + if (Hp > HpLim) { \ + Hp = Hp - HpAlloc/*in bytes*/; \ + if (HpAlloc <= BLOCK_SIZE \ + && bdescr_link(CurrentNursery) != NULL) { \ + CLOSE_NURSERY(); \ + CurrentNursery = bdescr_link(CurrentNursery); \ + OPEN_NURSERY(); \ + if (CInt[context_switch] != 0 :: CInt) { \ + R1 = ThreadYielding; \ + goto sched; \ + } else { \ + jump %ENTRY_CODE(Sp(0)); \ + } \ + } else { \ + R1 = HeapOverflow; \ + goto sched; \ + } \ + } else { \ + R1 = StackOverflow; \ + } \ + sched: \ + PRE_RETURN(R1,ThreadRunGHC); \ + jump stg_returnToSched; + +#define HP_GENERIC \ + PRE_RETURN(HeapOverflow, ThreadRunGHC) \ + jump stg_returnToSched; + +#define BLOCK_GENERIC \ + PRE_RETURN(ThreadBlocked, ThreadRunGHC) \ + jump stg_returnToSched; + +#define YIELD_GENERIC \ + PRE_RETURN(ThreadYielding, ThreadRunGHC) \ + jump stg_returnToSched; + +#define BLOCK_BUT_FIRST(c) \ + PRE_RETURN(ThreadBlocked, ThreadRunGHC) \ + R2 = c; \ + jump stg_returnToSchedButFirst; + +#define YIELD_TO_INTERPRETER \ + PRE_RETURN(ThreadYielding, ThreadInterpret) \ + jump stg_returnToSchedNotPaused; + +/* ----------------------------------------------------------------------------- + Heap checks in thunks/functions. + + In these cases, node always points to the function closure. 
This gives + us an easy way to return to the function: just leave R1 on the top of + the stack, and have the scheduler enter it to return. + + There are canned sequences for 'n' pointer values in registers. + -------------------------------------------------------------------------- */ + +INFO_TABLE_RET( stg_enter, 1/*framesize*/, 0/*bitmap*/, RET_SMALL) +{ + R1 = Sp(1); + Sp_adj(2); + ENTER(); +} + +__stg_gc_enter_1 +{ + Sp_adj(-2); + Sp(1) = R1; + Sp(0) = stg_enter_info; + GC_GENERIC +} + +#if defined(GRAN) +/* + ToDo: merge the block and yield macros, calling something like BLOCK(N) + at the end; +*/ + +/* + Should we actually ever do a yield in such a case?? -- HWL +*/ +gran_yield_0 +{ + SAVE_THREAD_STATE(); + TSO_what_next(CurrentTSO) = ThreadRunGHC; + R1 = ThreadYielding; + jump StgReturn; +} + +gran_yield_1 +{ + Sp_adj(-1); + Sp(0) = R1; + SAVE_THREAD_STATE(); + TSO_what_next(CurrentTSO) = ThreadRunGHC; + R1 = ThreadYielding; + jump StgReturn; +} + +/*- 2 Regs--------------------------------------------------------------------*/ + +gran_yield_2 +{ + Sp_adj(-2); + Sp(1) = R2; + Sp(0) = R1; + SAVE_THREAD_STATE(); + TSO_what_next(CurrentTSO) = ThreadRunGHC; + R1 = ThreadYielding; + jump StgReturn; +} + +/*- 3 Regs -------------------------------------------------------------------*/ + +gran_yield_3 +{ + Sp_adj(-3); + Sp(2) = R3; + Sp(1) = R2; + Sp(0) = R1; + SAVE_THREAD_STATE(); + TSO_what_next(CurrentTSO) = ThreadRunGHC; + R1 = ThreadYielding; + jump StgReturn; +} + +/*- 4 Regs -------------------------------------------------------------------*/ + +gran_yield_4 +{ + Sp_adj(-4); + Sp(3) = R4; + Sp(2) = R3; + Sp(1) = R2; + Sp(0) = R1; + SAVE_THREAD_STATE(); + TSO_what_next(CurrentTSO) = ThreadRunGHC; + R1 = ThreadYielding; + jump StgReturn; +} + +/*- 5 Regs -------------------------------------------------------------------*/ + +gran_yield_5 +{ + Sp_adj(-5); + Sp(4) = R5; + Sp(3) = R4; + Sp(2) = R3; + Sp(1) = R2; + Sp(0) = R1; + SAVE_THREAD_STATE(); + TSO_what_next(CurrentTSO) = ThreadRunGHC; + R1 = ThreadYielding; + jump StgReturn; +} + +/*- 6 Regs -------------------------------------------------------------------*/ + +gran_yield_6 +{ + Sp_adj(-6); + Sp(5) = R6; + Sp(4) = R5; + Sp(3) = R4; + Sp(2) = R3; + Sp(1) = R2; + Sp(0) = R1; + SAVE_THREAD_STATE(); + TSO_what_next(CurrentTSO) = ThreadRunGHC; + R1 = ThreadYielding; + jump StgReturn; +} + +/*- 7 Regs -------------------------------------------------------------------*/ + +gran_yield_7 +{ + Sp_adj(-7); + Sp(6) = R7; + Sp(5) = R6; + Sp(4) = R5; + Sp(3) = R4; + Sp(2) = R3; + Sp(1) = R2; + Sp(0) = R1; + SAVE_THREAD_STATE(); + TSO_what_next(CurrentTSO) = ThreadRunGHC; + R1 = ThreadYielding; + jump StgReturn; +} + +/*- 8 Regs -------------------------------------------------------------------*/ + +gran_yield_8 +{ + Sp_adj(-8); + Sp(7) = R8; + Sp(6) = R7; + Sp(5) = R6; + Sp(4) = R5; + Sp(3) = R4; + Sp(2) = R3; + Sp(1) = R2; + Sp(0) = R1; + SAVE_THREAD_STATE(); + TSO_what_next(CurrentTSO) = ThreadRunGHC; + R1 = ThreadYielding; + jump StgReturn; +} + +// the same routines but with a block rather than a yield + +gran_block_1 +{ + Sp_adj(-1); + Sp(0) = R1; + SAVE_THREAD_STATE(); + TSO_what_next(CurrentTSO) = ThreadRunGHC; + R1 = ThreadBlocked; + jump StgReturn; +} + +/*- 2 Regs--------------------------------------------------------------------*/ + +gran_block_2 +{ + Sp_adj(-2); + Sp(1) = R2; + Sp(0) = R1; + SAVE_THREAD_STATE(); + TSO_what_next(CurrentTSO) = ThreadRunGHC; + R1 = ThreadBlocked; + jump StgReturn; +} + +/*- 3 Regs 
-------------------------------------------------------------------*/ + +gran_block_3 +{ + Sp_adj(-3); + Sp(2) = R3; + Sp(1) = R2; + Sp(0) = R1; + SAVE_THREAD_STATE(); + TSO_what_next(CurrentTSO) = ThreadRunGHC; + R1 = ThreadBlocked; + jump StgReturn; +} + +/*- 4 Regs -------------------------------------------------------------------*/ + +gran_block_4 +{ + Sp_adj(-4); + Sp(3) = R4; + Sp(2) = R3; + Sp(1) = R2; + Sp(0) = R1; + SAVE_THREAD_STATE(); + TSO_what_next(CurrentTSO) = ThreadRunGHC; + R1 = ThreadBlocked; + jump StgReturn; +} + +/*- 5 Regs -------------------------------------------------------------------*/ + +gran_block_5 +{ + Sp_adj(-5); + Sp(4) = R5; + Sp(3) = R4; + Sp(2) = R3; + Sp(1) = R2; + Sp(0) = R1; + SAVE_THREAD_STATE(); + TSO_what_next(CurrentTSO) = ThreadRunGHC; + R1 = ThreadBlocked; + jump StgReturn; +} + +/*- 6 Regs -------------------------------------------------------------------*/ + +gran_block_6 +{ + Sp_adj(-6); + Sp(5) = R6; + Sp(4) = R5; + Sp(3) = R4; + Sp(2) = R3; + Sp(1) = R2; + Sp(0) = R1; + SAVE_THREAD_STATE(); + TSO_what_next(CurrentTSO) = ThreadRunGHC; + R1 = ThreadBlocked; + jump StgReturn; +} + +/*- 7 Regs -------------------------------------------------------------------*/ + +gran_block_7 +{ + Sp_adj(-7); + Sp(6) = R7; + Sp(5) = R6; + Sp(4) = R5; + Sp(3) = R4; + Sp(2) = R3; + Sp(1) = R2; + Sp(0) = R1; + SAVE_THREAD_STATE(); + TSO_what_next(CurrentTSO) = ThreadRunGHC; + R1 = ThreadBlocked; + jump StgReturn; +} + +/*- 8 Regs -------------------------------------------------------------------*/ + +gran_block_8 +{ + Sp_adj(-8); + Sp(7) = R8; + Sp(6) = R7; + Sp(5) = R6; + Sp(4) = R5; + Sp(3) = R4; + Sp(2) = R3; + Sp(1) = R2; + Sp(0) = R1; + SAVE_THREAD_STATE(); + TSO_what_next(CurrentTSO) = ThreadRunGHC; + R1 = ThreadBlocked; + jump StgReturn; +} + +#endif + +#if 0 && defined(PAR) + +/* + Similar to stg_block_1 (called via StgMacro BLOCK_NP) but separates the + saving of the thread state from the actual jump via an StgReturn. + We need this separation because we call RTS routines in blocking entry codes + before jumping back into the RTS (see parallel/FetchMe.hc). +*/ + +par_block_1_no_jump +{ + Sp_adj(-1); + Sp(0) = R1; + SAVE_THREAD_STATE(); +} + +par_jump +{ + TSO_what_next(CurrentTSO) = ThreadRunGHC; + R1 = ThreadBlocked; + jump StgReturn; +} + +#endif + +/* ----------------------------------------------------------------------------- + Heap checks in Primitive case alternatives + + A primitive case alternative is entered with a value either in + R1, FloatReg1 or D1 depending on the return convention. All the + cases are covered below. + -------------------------------------------------------------------------- */ + +/*-- No Registers live ------------------------------------------------------ */ + +stg_gc_noregs +{ + GC_GENERIC +} + +/*-- void return ------------------------------------------------------------ */ + +INFO_TABLE_RET( stg_gc_void, 0/*framesize*/, 0/*bitmap*/, RET_SMALL) +{ + Sp_adj(1); + jump %ENTRY_CODE(Sp(0)); +} + +/*-- R1 is boxed/unpointed -------------------------------------------------- */ + +INFO_TABLE_RET( stg_gc_unpt_r1, 1/*framesize*/, 0/*bitmap*/, RET_SMALL) +{ + R1 = Sp(1); + Sp_adj(2); + jump %ENTRY_CODE(Sp(0)); +} + +stg_gc_unpt_r1 +{ + Sp_adj(-2); + Sp(1) = R1; + Sp(0) = stg_gc_unpt_r1_info; + GC_GENERIC +} + +/*-- R1 is unboxed -------------------------------------------------- */ + +/* the 1 is a bitmap - i.e. 1 non-pointer word on the stack. 
*/ +INFO_TABLE_RET( stg_gc_unbx_r1, 1/*framesize*/, 1/*bitmap*/, RET_SMALL ) +{ + R1 = Sp(1); + Sp_adj(2); + jump %ENTRY_CODE(Sp(0)); +} + +stg_gc_unbx_r1 +{ + Sp_adj(-2); + Sp(1) = R1; + Sp(0) = stg_gc_unbx_r1_info; + GC_GENERIC +} + +/*-- F1 contains a float ------------------------------------------------- */ + +INFO_TABLE_RET( stg_gc_f1, 1/*framesize*/, 1/*bitmap*/, RET_SMALL ) +{ + F1 = F_[Sp+WDS(1)]; + Sp_adj(2); + jump %ENTRY_CODE(Sp(0)); +} + +stg_gc_f1 +{ + Sp_adj(-2); + F_[Sp + WDS(1)] = F1; + Sp(0) = stg_gc_f1_info; + GC_GENERIC +} + +/*-- D1 contains a double ------------------------------------------------- */ + +/* we support doubles of either 1 or 2 words in size */ + +#if SIZEOF_DOUBLE == SIZEOF_VOID_P +# define DBL_BITMAP 1 +# define DBL_WORDS 1 +#else +# define DBL_BITMAP 3 +# define DBL_WORDS 2 +#endif + +INFO_TABLE_RET( stg_gc_d1, DBL_WORDS/*framesize*/, DBL_BITMAP/*bitmap*/, RET_SMALL ) +{ + D1 = D_[Sp + WDS(1)]; + Sp = Sp + WDS(1) + SIZEOF_StgDouble; + jump %ENTRY_CODE(Sp(0)); +} + +stg_gc_d1 +{ + Sp = Sp - WDS(1) - SIZEOF_StgDouble; + D_[Sp + WDS(1)] = D1; + Sp(0) = stg_gc_d1_info; + GC_GENERIC +} + + +/*-- L1 contains an int64 ------------------------------------------------- */ + +/* we support int64s of either 1 or 2 words in size */ + +#if SIZEOF_VOID_P == 8 +# define LLI_BITMAP 1 +# define LLI_WORDS 1 +#else +# define LLI_BITMAP 3 +# define LLI_WORDS 2 +#endif + +INFO_TABLE_RET( stg_gc_l1, LLI_WORDS/*framesize*/, LLI_BITMAP/*bitmap*/, RET_SMALL ) +{ + L1 = L_[Sp + WDS(1)]; + Sp_adj(1) + SIZEOF_StgWord64; + jump %ENTRY_CODE(Sp(0)); +} + +stg_gc_l1 +{ + Sp_adj(-1) - SIZEOF_StgWord64; + L_[Sp + WDS(1)] = L1; + Sp(0) = stg_gc_l1_info; + GC_GENERIC +} + +/*-- Unboxed tuple return, one pointer (unregisterised build only) ---------- */ + +INFO_TABLE_RET( stg_ut_1_0_unreg, 1/*size*/, 0/*BITMAP*/, RET_SMALL ) +{ + Sp_adj(1); + // one ptr is on the stack (Sp(0)) + jump %ENTRY_CODE(Sp(1)); +} + +/* ----------------------------------------------------------------------------- + Generic function entry heap check code. + + At a function entry point, the arguments are as per the calling convention, + i.e. some in regs and some on the stack. There may or may not be + a pointer to the function closure in R1 - if there isn't, then the heap + check failure code in the function will arrange to load it. + + The function's argument types are described in its info table, so we + can just jump to this bit of generic code to save away all the + registers and return to the scheduler. + + This code arranges the stack like this: + + | .... | + | args | + +---------------------+ + | f_closure | + +---------------------+ + | size | + +---------------------+ + | stg_gc_fun_info | + +---------------------+ + + The size is the number of words of arguments on the stack, and is cached + in the frame in order to simplify stack walking: otherwise the size of + this stack frame would have to be calculated by looking at f's info table. 
+ + -------------------------------------------------------------------------- */ + +__stg_gc_fun +{ + W_ size; + W_ info; + W_ type; + + info = %GET_FUN_INFO(R1); + + // cache the size + type = TO_W_(StgFunInfoExtra_fun_type(info)); + if (type == ARG_GEN) { + size = BITMAP_SIZE(StgFunInfoExtra_bitmap(info)); + } else { + if (type == ARG_GEN_BIG) { +#ifdef TABLES_NEXT_TO_CODE + // bitmap field holds an offset + size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) + + %GET_ENTRY(R1) /* ### */ ); +#else + size = StgLargeBitmap_size( StgFunInfoExtra_bitmap(info) ); +#endif + } else { + size = BITMAP_SIZE(W_[stg_arg_bitmaps + WDS(type)]); + } + } + +#ifdef NO_ARG_REGS + // we don't have to save any registers away + Sp_adj(-3); + Sp(2) = R1; + Sp(1) = size; + Sp(0) = stg_gc_fun_info; + GC_GENERIC +#else + W_ type; + type = TO_W_(StgFunInfoExtra_fun_type(info)); + // cache the size + if (type == ARG_GEN || type == ARG_GEN_BIG) { + // regs already saved by the heap check code + Sp_adj(-3); + Sp(2) = R1; + Sp(1) = size; + Sp(0) = stg_gc_fun_info; + // DEBUG_ONLY(foreign "C" debugBelch("stg_fun_gc_gen(ARG_GEN)");); + GC_GENERIC + } else { + jump W_[stg_stack_save_entries + WDS(type)]; + // jumps to stg_gc_noregs after saving stuff + } +#endif /* !NO_ARG_REGS */ +} + +/* ----------------------------------------------------------------------------- + Generic Apply (return point) + + The dual to stg_fun_gc_gen (above): this fragment returns to the + function, passing arguments in the stack and in registers + appropriately. The stack layout is given above. + -------------------------------------------------------------------------- */ + +INFO_TABLE_RET( stg_gc_fun, 0/*framesize*/, 0/*bitmap*/, RET_FUN ) +{ + R1 = Sp(2); + Sp_adj(3); +#ifdef NO_ARG_REGS + // Minor optimisation: there are no argument registers to load up, + // so we can just jump straight to the function's entry point. + jump %GET_ENTRY(R1); +#else + W_ info; + W_ type; + + info = %GET_FUN_INFO(R1); + type = TO_W_(StgFunInfoExtra_fun_type(info)); + if (type == ARG_GEN || type == ARG_GEN_BIG) { + jump StgFunInfoExtra_slow_apply(info); + } else { + if (type == ARG_BCO) { + // cover this case just to be on the safe side + Sp_adj(-2); + Sp(1) = R1; + Sp(0) = stg_apply_interp_info; + jump stg_yield_to_interpreter; + } else { + jump W_[stg_ap_stack_entries + WDS(type)]; + } + } +#endif +} + +/* ----------------------------------------------------------------------------- + Generic Heap Check Code. + + Called with Liveness mask in R9, Return address in R10. + Stack must be consistent (containing all necessary info pointers + to relevant SRTs). + + See StgMacros.h for a description of the RET_DYN stack frame. + + We also define an stg_gen_yield here, because it's very similar. + -------------------------------------------------------------------------- */ + +// For simplicity, we assume that SIZEOF_DOUBLE == 2*SIZEOF_VOID_P +// on a 64-bit machine, we'll end up wasting a couple of words, but +// it's not a big deal. 
+ +#define RESTORE_EVERYTHING \ + L1 = L_[Sp + WDS(19)]; \ + D2 = D_[Sp + WDS(17)]; \ + D1 = D_[Sp + WDS(15)]; \ + F4 = F_[Sp + WDS(14)]; \ + F3 = F_[Sp + WDS(13)]; \ + F2 = F_[Sp + WDS(12)]; \ + F1 = F_[Sp + WDS(11)]; \ + R8 = Sp(10); \ + R7 = Sp(9); \ + R6 = Sp(8); \ + R5 = Sp(7); \ + R4 = Sp(6); \ + R3 = Sp(5); \ + R2 = Sp(4); \ + R1 = Sp(3); \ + Sp_adj(21); + +#define RET_OFFSET (-19) + +#define SAVE_EVERYTHING \ + Sp_adj(-21); \ + L_[Sp + WDS(19)] = L1; \ + D_[Sp + WDS(17)] = D2; \ + D_[Sp + WDS(15)] = D1; \ + F_[Sp + WDS(14)] = F4; \ + F_[Sp + WDS(13)] = F3; \ + F_[Sp + WDS(12)] = F2; \ + F_[Sp + WDS(11)] = F1; \ + Sp(10) = R8; \ + Sp(9) = R7; \ + Sp(8) = R6; \ + Sp(7) = R5; \ + Sp(6) = R4; \ + Sp(5) = R3; \ + Sp(4) = R2; \ + Sp(3) = R1; \ + Sp(2) = R10; /* return address */ \ + Sp(1) = R9; /* liveness mask */ \ + Sp(0) = stg_gc_gen_info; + +INFO_TABLE_RET( stg_gc_gen, 0/*framesize*/, 0/*bitmap*/, RET_DYN ) +/* bitmap in the above info table is unused, the real one is on the stack. */ +{ + RESTORE_EVERYTHING; + jump Sp(RET_OFFSET); /* No %ENTRY_CODE( - this is an actual code ptr */ +} + +stg_gc_gen +{ + SAVE_EVERYTHING; + GC_GENERIC +} + +// A heap check at an unboxed tuple return point. The return address +// is on the stack, and we can find it by using the offsets given +// to us in the liveness mask. +stg_gc_ut +{ + R10 = %ENTRY_CODE(Sp(RET_DYN_NONPTRS(R9) + RET_DYN_PTRS(R9))); + SAVE_EVERYTHING; + GC_GENERIC +} + +/* + * stg_gen_hp is used by MAYBE_GC, where we can't use GC_GENERIC + * because we've just failed doYouWantToGC(), not a standard heap + * check. GC_GENERIC would end up returning StackOverflow. + */ +stg_gc_gen_hp +{ + SAVE_EVERYTHING; + HP_GENERIC +} + +/* ----------------------------------------------------------------------------- + Yields + -------------------------------------------------------------------------- */ + +stg_gen_yield +{ + SAVE_EVERYTHING; + YIELD_GENERIC +} + +stg_yield_noregs +{ + YIELD_GENERIC; +} + +/* ----------------------------------------------------------------------------- + Yielding to the interpreter... top of stack says what to do next. + -------------------------------------------------------------------------- */ + +stg_yield_to_interpreter +{ + YIELD_TO_INTERPRETER; +} + +/* ----------------------------------------------------------------------------- + Blocks + -------------------------------------------------------------------------- */ + +stg_gen_block +{ + SAVE_EVERYTHING; + BLOCK_GENERIC; +} + +stg_block_noregs +{ + BLOCK_GENERIC; +} + +stg_block_1 +{ + Sp_adj(-2); + Sp(1) = R1; + Sp(0) = stg_enter_info; + BLOCK_GENERIC; +} + +/* ----------------------------------------------------------------------------- + * takeMVar/putMVar-specific blocks + * + * Stack layout for a thread blocked in takeMVar: + * + * ret. addr + * ptr to MVar (R1) + * stg_block_takemvar_info + * + * Stack layout for a thread blocked in putMVar: + * + * ret. addr + * ptr to Value (R2) + * ptr to MVar (R1) + * stg_block_putmvar_info + * + * See PrimOps.hc for a description of the workings of take/putMVar. 
+ * + * -------------------------------------------------------------------------- */ + +INFO_TABLE_RET( stg_block_takemvar, 1/*framesize*/, 0/*bitmap*/, RET_SMALL ) +{ + R1 = Sp(1); + Sp_adj(2); + jump takeMVarzh_fast; +} + +// code fragment executed just before we return to the scheduler +stg_block_takemvar_finally +{ +#ifdef THREADED_RTS + foreign "C" unlockClosure(R3 "ptr", stg_EMPTY_MVAR_info); +#endif + jump StgReturn; +} + +stg_block_takemvar +{ + Sp_adj(-2); + Sp(1) = R1; + Sp(0) = stg_block_takemvar_info; + R3 = R1; + BLOCK_BUT_FIRST(stg_block_takemvar_finally); +} + +INFO_TABLE_RET( stg_block_putmvar, 2/*framesize*/, 0/*bitmap*/, RET_SMALL ) +{ + R2 = Sp(2); + R1 = Sp(1); + Sp_adj(3); + jump putMVarzh_fast; +} + +// code fragment executed just before we return to the scheduler +stg_block_putmvar_finally +{ +#ifdef THREADED_RTS + foreign "C" unlockClosure(R3 "ptr", stg_FULL_MVAR_info); +#endif + jump StgReturn; +} + +stg_block_putmvar +{ + Sp_adj(-3); + Sp(2) = R2; + Sp(1) = R1; + Sp(0) = stg_block_putmvar_info; + R3 = R1; + BLOCK_BUT_FIRST(stg_block_putmvar_finally); +} + +// code fragment executed just before we return to the scheduler +stg_block_blackhole_finally +{ +#if defined(THREADED_RTS) + // The last thing we do is release sched_lock, which is + // preventing other threads from accessing blackhole_queue and + // picking up this thread before we are finished with it. + foreign "C" RELEASE_LOCK(sched_mutex "ptr"); +#endif + jump StgReturn; +} + +stg_block_blackhole +{ + Sp_adj(-2); + Sp(1) = R1; + Sp(0) = stg_enter_info; + BLOCK_BUT_FIRST(stg_block_blackhole_finally); +} + +#ifdef mingw32_HOST_OS +INFO_TABLE_RET( stg_block_async, 0/*framesize*/, 0/*bitmap*/, RET_SMALL ) +{ + W_ ares; + W_ len, errC; + + ares = StgTSO_block_info(CurrentTSO); + len = StgAsyncIOResult_len(ares); + errC = StgAsyncIOResult_errCode(ares); + StgTSO_block_info(CurrentTSO) = NULL; + foreign "C" free(ares "ptr"); + R1 = len; + Sp(0) = errC; + jump %ENTRY_CODE(Sp(1)); +} + +stg_block_async +{ + Sp_adj(-1); + Sp(0) = stg_block_async_info; + BLOCK_GENERIC; +} + +/* Used by threadDelay implementation; it would be desirable to get rid of + * this free()'ing void return continuation. + */ +INFO_TABLE_RET( stg_block_async_void, 0/*framesize*/, 0/*bitmap*/, RET_SMALL ) +{ + W_ ares; + + ares = StgTSO_block_info(CurrentTSO); + StgTSO_block_info(CurrentTSO) = NULL; + foreign "C" free(ares "ptr"); + Sp_adj(1); + jump %ENTRY_CODE(Sp(0)); +} + +stg_block_async_void +{ + Sp_adj(-1); + Sp(0) = stg_block_async_void_info; + BLOCK_GENERIC; +} + +#endif + +/* ----------------------------------------------------------------------------- + STM-specific waiting + -------------------------------------------------------------------------- */ + +stg_block_stmwait_finally +{ + foreign "C" stmWaitUnlock(MyCapability() "ptr", R3 "ptr"); + jump StgReturn; +} + +stg_block_stmwait +{ + BLOCK_BUT_FIRST(stg_block_stmwait_finally); +} |