summaryrefslogtreecommitdiff
path: root/rts
diff options
context:
space:
mode:
authorLuite Stegeman <stegeman@gmail.com>2021-01-22 00:09:17 +0100
committerMarge Bot <ben+marge-bot@smart-cactus.org>2021-03-20 07:49:15 -0400
commit1f94e0f7601f8e22fdd81a47f130650265a44196 (patch)
treed06d02317049b56763b2f1da27f71f3663efa5a0 /rts
parent7de3532f0317032f75b76150c5d3a6f76178be04 (diff)
downloadhaskell-1f94e0f7601f8e22fdd81a47f130650265a44196.tar.gz
Generate GHCi bytecode from STG instead of Core and support unboxed
tuples and sums. fixes #1257
Diffstat (limited to 'rts')
-rw-r--r--rts/Disassembler.c16
-rw-r--r--rts/Interpreter.c143
-rw-r--r--rts/Printer.c60
-rw-r--r--rts/RtsSymbols.c2
-rw-r--r--rts/StgMiscClosures.cmm268
5 files changed, 466 insertions, 23 deletions
diff --git a/rts/Disassembler.c b/rts/Disassembler.c
index 67a451e7e6..451521d57e 100644
--- a/rts/Disassembler.c
+++ b/rts/Disassembler.c
@@ -148,6 +148,13 @@ disInstr ( StgBCO *bco, int pc )
debugBelch("PUSH_ALTS_V " ); printPtr( ptrs[instrs[pc]] );
debugBelch("\n");
pc += 1; break;
+ case bci_PUSH_ALTS_T:
+ debugBelch("PUSH_ALTS_T ");
+ printPtr( ptrs[instrs[pc]] );
+ debugBelch(" 0x%" FMT_HexWord " ", literals[instrs[pc+1]] );
+ printPtr( ptrs[instrs[pc+2]] );
+ debugBelch("\n");
+ pc += 3; break;
case bci_PUSH_PAD8:
debugBelch("PUSH_PAD8\n");
pc += 1; break;
@@ -310,6 +317,9 @@ disInstr ( StgBCO *bco, int pc )
case bci_RETURN_V:
debugBelch("RETURN_V\n" );
break;
+ case bci_RETURN_T:
+ debugBelch("RETURN_T\n ");
+ break;
default:
barf("disInstr: unknown opcode %u", (unsigned int) instr);
@@ -317,12 +327,6 @@ disInstr ( StgBCO *bco, int pc )
return pc;
}
-
-/* Something of a kludge .. how do we know where the end of the insn
- array is, since it isn't recorded anywhere? Answer: the first
- short is the number of bytecodes which follow it.
- See GHC.CoreToByteCode.linkBCO.insns_arr for construction ...
-*/
void disassemble( StgBCO *bco )
{
uint32_t i, j;
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index 6929aec5fd..efbfd091d8 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -4,6 +4,7 @@
* Copyright (c) The GHC Team, 1994-2002.
* ---------------------------------------------------------------------------*/
+
#include "PosixSource.h"
#include "Rts.h"
#include "RtsAPI.h"
@@ -681,12 +682,13 @@ do_return_unboxed:
|| SpW(0) == (W_)&stg_ret_f_info
|| SpW(0) == (W_)&stg_ret_d_info
|| SpW(0) == (W_)&stg_ret_l_info
+ || SpW(0) == (W_)&stg_ret_t_info
);
IF_DEBUG(interpreter,
debugBelch(
"\n---------------------------------------------------------------\n");
- debugBelch("Returning: "); printObj(obj);
+ debugBelch("Returning unboxed\n");
debugBelch("Sp = %p\n", Sp);
#if defined(PROFILING)
fprintCCS(stderr, cap->r.rCCCS);
@@ -697,7 +699,7 @@ do_return_unboxed:
debugBelch("\n\n");
);
- // get the offset of the stg_ctoi_ret_XXX itbl
+ // get the offset of the header of the next stack frame
offset = stack_frame_sizeW((StgClosure *)Sp);
switch (get_itbl((StgClosure*)(Sp_plusW(offset)))->type) {
@@ -934,6 +936,43 @@ run_BCO_return_unboxed:
// Stack checks aren't necessary at return points, the stack use
// is aggregated into the enclosing function entry point.
+#if defined(PROFILING)
+ /*
+ Restore the current cost centre stack if a tuple is being returned.
+
+ When a "simple" unboxed value is returned, the cccs is restored with
+ an stg_restore_cccs frame on the stack, for example:
+
+ ...
+ stg_ctoi_D1
+ <CCCS>
+ stg_restore_cccs
+
+ But stg_restore_cccs cannot deal with tuples, which may have more
+ things on the stack. Therefore we store the CCCS inside the
+ stg_ctoi_t frame.
+
+ If we have a tuple being returned, the stack looks like this:
+
+ ...
+ <CCCS> <- to restore, Sp offset <next frame + 4 words>
+ tuple_BCO
+ tuple_info
+ cont_BCO
+ stg_ctoi_t <- next frame
+ tuple_data_1
+ ...
+ tuple_data_n
+ tuple_info
+ tuple_BCO
+ stg_ret_t <- Sp
+ */
+
+ if(SpW(0) == (W_)&stg_ret_t_info) {
+ cap->r.rCCCS = (CostCentreStack*)SpW(stack_frame_sizeW((StgClosure *)Sp) + 4);
+ }
+#endif
+
goto run_BCO;
run_BCO_fun:
@@ -1329,6 +1368,100 @@ run_BCO:
goto nextInsn;
}
+ case bci_PUSH_ALTS_T: {
+ int o_bco = BCO_GET_LARGE_ARG;
+ W_ tuple_info = (W_)BCO_LIT(BCO_GET_LARGE_ARG);
+ int o_tuple_bco = BCO_GET_LARGE_ARG;
+
+#if defined(PROFILING)
+ SpW(-1) = (W_)cap->r.rCCCS;
+ Sp_subW(1);
+#endif
+
+ SpW(-1) = BCO_PTR(o_tuple_bco);
+ SpW(-2) = tuple_info;
+ SpW(-3) = BCO_PTR(o_bco);
+ W_ ctoi_t_offset;
+ int tuple_stack_words = tuple_info & 0x3fff;
+ switch(tuple_stack_words) {
+ case 0: ctoi_t_offset = (W_)&stg_ctoi_t0_info; break;
+ case 1: ctoi_t_offset = (W_)&stg_ctoi_t1_info; break;
+ case 2: ctoi_t_offset = (W_)&stg_ctoi_t2_info; break;
+ case 3: ctoi_t_offset = (W_)&stg_ctoi_t3_info; break;
+ case 4: ctoi_t_offset = (W_)&stg_ctoi_t4_info; break;
+ case 5: ctoi_t_offset = (W_)&stg_ctoi_t5_info; break;
+ case 6: ctoi_t_offset = (W_)&stg_ctoi_t6_info; break;
+ case 7: ctoi_t_offset = (W_)&stg_ctoi_t7_info; break;
+ case 8: ctoi_t_offset = (W_)&stg_ctoi_t8_info; break;
+ case 9: ctoi_t_offset = (W_)&stg_ctoi_t9_info; break;
+
+ case 10: ctoi_t_offset = (W_)&stg_ctoi_t10_info; break;
+ case 11: ctoi_t_offset = (W_)&stg_ctoi_t11_info; break;
+ case 12: ctoi_t_offset = (W_)&stg_ctoi_t12_info; break;
+ case 13: ctoi_t_offset = (W_)&stg_ctoi_t13_info; break;
+ case 14: ctoi_t_offset = (W_)&stg_ctoi_t14_info; break;
+ case 15: ctoi_t_offset = (W_)&stg_ctoi_t15_info; break;
+ case 16: ctoi_t_offset = (W_)&stg_ctoi_t16_info; break;
+ case 17: ctoi_t_offset = (W_)&stg_ctoi_t17_info; break;
+ case 18: ctoi_t_offset = (W_)&stg_ctoi_t18_info; break;
+ case 19: ctoi_t_offset = (W_)&stg_ctoi_t19_info; break;
+
+ case 20: ctoi_t_offset = (W_)&stg_ctoi_t20_info; break;
+ case 21: ctoi_t_offset = (W_)&stg_ctoi_t21_info; break;
+ case 22: ctoi_t_offset = (W_)&stg_ctoi_t22_info; break;
+ case 23: ctoi_t_offset = (W_)&stg_ctoi_t23_info; break;
+ case 24: ctoi_t_offset = (W_)&stg_ctoi_t24_info; break;
+ case 25: ctoi_t_offset = (W_)&stg_ctoi_t25_info; break;
+ case 26: ctoi_t_offset = (W_)&stg_ctoi_t26_info; break;
+ case 27: ctoi_t_offset = (W_)&stg_ctoi_t27_info; break;
+ case 28: ctoi_t_offset = (W_)&stg_ctoi_t28_info; break;
+ case 29: ctoi_t_offset = (W_)&stg_ctoi_t29_info; break;
+
+ case 30: ctoi_t_offset = (W_)&stg_ctoi_t30_info; break;
+ case 31: ctoi_t_offset = (W_)&stg_ctoi_t31_info; break;
+ case 32: ctoi_t_offset = (W_)&stg_ctoi_t32_info; break;
+ case 33: ctoi_t_offset = (W_)&stg_ctoi_t33_info; break;
+ case 34: ctoi_t_offset = (W_)&stg_ctoi_t34_info; break;
+ case 35: ctoi_t_offset = (W_)&stg_ctoi_t35_info; break;
+ case 36: ctoi_t_offset = (W_)&stg_ctoi_t36_info; break;
+ case 37: ctoi_t_offset = (W_)&stg_ctoi_t37_info; break;
+ case 38: ctoi_t_offset = (W_)&stg_ctoi_t38_info; break;
+ case 39: ctoi_t_offset = (W_)&stg_ctoi_t39_info; break;
+
+ case 40: ctoi_t_offset = (W_)&stg_ctoi_t40_info; break;
+ case 41: ctoi_t_offset = (W_)&stg_ctoi_t41_info; break;
+ case 42: ctoi_t_offset = (W_)&stg_ctoi_t42_info; break;
+ case 43: ctoi_t_offset = (W_)&stg_ctoi_t43_info; break;
+ case 44: ctoi_t_offset = (W_)&stg_ctoi_t44_info; break;
+ case 45: ctoi_t_offset = (W_)&stg_ctoi_t45_info; break;
+ case 46: ctoi_t_offset = (W_)&stg_ctoi_t46_info; break;
+ case 47: ctoi_t_offset = (W_)&stg_ctoi_t47_info; break;
+ case 48: ctoi_t_offset = (W_)&stg_ctoi_t48_info; break;
+ case 49: ctoi_t_offset = (W_)&stg_ctoi_t49_info; break;
+
+ case 50: ctoi_t_offset = (W_)&stg_ctoi_t50_info; break;
+ case 51: ctoi_t_offset = (W_)&stg_ctoi_t51_info; break;
+ case 52: ctoi_t_offset = (W_)&stg_ctoi_t52_info; break;
+ case 53: ctoi_t_offset = (W_)&stg_ctoi_t53_info; break;
+ case 54: ctoi_t_offset = (W_)&stg_ctoi_t54_info; break;
+ case 55: ctoi_t_offset = (W_)&stg_ctoi_t55_info; break;
+ case 56: ctoi_t_offset = (W_)&stg_ctoi_t56_info; break;
+ case 57: ctoi_t_offset = (W_)&stg_ctoi_t57_info; break;
+ case 58: ctoi_t_offset = (W_)&stg_ctoi_t58_info; break;
+ case 59: ctoi_t_offset = (W_)&stg_ctoi_t59_info; break;
+
+ case 60: ctoi_t_offset = (W_)&stg_ctoi_t60_info; break;
+ case 61: ctoi_t_offset = (W_)&stg_ctoi_t61_info; break;
+ case 62: ctoi_t_offset = (W_)&stg_ctoi_t62_info; break;
+
+ default: barf("unsupported tuple size %d", tuple_stack_words);
+ }
+
+ SpW(-4) = ctoi_t_offset;
+ Sp_subW(4);
+ goto nextInsn;
+ }
+
case bci_PUSH_APPLY_N:
Sp_subW(1); SpW(0) = (W_)&stg_ap_n_info;
goto nextInsn;
@@ -1708,6 +1841,12 @@ run_BCO:
Sp_subW(1);
SpW(0) = (W_)&stg_ret_v_info;
goto do_return_unboxed;
+ case bci_RETURN_T: {
+ /* tuple_info and tuple_bco must already be on the stack */
+ Sp_subW(1);
+ SpW(0) = (W_)&stg_ret_t_info;
+ goto do_return_unboxed;
+ }
case bci_SWIZZLE: {
int stkoff = BCO_NEXT;
diff --git a/rts/Printer.c b/rts/Printer.c
index ef9a52719b..7d9614cfd7 100644
--- a/rts/Printer.c
+++ b/rts/Printer.c
@@ -529,17 +529,7 @@ printStackChunk( StgPtr sp, StgPtr spBottom )
case RET_SMALL: {
StgWord c = *sp;
- if (c == (StgWord)&stg_ctoi_R1p_info) {
- debugBelch("tstg_ctoi_ret_R1p_info\n" );
- } else if (c == (StgWord)&stg_ctoi_R1n_info) {
- debugBelch("stg_ctoi_ret_R1n_info\n" );
- } else if (c == (StgWord)&stg_ctoi_F1_info) {
- debugBelch("stg_ctoi_ret_F1_info\n" );
- } else if (c == (StgWord)&stg_ctoi_D1_info) {
- debugBelch("stg_ctoi_ret_D1_info\n" );
- } else if (c == (StgWord)&stg_ctoi_V_info) {
- debugBelch("stg_ctoi_ret_V_info\n" );
- } else if (c == (StgWord)&stg_ap_v_info) {
+ if (c == (StgWord)&stg_ap_v_info) {
debugBelch("stg_ap_v_info\n" );
} else if (c == (StgWord)&stg_ap_f_info) {
debugBelch("stg_ap_f_info\n" );
@@ -595,11 +585,51 @@ printStackChunk( StgPtr sp, StgPtr spBottom )
}
case RET_BCO: {
- StgBCO *bco;
-
- bco = ((StgBCO *)sp[1]);
+ StgWord c = *sp;
+ StgBCO *bco = ((StgBCO *)sp[1]);
- debugBelch("RET_BCO (%p)\n", sp);
+ if (c == (StgWord)&stg_ctoi_R1p_info) {
+ debugBelch("stg_ctoi_R1p_info" );
+ } else if (c == (StgWord)&stg_ctoi_R1unpt_info) {
+ debugBelch("stg_ctoi_R1unpt_info" );
+ } else if (c == (StgWord)&stg_ctoi_R1n_info) {
+ debugBelch("stg_ctoi_R1n_info" );
+ } else if (c == (StgWord)&stg_ctoi_F1_info) {
+ debugBelch("stg_ctoi_F1_info" );
+ } else if (c == (StgWord)&stg_ctoi_D1_info) {
+ debugBelch("stg_ctoi_D1_info" );
+ } else if (c == (StgWord)&stg_ctoi_V_info) {
+ debugBelch("stg_ctoi_V_info" );
+ } else if (c == (StgWord)&stg_BCO_info) {
+ debugBelch("stg_BCO_info" );
+ } else if (c == (StgWord)&stg_apply_interp_info) {
+ debugBelch("stg_apply_interp_info" );
+ } else if (c == (StgWord)&stg_ret_t_info) {
+ debugBelch("stg_ret_t_info" );
+ } else if (c == (StgWord)&stg_ctoi_t0_info) {
+ debugBelch("stg_ctoi_t0_info" );
+ } else if (c == (StgWord)&stg_ctoi_t1_info) {
+ debugBelch("stg_ctoi_t1_info" );
+ } else if (c == (StgWord)&stg_ctoi_t2_info) {
+ debugBelch("stg_ctoi_t2_info" );
+ } else if (c == (StgWord)&stg_ctoi_t3_info) {
+ debugBelch("stg_ctoi_t3_info" );
+ } else if (c == (StgWord)&stg_ctoi_t4_info) {
+ debugBelch("stg_ctoi_t4_info" );
+ } else if (c == (StgWord)&stg_ctoi_t5_info) {
+ debugBelch("stg_ctoi_t5_info" );
+ } else if (c == (StgWord)&stg_ctoi_t6_info) {
+ debugBelch("stg_ctoi_t6_info" );
+ } else if (c == (StgWord)&stg_ctoi_t7_info) {
+ debugBelch("stg_ctoi_t7_info" );
+ } else if (c == (StgWord)&stg_ctoi_t8_info) {
+ debugBelch("stg_ctoi_t8_info" );
+ /* there are more stg_ctoi_tN_info frames,
+ but we don't print them all */
+ } else {
+ debugBelch("RET_BCO");
+ }
+ debugBelch(" (%p)\n", sp);
printLargeBitmap(spBottom, sp+2,
BCO_BITMAP(bco), BCO_BITMAP_SIZE(bco));
continue;
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index 50a3bae267..3a9f568ed4 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -561,6 +561,8 @@
SymI_HasProto(stg_ret_f_info) \
SymI_HasProto(stg_ret_d_info) \
SymI_HasProto(stg_ret_l_info) \
+ SymI_HasProto(stg_ret_t_info) \
+ SymI_HasProto(stg_ctoi_t) \
SymI_HasProto(stg_gc_prim_p) \
SymI_HasProto(stg_gc_prim_pp) \
SymI_HasProto(stg_gc_prim_n) \
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index 7a8f20dded..b9379ab3e6 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -195,6 +195,274 @@ INFO_TABLE_RET( stg_ctoi_V, RET_BCO )
jump stg_yield_to_interpreter [];
}
+/* Note [GHCi unboxed tuples stack spills]
+
+ In the calling convention for compiled code, a tuple is returned
+ in registers, with everything that doesn't fit spilled onto the STG
+ stack.
+
+ At the time the continuation is called, Sp points to the highest word
+ used on the stack:
+
+ ...
+ stg_ctoi_t (next stack frame, continuation)
+ spilled_1
+ spilled_2
+ spilled_3 <- Sp
+
+ This makes it difficult to write a procedure that can handle tuples of
+ any size.
+
+ To get around this, we use a Cmm procedure that adjusts the stack pointer
+ to skip over the tuple:
+
+ ...
+ stg_ctoi_t3 (advances Sp by 3 words, then calls stg_ctoi_t)
+ spilled_1
+ spilled_2
+ spilled_3 <- Sp
+
+ When stg_ctoi_t is called, the stack looks like:
+
+ ...
+ tuple_BCO
+ tuple_info
+ cont_BCO (continuation in bytecode)
+ stg_ctoi_t3 <- Sp
+ spilled_1
+ spilled_2
+ spilled_3
+
+ stg_ctoi_t then reads the tuple_info word to determine the registers
+ to save onto the stack and construct a call to tuple_BCO. Afterwards the
+ stack looks as follows:
+
+ ...
+ tuple_BCO
+ tuple_info
+ cont_BCO
+ stg_ctoi_t3
+ spilled_1
+ spilled_2
+ spilled_3
+ saved_R2
+ saved_R1
+ saved_D3
+ ...
+ tuple_BCO
+ stg_apply_interp <- Sp
+
+
+ tuple_BCO contains the bytecode instructions to return the tuple to
+ cont_BCO. The bitmap in tuple_BCO describes the contents of
+ the tuple to the storage manager.
+
+ At this point we can safely jump to the interpreter.
+
+ */
+
+#define MK_STG_CTOI_T(N) INFO_TABLE_RET( \
+ stg_ctoi_t ## N, RET_BCO ) \
+ { Sp_adj(N); jump stg_ctoi_t [*]; }
+
+MK_STG_CTOI_T(0)
+MK_STG_CTOI_T(1)
+MK_STG_CTOI_T(2)
+MK_STG_CTOI_T(3)
+MK_STG_CTOI_T(4)
+MK_STG_CTOI_T(5)
+MK_STG_CTOI_T(6)
+MK_STG_CTOI_T(7)
+MK_STG_CTOI_T(8)
+MK_STG_CTOI_T(9)
+
+MK_STG_CTOI_T(10)
+MK_STG_CTOI_T(11)
+MK_STG_CTOI_T(12)
+MK_STG_CTOI_T(13)
+MK_STG_CTOI_T(14)
+MK_STG_CTOI_T(15)
+MK_STG_CTOI_T(16)
+MK_STG_CTOI_T(17)
+MK_STG_CTOI_T(18)
+MK_STG_CTOI_T(19)
+
+MK_STG_CTOI_T(20)
+MK_STG_CTOI_T(21)
+MK_STG_CTOI_T(22)
+MK_STG_CTOI_T(23)
+MK_STG_CTOI_T(24)
+MK_STG_CTOI_T(25)
+MK_STG_CTOI_T(26)
+MK_STG_CTOI_T(27)
+MK_STG_CTOI_T(28)
+MK_STG_CTOI_T(29)
+
+MK_STG_CTOI_T(30)
+MK_STG_CTOI_T(31)
+MK_STG_CTOI_T(32)
+MK_STG_CTOI_T(33)
+MK_STG_CTOI_T(34)
+MK_STG_CTOI_T(35)
+MK_STG_CTOI_T(36)
+MK_STG_CTOI_T(37)
+MK_STG_CTOI_T(38)
+MK_STG_CTOI_T(39)
+
+MK_STG_CTOI_T(40)
+MK_STG_CTOI_T(41)
+MK_STG_CTOI_T(42)
+MK_STG_CTOI_T(43)
+MK_STG_CTOI_T(44)
+MK_STG_CTOI_T(45)
+MK_STG_CTOI_T(46)
+MK_STG_CTOI_T(47)
+MK_STG_CTOI_T(48)
+MK_STG_CTOI_T(49)
+
+MK_STG_CTOI_T(50)
+MK_STG_CTOI_T(51)
+MK_STG_CTOI_T(52)
+MK_STG_CTOI_T(53)
+MK_STG_CTOI_T(54)
+MK_STG_CTOI_T(55)
+MK_STG_CTOI_T(56)
+MK_STG_CTOI_T(57)
+MK_STG_CTOI_T(58)
+MK_STG_CTOI_T(59)
+
+MK_STG_CTOI_T(60)
+MK_STG_CTOI_T(61)
+MK_STG_CTOI_T(62)
+
+/*
+ Note [GHCi tuple layout]
+
+ the tuple_info word describes the register and stack usage of the tuple:
+
+ [ rrrr ffff ffdd dddd llss ssss ssss ssss ]
+
+ - r: number of vanilla registers R1..Rn
+ - f: bitmap of float registers F1..F6
+ - d: bitmap of double registers D1..D6
+ - l: bitmap of long registers L1..Ln
+ - s: number of words on stack (in addition to registers)
+
+ The order in which the registers are pushed on the stack is determined by
+ the Ord instance of GHC.Cmm.Expr.GlobalReg. If you change the Ord instance,
+ the order in stg_ctoi_t and stg_ret_t needs to be adjusted accordingly.
+
+ */
+
+stg_ctoi_t
+ /* explicit stack */
+{
+
+ W_ tuple_info, tuple_stack, tuple_regs_R,
+ tuple_regs_F, tuple_regs_D, tuple_regs_L;
+ P_ tuple_BCO;
+
+ tuple_info = Sp(2); /* tuple information word */
+ tuple_BCO = Sp(3); /* bytecode object that returns the tuple in
+ the interpreter */
+
+#if defined(PROFILING)
+ CCCS = Sp(4);
+#endif
+
+ tuple_stack = tuple_info & 0x3fff; /* number of words spilled on stack */
+ tuple_regs_R = (tuple_info >> 28) & 0xf; /* number of R1..Rn */
+ tuple_regs_F = (tuple_info >> 22) & 0x3f; /* 6 bits bitmap */
+ tuple_regs_D = (tuple_info >> 16) & 0x3f; /* 6 bits bitmap */
+ tuple_regs_L = (tuple_info >> 14) & 0x3; /* 2 bits bitmap */
+
+ Sp = Sp - WDS(tuple_stack);
+
+ /* save long registers */
+ /* fixme L2 ? */
+ if((tuple_regs_L & 1) != 0) { Sp = Sp - 8; L_[Sp] = L1; }
+
+ /* save double registers */
+ if((tuple_regs_D & 32) != 0) { Sp = Sp - SIZEOF_DOUBLE; D_[Sp] = D6; }
+ if((tuple_regs_D & 16) != 0) { Sp = Sp - SIZEOF_DOUBLE; D_[Sp] = D5; }
+ if((tuple_regs_D & 8) != 0) { Sp = Sp - SIZEOF_DOUBLE; D_[Sp] = D4; }
+ if((tuple_regs_D & 4) != 0) { Sp = Sp - SIZEOF_DOUBLE; D_[Sp] = D3; }
+ if((tuple_regs_D & 2) != 0) { Sp = Sp - SIZEOF_DOUBLE; D_[Sp] = D2; }
+ if((tuple_regs_D & 1) != 0) { Sp = Sp - SIZEOF_DOUBLE; D_[Sp] = D1; }
+
+ /* save float registers */
+ if((tuple_regs_F & 32) != 0) { Sp_adj(-1); F_[Sp] = F6; }
+ if((tuple_regs_F & 16) != 0) { Sp_adj(-1); F_[Sp] = F5; }
+ if((tuple_regs_F & 8) != 0) { Sp_adj(-1); F_[Sp] = F4; }
+ if((tuple_regs_F & 4) != 0) { Sp_adj(-1); F_[Sp] = F3; }
+ if((tuple_regs_F & 2) != 0) { Sp_adj(-1); F_[Sp] = F2; }
+ if((tuple_regs_F & 1) != 0) { Sp_adj(-1); F_[Sp] = F1; }
+
+ /* save vanilla registers */
+ if(tuple_regs_R >= 6) { Sp_adj(-1); Sp(0) = R6; }
+ if(tuple_regs_R >= 5) { Sp_adj(-1); Sp(0) = R5; }
+ if(tuple_regs_R >= 4) { Sp_adj(-1); Sp(0) = R4; }
+ if(tuple_regs_R >= 3) { Sp_adj(-1); Sp(0) = R3; }
+ if(tuple_regs_R >= 2) { Sp_adj(-1); Sp(0) = R2; }
+ if(tuple_regs_R >= 1) { Sp_adj(-1); Sp(0) = R1; }
+
+ /* jump to the BCO that will finish the return of the tuple */
+ Sp_adj(-3);
+ Sp(2) = tuple_info;
+ Sp(1) = tuple_BCO;
+ Sp(0) = stg_ret_t_info;
+
+ jump stg_yield_to_interpreter [];
+}
+
+INFO_TABLE_RET( stg_ret_t, RET_BCO )
+{
+ W_ tuple_info, tuple_stack, tuple_regs_R, tuple_regs_F,
+ tuple_regs_D, tuple_regs_L;
+
+ tuple_info = Sp(2);
+ Sp_adj(3);
+
+ tuple_stack = tuple_info & 0x3fff; /* number of words spilled on stack */
+ tuple_regs_R = (tuple_info >> 28) & 0xf; /* number of R1..Rn */
+ tuple_regs_F = (tuple_info >> 22) & 0x3f; /* 6 bits bitmap */
+ tuple_regs_D = (tuple_info >> 16) & 0x3f; /* 6 bits bitmap */
+ tuple_regs_L = (tuple_info >> 14) & 0x3; /* 2 bits bitmap */
+
+ /* restore everything in the reverse order of stg_ctoi_t */
+
+ /* restore vanilla registers */
+ if(tuple_regs_R >= 1) { R1 = Sp(0); Sp_adj(1); }
+ if(tuple_regs_R >= 2) { R2 = Sp(0); Sp_adj(1); }
+ if(tuple_regs_R >= 3) { R3 = Sp(0); Sp_adj(1); }
+ if(tuple_regs_R >= 4) { R4 = Sp(0); Sp_adj(1); }
+ if(tuple_regs_R >= 5) { R5 = Sp(0); Sp_adj(1); }
+ if(tuple_regs_R >= 6) { R6 = Sp(0); Sp_adj(1); }
+
+ /* restore float registers */
+ if((tuple_regs_F & 1) != 0) { F1 = F_[Sp]; Sp_adj(1); }
+ if((tuple_regs_F & 2) != 0) { F2 = F_[Sp]; Sp_adj(1); }
+ if((tuple_regs_F & 4) != 0) { F3 = F_[Sp]; Sp_adj(1); }
+ if((tuple_regs_F & 8) != 0) { F4 = F_[Sp]; Sp_adj(1); }
+ if((tuple_regs_F & 16) != 0) { F5 = F_[Sp]; Sp_adj(1); }
+ if((tuple_regs_F & 32) != 0) { F6 = F_[Sp]; Sp_adj(1); }
+
+ /* restore double registers */
+ if((tuple_regs_D & 1) != 0) { D1 = D_[Sp]; Sp = Sp + SIZEOF_DOUBLE; }
+ if((tuple_regs_D & 2) != 0) { D2 = D_[Sp]; Sp = Sp + SIZEOF_DOUBLE; }
+ if((tuple_regs_D & 4) != 0) { D3 = D_[Sp]; Sp = Sp + SIZEOF_DOUBLE; }
+ if((tuple_regs_D & 8) != 0) { D4 = D_[Sp]; Sp = Sp + SIZEOF_DOUBLE; }
+ if((tuple_regs_D & 16) != 0) { D5 = D_[Sp]; Sp = Sp + SIZEOF_DOUBLE; }
+ if((tuple_regs_D & 32) != 0) { D6 = D_[Sp]; Sp = Sp + SIZEOF_DOUBLE; }
+
+ /* restore long registers */
+ if((tuple_regs_L & 1) != 0) { L1 = L_[Sp]; Sp = Sp + 8; }
+
+ /* Sp points to the topmost argument now */
+ jump %ENTRY_CODE(Sp(tuple_stack)) [*]; // NB. all registers live!
+}
+
+
/*
* Dummy info table pushed on the top of the stack when the interpreter
* should apply the BCO on the stack to its arguments, also on the