Generate GHCi bytecode from STG instead of Core and support unboxed

tuples and sums. fixes #1257
author: Luite Stegeman <stegeman@gmail.com> 2021-01-22 00:09:17 +0100
committer: Marge Bot <ben+marge-bot@smart-cactus.org> 2021-03-20 07:49:15 -0400
commit: 1f94e0f7601f8e22fdd81a47f130650265a44196 (patch)
tree: d06d02317049b56763b2f1da27f71f3663efa5a0 /rts
parent: 7de3532f0317032f75b76150c5d3a6f76178be04 (diff)
download: haskell-1f94e0f7601f8e22fdd81a47f130650265a44196.tar.gz
5 files changed, 466 insertions, 23 deletions
diff --git a/rts/Disassembler.c b/rts/Disassembler.c
index 67a451e7e6..451521d57e 100644
--- a/rts/Disassembler.c
+++ b/rts/Disassembler.c
@@ -148,6 +148,13 @@ disInstr ( StgBCO *bco, int pc )
          debugBelch("PUSH_ALTS_V  " ); printPtr( ptrs[instrs[pc]] );
          debugBelch("\n");
          pc += 1; break;
+      case bci_PUSH_ALTS_T:
+         debugBelch("PUSH_ALTS_T  ");
+         printPtr( ptrs[instrs[pc]] );
+         debugBelch(" 0x%" FMT_HexWord " ", literals[instrs[pc+1]] );
+         printPtr( ptrs[instrs[pc+2]] );
+         debugBelch("\n");
+         pc += 3; break;
       case bci_PUSH_PAD8:
          debugBelch("PUSH_PAD8\n");
          pc += 1; break;
@@ -310,6 +317,9 @@ disInstr ( StgBCO *bco, int pc )
       case bci_RETURN_V:
          debugBelch("RETURN_V\n" );
          break;
+      case bci_RETURN_T:
+         debugBelch("RETURN_T\n ");
+         break;
 
       default:
          barf("disInstr: unknown opcode %u", (unsigned int) instr);
@@ -317,12 +327,6 @@ disInstr ( StgBCO *bco, int pc )
    return pc;
 }
 
-
-/* Something of a kludge .. how do we know where the end of the insn
-   array is, since it isn't recorded anywhere?  Answer: the first
-   short is the number of bytecodes which follow it.
-   See GHC.CoreToByteCode.linkBCO.insns_arr for construction ...
-*/
 void disassemble( StgBCO *bco )
 {
    uint32_t i, j;
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index 6929aec5fd..efbfd091d8 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -4,6 +4,7 @@
  * Copyright (c) The GHC Team, 1994-2002.
  * ---------------------------------------------------------------------------*/
 
+
 #include "PosixSource.h"
 #include "Rts.h"
 #include "RtsAPI.h"
@@ -681,12 +682,13 @@ do_return_unboxed:
                 || SpW(0) == (W_)&stg_ret_f_info
                 || SpW(0) == (W_)&stg_ret_d_info
                 || SpW(0) == (W_)&stg_ret_l_info
+                || SpW(0) == (W_)&stg_ret_t_info
             );
 
         IF_DEBUG(interpreter,
              debugBelch(
              "\n---------------------------------------------------------------\n");
-             debugBelch("Returning: "); printObj(obj);
+             debugBelch("Returning unboxed\n");
              debugBelch("Sp = %p\n", Sp);
 #if defined(PROFILING)
              fprintCCS(stderr, cap->r.rCCCS);
@@ -697,7 +699,7 @@ do_return_unboxed:
              debugBelch("\n\n");
             );
 
-        // get the offset of the stg_ctoi_ret_XXX itbl
+        // get the offset of the header of the next stack frame
         offset = stack_frame_sizeW((StgClosure *)Sp);
 
         switch (get_itbl((StgClosure*)(Sp_plusW(offset)))->type) {
@@ -934,6 +936,43 @@ run_BCO_return_unboxed:
     // Stack checks aren't necessary at return points, the stack use
     // is aggregated into the enclosing function entry point.
 
+#if defined(PROFILING)
+    /*
+       Restore the current cost centre stack if a tuple is being returned.
+
+       When a "simple" unboxed value is returned, the cccs is restored with
+       an stg_restore_cccs frame on the stack, for example:
+
+           ...
+           stg_ctoi_D1
+           <CCCS>
+           stg_restore_cccs
+
+       But stg_restore_cccs cannot deal with tuples, which may have more
+       things on the stack. Therefore we store the CCCS inside the
+       stg_ctoi_t frame.
+
+       If we have a tuple being returned, the stack looks like this:
+
+           ...
+           <CCCS>           <- to restore, Sp offset <next frame + 4 words>
+           tuple_BCO
+           tuple_info
+           cont_BCO
+           stg_ctoi_t       <- next frame
+           tuple_data_1
+           ...
+           tuple_data_n
+           tuple_info
+           tuple_BCO
+           stg_ret_t        <- Sp
+     */
+
+    if(SpW(0) == (W_)&stg_ret_t_info) {
+        cap->r.rCCCS = (CostCentreStack*)SpW(stack_frame_sizeW((StgClosure *)Sp) + 4);
+    }
+#endif
+
     goto run_BCO;
 
 run_BCO_fun:
@@ -1329,6 +1368,100 @@ run_BCO:
             goto nextInsn;
         }
 
+        case bci_PUSH_ALTS_T: {
+            int o_bco = BCO_GET_LARGE_ARG;
+            W_ tuple_info = (W_)BCO_LIT(BCO_GET_LARGE_ARG);
+            int o_tuple_bco = BCO_GET_LARGE_ARG;
+
+#if defined(PROFILING)
+            SpW(-1) = (W_)cap->r.rCCCS;
+            Sp_subW(1);
+#endif
+
+            SpW(-1) = BCO_PTR(o_tuple_bco);
+            SpW(-2) = tuple_info;
+            SpW(-3) = BCO_PTR(o_bco);
+            W_ ctoi_t_offset;
+            int tuple_stack_words = tuple_info & 0x3fff;
+            switch(tuple_stack_words) {
+                case 0:  ctoi_t_offset = (W_)&stg_ctoi_t0_info;  break;
+                case 1:  ctoi_t_offset = (W_)&stg_ctoi_t1_info;  break;
+                case 2:  ctoi_t_offset = (W_)&stg_ctoi_t2_info;  break;
+                case 3:  ctoi_t_offset = (W_)&stg_ctoi_t3_info;  break;
+                case 4:  ctoi_t_offset = (W_)&stg_ctoi_t4_info;  break;
+                case 5:  ctoi_t_offset = (W_)&stg_ctoi_t5_info;  break;
+                case 6:  ctoi_t_offset = (W_)&stg_ctoi_t6_info;  break;
+                case 7:  ctoi_t_offset = (W_)&stg_ctoi_t7_info;  break;
+                case 8:  ctoi_t_offset = (W_)&stg_ctoi_t8_info;  break;
+                case 9:  ctoi_t_offset = (W_)&stg_ctoi_t9_info;  break;
+
+                case 10: ctoi_t_offset = (W_)&stg_ctoi_t10_info; break;
+                case 11: ctoi_t_offset = (W_)&stg_ctoi_t11_info; break;
+                case 12: ctoi_t_offset = (W_)&stg_ctoi_t12_info; break;
+                case 13: ctoi_t_offset = (W_)&stg_ctoi_t13_info; break;
+                case 14: ctoi_t_offset = (W_)&stg_ctoi_t14_info; break;
+                case 15: ctoi_t_offset = (W_)&stg_ctoi_t15_info; break;
+                case 16: ctoi_t_offset = (W_)&stg_ctoi_t16_info; break;
+                case 17: ctoi_t_offset = (W_)&stg_ctoi_t17_info; break;
+                case 18: ctoi_t_offset = (W_)&stg_ctoi_t18_info; break;
+                case 19: ctoi_t_offset = (W_)&stg_ctoi_t19_info; break;
+
+                case 20: ctoi_t_offset = (W_)&stg_ctoi_t20_info; break;
+                case 21: ctoi_t_offset = (W_)&stg_ctoi_t21_info; break;
+                case 22: ctoi_t_offset = (W_)&stg_ctoi_t22_info; break;
+                case 23: ctoi_t_offset = (W_)&stg_ctoi_t23_info; break;
+                case 24: ctoi_t_offset = (W_)&stg_ctoi_t24_info; break;
+                case 25: ctoi_t_offset = (W_)&stg_ctoi_t25_info; break;
+                case 26: ctoi_t_offset = (W_)&stg_ctoi_t26_info; break;
+                case 27: ctoi_t_offset = (W_)&stg_ctoi_t27_info; break;
+                case 28: ctoi_t_offset = (W_)&stg_ctoi_t28_info; break;
+                case 29: ctoi_t_offset = (W_)&stg_ctoi_t29_info; break;
+
+                case 30: ctoi_t_offset = (W_)&stg_ctoi_t30_info; break;
+                case 31: ctoi_t_offset = (W_)&stg_ctoi_t31_info; break;
+                case 32: ctoi_t_offset = (W_)&stg_ctoi_t32_info; break;
+                case 33: ctoi_t_offset = (W_)&stg_ctoi_t33_info; break;
+                case 34: ctoi_t_offset = (W_)&stg_ctoi_t34_info; break;
+                case 35: ctoi_t_offset = (W_)&stg_ctoi_t35_info; break;
+                case 36: ctoi_t_offset = (W_)&stg_ctoi_t36_info; break;
+                case 37: ctoi_t_offset = (W_)&stg_ctoi_t37_info; break;
+                case 38: ctoi_t_offset = (W_)&stg_ctoi_t38_info; break;
+                case 39: ctoi_t_offset = (W_)&stg_ctoi_t39_info; break;
+
+                case 40: ctoi_t_offset = (W_)&stg_ctoi_t40_info; break;
+                case 41: ctoi_t_offset = (W_)&stg_ctoi_t41_info; break;
+                case 42: ctoi_t_offset = (W_)&stg_ctoi_t42_info; break;
+                case 43: ctoi_t_offset = (W_)&stg_ctoi_t43_info; break;
+                case 44: ctoi_t_offset = (W_)&stg_ctoi_t44_info; break;
+                case 45: ctoi_t_offset = (W_)&stg_ctoi_t45_info; break;
+                case 46: ctoi_t_offset = (W_)&stg_ctoi_t46_info; break;
+                case 47: ctoi_t_offset = (W_)&stg_ctoi_t47_info; break;
+                case 48: ctoi_t_offset = (W_)&stg_ctoi_t48_info; break;
+                case 49: ctoi_t_offset = (W_)&stg_ctoi_t49_info; break;
+
+                case 50: ctoi_t_offset = (W_)&stg_ctoi_t50_info; break;
+                case 51: ctoi_t_offset = (W_)&stg_ctoi_t51_info; break;
+                case 52: ctoi_t_offset = (W_)&stg_ctoi_t52_info; break;
+                case 53: ctoi_t_offset = (W_)&stg_ctoi_t53_info; break;
+                case 54: ctoi_t_offset = (W_)&stg_ctoi_t54_info; break;
+                case 55: ctoi_t_offset = (W_)&stg_ctoi_t55_info; break;
+                case 56: ctoi_t_offset = (W_)&stg_ctoi_t56_info; break;
+                case 57: ctoi_t_offset = (W_)&stg_ctoi_t57_info; break;
+                case 58: ctoi_t_offset = (W_)&stg_ctoi_t58_info; break;
+                case 59: ctoi_t_offset = (W_)&stg_ctoi_t59_info; break;
+
+                case 60: ctoi_t_offset = (W_)&stg_ctoi_t60_info; break;
+                case 61: ctoi_t_offset = (W_)&stg_ctoi_t61_info; break;
+                case 62: ctoi_t_offset = (W_)&stg_ctoi_t62_info; break;
+
+                default: barf("unsupported tuple size %d", tuple_stack_words);
+            }
+
+            SpW(-4) = ctoi_t_offset;
+            Sp_subW(4);
+            goto nextInsn;
+        }
+
         case bci_PUSH_APPLY_N:
             Sp_subW(1); SpW(0) = (W_)&stg_ap_n_info;
             goto nextInsn;
@@ -1708,6 +1841,12 @@ run_BCO:
             Sp_subW(1);
             SpW(0) = (W_)&stg_ret_v_info;
             goto do_return_unboxed;
+        case bci_RETURN_T: {
+            /* tuple_info and tuple_bco must already be on the stack */
+            Sp_subW(1);
+            SpW(0) = (W_)&stg_ret_t_info;
+            goto do_return_unboxed;
+        }
 
         case bci_SWIZZLE: {
             int stkoff = BCO_NEXT;
diff --git a/rts/Printer.c b/rts/Printer.c
index ef9a52719b..7d9614cfd7 100644
--- a/rts/Printer.c
+++ b/rts/Printer.c
@@ -529,17 +529,7 @@ printStackChunk( StgPtr sp, StgPtr spBottom )
 
         case RET_SMALL: {
             StgWord c = *sp;
-            if (c == (StgWord)&stg_ctoi_R1p_info) {
-                debugBelch("tstg_ctoi_ret_R1p_info\n" );
-            } else if (c == (StgWord)&stg_ctoi_R1n_info) {
-                debugBelch("stg_ctoi_ret_R1n_info\n" );
-            } else if (c == (StgWord)&stg_ctoi_F1_info) {
-                debugBelch("stg_ctoi_ret_F1_info\n" );
-            } else if (c == (StgWord)&stg_ctoi_D1_info) {
-                debugBelch("stg_ctoi_ret_D1_info\n" );
-            } else if (c == (StgWord)&stg_ctoi_V_info) {
-                debugBelch("stg_ctoi_ret_V_info\n" );
-            } else if (c == (StgWord)&stg_ap_v_info) {
+            if (c == (StgWord)&stg_ap_v_info) {
                 debugBelch("stg_ap_v_info\n" );
             } else if (c == (StgWord)&stg_ap_f_info) {
                 debugBelch("stg_ap_f_info\n" );
@@ -595,11 +585,51 @@ printStackChunk( StgPtr sp, StgPtr spBottom )
         }
 
         case RET_BCO: {
-            StgBCO *bco;
-
-            bco = ((StgBCO *)sp[1]);
+            StgWord c = *sp;
+            StgBCO *bco = ((StgBCO *)sp[1]);
 
-            debugBelch("RET_BCO (%p)\n", sp);
+            if (c == (StgWord)&stg_ctoi_R1p_info) {
+                debugBelch("stg_ctoi_R1p_info" );
+            } else if (c == (StgWord)&stg_ctoi_R1unpt_info) {
+                debugBelch("stg_ctoi_R1unpt_info" );
+            } else if (c == (StgWord)&stg_ctoi_R1n_info) {
+                debugBelch("stg_ctoi_R1n_info" );
+            } else if (c == (StgWord)&stg_ctoi_F1_info) {
+                debugBelch("stg_ctoi_F1_info" );
+            } else if (c == (StgWord)&stg_ctoi_D1_info) {
+                debugBelch("stg_ctoi_D1_info" );
+            } else if (c == (StgWord)&stg_ctoi_V_info) {
+                debugBelch("stg_ctoi_V_info" );
+            } else if (c == (StgWord)&stg_BCO_info) {
+                debugBelch("stg_BCO_info" );
+            } else if (c == (StgWord)&stg_apply_interp_info) {
+                debugBelch("stg_apply_interp_info" );
+            } else if (c == (StgWord)&stg_ret_t_info) {
+                debugBelch("stg_ret_t_info" );
+            } else if (c == (StgWord)&stg_ctoi_t0_info) {
+                debugBelch("stg_ctoi_t0_info" );
+            } else if (c == (StgWord)&stg_ctoi_t1_info) {
+                debugBelch("stg_ctoi_t1_info" );
+            } else if (c == (StgWord)&stg_ctoi_t2_info) {
+                debugBelch("stg_ctoi_t2_info" );
+            } else if (c == (StgWord)&stg_ctoi_t3_info) {
+                debugBelch("stg_ctoi_t3_info" );
+            } else if (c == (StgWord)&stg_ctoi_t4_info) {
+                debugBelch("stg_ctoi_t4_info" );
+            } else if (c == (StgWord)&stg_ctoi_t5_info) {
+                debugBelch("stg_ctoi_t5_info" );
+            } else if (c == (StgWord)&stg_ctoi_t6_info) {
+                debugBelch("stg_ctoi_t6_info" );
+            } else if (c == (StgWord)&stg_ctoi_t7_info) {
+                debugBelch("stg_ctoi_t7_info" );
+            } else if (c == (StgWord)&stg_ctoi_t8_info) {
+                debugBelch("stg_ctoi_t8_info" );
+            /* there are more stg_ctoi_tN_info frames,
+               but we don't print them all */
+            } else {
+                debugBelch("RET_BCO");
+            }
+            debugBelch(" (%p)\n", sp);
             printLargeBitmap(spBottom, sp+2,
                              BCO_BITMAP(bco), BCO_BITMAP_SIZE(bco));
             continue;
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index 50a3bae267..3a9f568ed4 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -561,6 +561,8 @@
       SymI_HasProto(stg_ret_f_info)                                     \
       SymI_HasProto(stg_ret_d_info)                                     \
       SymI_HasProto(stg_ret_l_info)                                     \
+      SymI_HasProto(stg_ret_t_info)                                     \
+      SymI_HasProto(stg_ctoi_t)                                         \
       SymI_HasProto(stg_gc_prim_p)                                      \
       SymI_HasProto(stg_gc_prim_pp)                                     \
       SymI_HasProto(stg_gc_prim_n)                                      \
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index 7a8f20dded..b9379ab3e6 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -195,6 +195,274 @@ INFO_TABLE_RET( stg_ctoi_V, RET_BCO )
     jump stg_yield_to_interpreter [];
 }
 
+/*   Note [GHCi unboxed tuples stack spills]
+
+   In the calling convention for compiled code, a tuple is returned
+   in registers, with everything that doesn't fit spilled onto the STG
+   stack.
+
+   At the time the continuation is called, Sp points to the highest word
+   used on the stack:
+
+       ...
+       stg_ctoi_t  (next stack frame, continuation)
+       spilled_1
+       spilled_2
+       spilled_3   <- Sp
+
+   This makes it difficult to write a procedure that can handle tuples of
+   any size.
+
+   To get around this, we use a Cmm procedure that adjusts the stack pointer
+   to skip over the tuple:
+
+       ...
+       stg_ctoi_t3  (advances Sp by 3 words, then calls stg_ctoi_t)
+       spilled_1
+       spilled_2
+       spilled_3    <- Sp
+  
+   When stg_ctoi_t is called, the stack looks like:
+
+       ...
+       tuple_BCO
+       tuple_info
+       cont_BCO     (continuation in bytecode)
+       stg_ctoi_t3  <- Sp
+       spilled_1
+       spilled_2
+       spilled_3
+
+   stg_ctoi_t then reads the tuple_info word to determine the registers
+   to save onto the stack and construct a call to tuple_BCO. Afterwards the
+   stack looks as follows:
+
+       ...
+       tuple_BCO
+       tuple_info
+       cont_BCO
+       stg_ctoi_t3
+       spilled_1
+       spilled_2
+       spilled_3
+       saved_R2
+       saved_R1
+       saved_D3
+       ...
+       tuple_BCO
+       stg_apply_interp <- Sp
+
+
+   tuple_BCO contains the bytecode instructions to return the tuple to
+   cont_BCO. The bitmap in tuple_BCO describes the contents of
+   the tuple to the storage manager.
+
+   At this point we can safely jump to the interpreter.
+
+ */
+
+#define MK_STG_CTOI_T(N) INFO_TABLE_RET( \
+  stg_ctoi_t ## N, RET_BCO ) \
+  { Sp_adj(N); jump stg_ctoi_t [*]; }
+
+MK_STG_CTOI_T(0)
+MK_STG_CTOI_T(1)
+MK_STG_CTOI_T(2)
+MK_STG_CTOI_T(3)
+MK_STG_CTOI_T(4)
+MK_STG_CTOI_T(5)
+MK_STG_CTOI_T(6)
+MK_STG_CTOI_T(7)
+MK_STG_CTOI_T(8)
+MK_STG_CTOI_T(9)
+
+MK_STG_CTOI_T(10)
+MK_STG_CTOI_T(11)
+MK_STG_CTOI_T(12)
+MK_STG_CTOI_T(13)
+MK_STG_CTOI_T(14)
+MK_STG_CTOI_T(15)
+MK_STG_CTOI_T(16)
+MK_STG_CTOI_T(17)
+MK_STG_CTOI_T(18)
+MK_STG_CTOI_T(19)
+
+MK_STG_CTOI_T(20)
+MK_STG_CTOI_T(21)
+MK_STG_CTOI_T(22)
+MK_STG_CTOI_T(23)
+MK_STG_CTOI_T(24)
+MK_STG_CTOI_T(25)
+MK_STG_CTOI_T(26)
+MK_STG_CTOI_T(27)
+MK_STG_CTOI_T(28)
+MK_STG_CTOI_T(29)
+
+MK_STG_CTOI_T(30)
+MK_STG_CTOI_T(31)
+MK_STG_CTOI_T(32)
+MK_STG_CTOI_T(33)
+MK_STG_CTOI_T(34)
+MK_STG_CTOI_T(35)
+MK_STG_CTOI_T(36)
+MK_STG_CTOI_T(37)
+MK_STG_CTOI_T(38)
+MK_STG_CTOI_T(39)
+
+MK_STG_CTOI_T(40)
+MK_STG_CTOI_T(41)
+MK_STG_CTOI_T(42)
+MK_STG_CTOI_T(43)
+MK_STG_CTOI_T(44)
+MK_STG_CTOI_T(45)
+MK_STG_CTOI_T(46)
+MK_STG_CTOI_T(47)
+MK_STG_CTOI_T(48)
+MK_STG_CTOI_T(49)
+
+MK_STG_CTOI_T(50)
+MK_STG_CTOI_T(51)
+MK_STG_CTOI_T(52)
+MK_STG_CTOI_T(53)
+MK_STG_CTOI_T(54)
+MK_STG_CTOI_T(55)
+MK_STG_CTOI_T(56)
+MK_STG_CTOI_T(57)
+MK_STG_CTOI_T(58)
+MK_STG_CTOI_T(59)
+
+MK_STG_CTOI_T(60)
+MK_STG_CTOI_T(61)
+MK_STG_CTOI_T(62)
+
+/*
+  Note [GHCi tuple layout]
+
+  the tuple_info word describes the register and stack usage of the tuple:
+
+  [ rrrr ffff ffdd dddd llss ssss ssss ssss ]
+
+  - r: number of vanilla registers R1..Rn
+  - f: bitmap of float registers F1..F6
+  - d: bitmap of double registers D1..D6
+  - l: bitmap of long registers L1..Ln
+  - s: number of words on stack (in addition to registers)
+
+  The order in which the registers are pushed on the stack is determined by
+  the Ord instance of GHC.Cmm.Expr.GlobalReg. If you change the Ord instance,
+  the order in stg_ctoi_t and stg_ret_t needs to be adjusted accordingly.
+
+ */
+
+stg_ctoi_t
+    /* explicit stack */
+{
+
+    W_ tuple_info, tuple_stack, tuple_regs_R,
+       tuple_regs_F, tuple_regs_D, tuple_regs_L;
+    P_ tuple_BCO;
+
+    tuple_info = Sp(2); /* tuple information word */
+    tuple_BCO  = Sp(3); /* bytecode object that returns the tuple in
+                           the interpreter */
+
+#if defined(PROFILING)
+    CCCS = Sp(4);
+#endif
+
+    tuple_stack  = tuple_info & 0x3fff; /* number of words spilled on stack */
+    tuple_regs_R = (tuple_info >> 28) & 0xf;  /* number of R1..Rn */
+    tuple_regs_F = (tuple_info >> 22) & 0x3f; /* 6 bits bitmap */
+    tuple_regs_D = (tuple_info >> 16) & 0x3f; /* 6 bits bitmap */
+    tuple_regs_L = (tuple_info >> 14) & 0x3;  /* 2 bits bitmap */
+
+    Sp = Sp - WDS(tuple_stack);
+
+    /* save long registers */
+    /* fixme L2 ? */
+    if((tuple_regs_L & 1) != 0) { Sp = Sp - 8; L_[Sp] = L1; }
+
+    /* save double registers */
+    if((tuple_regs_D & 32) != 0) { Sp = Sp - SIZEOF_DOUBLE; D_[Sp] = D6; }
+    if((tuple_regs_D & 16) != 0) { Sp = Sp - SIZEOF_DOUBLE; D_[Sp] = D5; }
+    if((tuple_regs_D & 8)  != 0) { Sp = Sp - SIZEOF_DOUBLE; D_[Sp] = D4; }
+    if((tuple_regs_D & 4)  != 0) { Sp = Sp - SIZEOF_DOUBLE; D_[Sp] = D3; }
+    if((tuple_regs_D & 2)  != 0) { Sp = Sp - SIZEOF_DOUBLE; D_[Sp] = D2; }
+    if((tuple_regs_D & 1)  != 0) { Sp = Sp - SIZEOF_DOUBLE; D_[Sp] = D1; }
+
+    /* save float registers */
+    if((tuple_regs_F & 32) != 0) { Sp_adj(-1); F_[Sp] = F6; }
+    if((tuple_regs_F & 16) != 0) { Sp_adj(-1); F_[Sp] = F5; }
+    if((tuple_regs_F & 8)  != 0) { Sp_adj(-1); F_[Sp] = F4; }
+    if((tuple_regs_F & 4)  != 0) { Sp_adj(-1); F_[Sp] = F3; }
+    if((tuple_regs_F & 2)  != 0) { Sp_adj(-1); F_[Sp] = F2; }
+    if((tuple_regs_F & 1)  != 0) { Sp_adj(-1); F_[Sp] = F1; }
+
+    /* save vanilla registers */
+    if(tuple_regs_R >= 6) { Sp_adj(-1); Sp(0) = R6; }
+    if(tuple_regs_R >= 5) { Sp_adj(-1); Sp(0) = R5; }
+    if(tuple_regs_R >= 4) { Sp_adj(-1); Sp(0) = R4; }
+    if(tuple_regs_R >= 3) { Sp_adj(-1); Sp(0) = R3; }
+    if(tuple_regs_R >= 2) { Sp_adj(-1); Sp(0) = R2; }
+    if(tuple_regs_R >= 1) { Sp_adj(-1); Sp(0) = R1; }
+
+    /* jump to the BCO that will finish the return of the tuple */
+    Sp_adj(-3);
+    Sp(2) = tuple_info;
+    Sp(1) = tuple_BCO;
+    Sp(0) = stg_ret_t_info;
+
+    jump stg_yield_to_interpreter [];
+}
+
+INFO_TABLE_RET( stg_ret_t, RET_BCO )
+{
+    W_ tuple_info, tuple_stack, tuple_regs_R, tuple_regs_F,
+       tuple_regs_D, tuple_regs_L;
+
+    tuple_info = Sp(2);
+    Sp_adj(3);
+
+    tuple_stack  = tuple_info & 0x3fff; /* number of words spilled on stack */
+    tuple_regs_R = (tuple_info >> 28) & 0xf;  /* number of R1..Rn */
+    tuple_regs_F = (tuple_info >> 22) & 0x3f; /* 6 bits bitmap */
+    tuple_regs_D = (tuple_info >> 16) & 0x3f; /* 6 bits bitmap */
+    tuple_regs_L = (tuple_info >> 14) & 0x3;  /* 2 bits bitmap */
+
+    /* restore everything in the reverse order of stg_ctoi_t */
+
+    /* restore vanilla registers */
+    if(tuple_regs_R >= 1) { R1 = Sp(0); Sp_adj(1); }
+    if(tuple_regs_R >= 2) { R2 = Sp(0); Sp_adj(1); }
+    if(tuple_regs_R >= 3) { R3 = Sp(0); Sp_adj(1); }
+    if(tuple_regs_R >= 4) { R4 = Sp(0); Sp_adj(1); }
+    if(tuple_regs_R >= 5) { R5 = Sp(0); Sp_adj(1); }
+    if(tuple_regs_R >= 6) { R6 = Sp(0); Sp_adj(1); }
+
+    /* restore float registers */
+    if((tuple_regs_F & 1)  != 0) { F1 = F_[Sp]; Sp_adj(1); }
+    if((tuple_regs_F & 2)  != 0) { F2 = F_[Sp]; Sp_adj(1); }
+    if((tuple_regs_F & 4)  != 0) { F3 = F_[Sp]; Sp_adj(1); }
+    if((tuple_regs_F & 8)  != 0) { F4 = F_[Sp]; Sp_adj(1); }
+    if((tuple_regs_F & 16) != 0) { F5 = F_[Sp]; Sp_adj(1); }
+    if((tuple_regs_F & 32) != 0) { F6 = F_[Sp]; Sp_adj(1); }
+
+    /* restore double registers */
+    if((tuple_regs_D & 1)  != 0) { D1 = D_[Sp]; Sp = Sp + SIZEOF_DOUBLE; }
+    if((tuple_regs_D & 2)  != 0) { D2 = D_[Sp]; Sp = Sp + SIZEOF_DOUBLE; }
+    if((tuple_regs_D & 4)  != 0) { D3 = D_[Sp]; Sp = Sp + SIZEOF_DOUBLE; }
+    if((tuple_regs_D & 8)  != 0) { D4 = D_[Sp]; Sp = Sp + SIZEOF_DOUBLE; }
+    if((tuple_regs_D & 16) != 0) { D5 = D_[Sp]; Sp = Sp + SIZEOF_DOUBLE; }
+    if((tuple_regs_D & 32) != 0) { D6 = D_[Sp]; Sp = Sp + SIZEOF_DOUBLE; }
+
+    /* restore long registers */
+    if((tuple_regs_L & 1) != 0) { L1 = L_[Sp]; Sp = Sp + 8; }
+
+    /* Sp points to the topmost argument now */
+    jump %ENTRY_CODE(Sp(tuple_stack)) [*]; // NB. all registers live!
+}
+
+
 /*
  * Dummy info table pushed on the top of the stack when the interpreter
  * should apply the BCO on the stack to its arguments, also on the
author	Luite Stegeman <stegeman@gmail.com>	2021-01-22 00:09:17 +0100
committer	Marge Bot <ben+marge-bot@smart-cactus.org>	2021-03-20 07:49:15 -0400
commit	1f94e0f7601f8e22fdd81a47f130650265a44196 (patch)
tree	d06d02317049b56763b2f1da27f71f3663efa5a0 /rts
parent	7de3532f0317032f75b76150c5d3a6f76178be04 (diff)
download	haskell-1f94e0f7601f8e22fdd81a47f130650265a44196.tar.gz