Produce new-style Cmm from the Cmm parser

The main change here is that the Cmm parser now allows high-level cmm
code with argument-passing and function calls.  For example:

    foo ( gcptr a, bits32 b )
    {
      if (b > 0) {
         // we can make tail calls passing arguments:
         jump stg_ap_0_fast(a);
      }

      return (x,y);
    }

More details on the new cmm syntax are in Note [Syntax of .cmm files]
in CmmParse.y.

The old syntax is still more-or-less supported for those occasional
code fragments that really need to explicitly manipulate the stack.
However there are a couple of differences: it is now obligatory to
give a list of live GlobalRegs on every jump, e.g.

    jump %ENTRY_CODE(Sp(0)) [R1];

Again, more details in Note [Syntax of .cmm files].

I have rewritten most of the .cmm files in the RTS into the new
syntax, except for AutoApply.cmm which is generated by the genapply
program: this file could be generated in the new syntax instead and
would probably be better off for it, but I ran out of enthusiasm.

Some other changes in this batch:

 - The PrimOp calling convention is gone, primops now use the ordinary
   NativeNodeCall convention.  This means that primops and "foreign
   import prim" code must be written in high-level cmm, but they can
   now take more than 10 arguments.

 - CmmSink now does constant-folding (should fix #7219)

 - .cmm files now go through the cmmPipeline, and as a result we
   generate better code in many cases.  All the object files generated
   for the RTS .cmm files are now smaller.  Performance should be
   better too, but I haven't measured it yet.

 - RET_DYN frames are removed from the RTS, lots of code goes away

 - we now have some more canned GC points to cover unboxed-tuples with
   2-4 pointers, which will reduce code size a little.
author: Simon Marlow <marlowsd@gmail.com> 2012-10-03 09:30:56 +0100
committer: Simon Marlow <marlowsd@gmail.com> 2012-10-08 09:04:40 +0100
commit: a7c0387d20c1c9994d1100b14fbb8fb4e28a259e (patch)
tree: b95d0a512f951a4a463f1aa5178b0cd5c4fdb410 /rts/Interpreter.c
parent: aed37acd4d157791381800d5de960a2461bcbef3 (diff)
download: haskell-a7c0387d20c1c9994d1100b14fbb8fb4e28a259e.tar.gz
1 files changed, 30 insertions, 30 deletions
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index 83973e8c9b..2eb2d0789f 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -503,7 +503,7 @@ do_return:
     // 	  |   XXXX_info   |
     // 	  +---------------+
     //
-    // where XXXX_info is one of the stg_gc_unbx_r1_info family.
+    // where XXXX_info is one of the stg_ret_*_info family.
     //
     // We're only interested in the case when the real return address
     // is a BCO; otherwise we'll return to the scheduler.
@@ -512,12 +512,12 @@ do_return_unboxed:
     { 
 	int offset;
 	
-	ASSERT( Sp[0] == (W_)&stg_gc_unbx_r1_info
-		|| Sp[0] == (W_)&stg_gc_unpt_r1_info
-		|| Sp[0] == (W_)&stg_gc_f1_info
-		|| Sp[0] == (W_)&stg_gc_d1_info
-		|| Sp[0] == (W_)&stg_gc_l1_info
-		|| Sp[0] == (W_)&stg_gc_void_info // VoidRep
+        ASSERT(    Sp[0] == (W_)&stg_ret_v_info
+                || Sp[0] == (W_)&stg_ret_p_info
+                || Sp[0] == (W_)&stg_ret_n_info
+                || Sp[0] == (W_)&stg_ret_f_info
+                || Sp[0] == (W_)&stg_ret_d_info
+                || Sp[0] == (W_)&stg_ret_l_info
 	    );
 
 	// get the offset of the stg_ctoi_ret_XXX itbl
@@ -1336,27 +1336,27 @@ run_BCO:
 
 	case bci_RETURN_P:
 	    Sp--;
-	    Sp[0] = (W_)&stg_gc_unpt_r1_info;
+            Sp[0] = (W_)&stg_ret_p_info;
 	    goto do_return_unboxed;
 	case bci_RETURN_N:
 	    Sp--;
-	    Sp[0] = (W_)&stg_gc_unbx_r1_info;
+            Sp[0] = (W_)&stg_ret_n_info;
 	    goto do_return_unboxed;
 	case bci_RETURN_F:
 	    Sp--;
-	    Sp[0] = (W_)&stg_gc_f1_info;
+            Sp[0] = (W_)&stg_ret_f_info;
 	    goto do_return_unboxed;
 	case bci_RETURN_D:
 	    Sp--;
-	    Sp[0] = (W_)&stg_gc_d1_info;
+            Sp[0] = (W_)&stg_ret_d_info;
 	    goto do_return_unboxed;
 	case bci_RETURN_L:
 	    Sp--;
-	    Sp[0] = (W_)&stg_gc_l1_info;
+            Sp[0] = (W_)&stg_ret_l_info;
 	    goto do_return_unboxed;
 	case bci_RETURN_V:
 	    Sp--;
-	    Sp[0] = (W_)&stg_gc_void_info;
+            Sp[0] = (W_)&stg_ret_v_info;
 	    goto do_return_unboxed;
 
 	case bci_SWIZZLE: {
@@ -1372,9 +1372,6 @@ run_BCO:
 	    int o_itbl                = BCO_GET_LARGE_ARG;
 	    int interruptible         = BCO_NEXT;
 	    void(*marshall_fn)(void*) = (void (*)(void*))BCO_LIT(o_itbl);
-	    int ret_dyn_size = 
-		RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE
-		+ sizeofW(StgRetDyn);
 
             /* the stack looks like this:
                
@@ -1405,6 +1402,7 @@ run_BCO:
             nat nargs = cif->nargs;
             nat ret_size;
             nat i;
+            int j;
             StgPtr p;
             W_ ret[2];                  // max needed
 	    W_ *arguments[stk_offset];  // max needed
@@ -1446,17 +1444,19 @@ run_BCO:
 	    //
 	    // We know how many (non-ptr) words there are before the
 	    // next valid stack frame: it is the stk_offset arg to the
-	    // CCALL instruction.   So we build a RET_DYN stack frame
-	    // on the stack frame to describe this chunk of stack.
-	    //
-	    Sp -= ret_dyn_size;
-	    ((StgRetDyn *)Sp)->liveness = R1_PTR | N_NONPTRS(stk_offset);
-	    ((StgRetDyn *)Sp)->info = (StgInfoTable *)&stg_gc_gen_info;
+	    // CCALL instruction.   So we overwrite this area of the
+            // stack with empty stack frames (stg_ret_v_info).
+            //
+            for (j = 0; j < stk_offset; j++) {
+                Sp[j] = (W_)&stg_ret_v_info; /* an empty stack frame */
+            }
 
             // save obj (pointer to the current BCO), since this
-            // might move during the call.  We use the R1 slot in the
-            // RET_DYN frame for this, hence R1_PTR above.
-            ((StgRetDyn *)Sp)->payload[0] = (StgClosure *)obj;
+            // might move during the call.  We push an stg_ret_p frame
+            // for this.
+            Sp -= 2;
+            Sp[1] = (W_)obj;
+            Sp[0] = (W_)&stg_ret_p_info;
 
 	    SAVE_STACK_POINTERS;
 	    tok = suspendThread(&cap->r, interruptible ? rtsTrue : rtsFalse);
@@ -1464,11 +1464,11 @@ run_BCO:
 	    // We already made a copy of the arguments above.
             ffi_call(cif, fn, ret, argptrs);
 
-	    // And restart the thread again, popping the RET_DYN frame.
+            // And restart the thread again, popping the stg_ret_p frame.
 	    cap = (Capability *)((void *)((unsigned char*)resumeThread(tok) - STG_FIELD_OFFSET(Capability,r)));
 	    LOAD_STACK_POINTERS;
 
-            if (Sp[0] != (W_)&stg_gc_gen_info) {
+            if (Sp[0] != (W_)&stg_ret_p_info) {
                 // the stack is not how we left it.  This probably
                 // means that an exception got raised on exit from the
                 // foreign call, so we should just continue with
@@ -1476,16 +1476,16 @@ run_BCO:
                 RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
             }
 
-            // Re-load the pointer to the BCO from the RET_DYN frame,
+            // Re-load the pointer to the BCO from the stg_ret_p frame,
             // it might have moved during the call.  Also reload the
             // pointers to the components of the BCO.
-            obj        = ((StgRetDyn *)Sp)->payload[0];
+            obj        = (P_)Sp[1];
             bco        = (StgBCO*)obj;
             instrs     = (StgWord16*)(bco->instrs->payload);
             literals   = (StgWord*)(&bco->literals->payload[0]);
             ptrs       = (StgPtr*)(&bco->ptrs->payload[0]);
 
-	    Sp += ret_dyn_size;
+            Sp += 2; // pop the stg_ret_p frame
 	    
 	    // Save the Haskell thread's current value of errno
 	    cap->r.rCurrentTSO->saved_errno = errno;
author	Simon Marlow <marlowsd@gmail.com>	2012-10-03 09:30:56 +0100
committer	Simon Marlow <marlowsd@gmail.com>	2012-10-08 09:04:40 +0100
commit	a7c0387d20c1c9994d1100b14fbb8fb4e28a259e (patch)
tree	b95d0a512f951a4a463f1aa5178b0cd5c4fdb410 /rts/Interpreter.c
parent	aed37acd4d157791381800d5de960a2461bcbef3 (diff)
download	haskell-a7c0387d20c1c9994d1100b14fbb8fb4e28a259e.tar.gz