summaryrefslogtreecommitdiff
path: root/rts/Interpreter.c
diff options
context:
space:
mode:
author Simon Marlow <marlowsd@gmail.com> 2012-10-03 09:30:56 +0100
committer Simon Marlow <marlowsd@gmail.com> 2012-10-08 09:04:40 +0100
commit a7c0387d20c1c9994d1100b14fbb8fb4e28a259e (patch)
tree b95d0a512f951a4a463f1aa5178b0cd5c4fdb410 /rts/Interpreter.c
parent aed37acd4d157791381800d5de960a2461bcbef3 (diff)
download haskell-a7c0387d20c1c9994d1100b14fbb8fb4e28a259e.tar.gz
Produce new-style Cmm from the Cmm parser
The main change here is that the Cmm parser now allows high-level cmm
code with argument-passing and function calls. For example:

    foo ( gcptr a, bits32 b )
    {
      if (b > 0) {
         // we can make tail calls passing arguments:
         jump stg_ap_0_fast(a);
      }

      return (x,y);
    }

More details on the new cmm syntax are in Note [Syntax of .cmm files]
in CmmParse.y.

The old syntax is still more-or-less supported for those occasional
code fragments that really need to explicitly manipulate the stack.
However there are a couple of differences: it is now obligatory to
give a list of live GlobalRegs on every jump, e.g.

    jump %ENTRY_CODE(Sp(0)) [R1];

Again, more details in Note [Syntax of .cmm files].

I have rewritten most of the .cmm files in the RTS into the new
syntax, except for AutoApply.cmm which is generated by the genapply
program: this file could be generated in the new syntax instead and
would probably be better off for it, but I ran out of enthusiasm.

Some other changes in this batch:

 - The PrimOp calling convention is gone, primops now use the ordinary
   NativeNodeCall convention. This means that primops and "foreign
   import prim" code must be written in high-level cmm, but they can
   now take more than 10 arguments.

 - CmmSink now does constant-folding (should fix #7219)

 - .cmm files now go through the cmmPipeline, and as a result we
   generate better code in many cases. All the object files generated
   for the RTS .cmm files are now smaller. Performance should be
   better too, but I haven't measured it yet.

 - RET_DYN frames are removed from the RTS, lots of code goes away

 - we now have some more canned GC points to cover unboxed-tuples with
   2-4 pointers, which will reduce code size a little.
Diffstat (limited to 'rts/Interpreter.c')
-rw-r--r-- rts/Interpreter.c 60
1 files changed, 30 insertions, 30 deletions
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index 83973e8c9b..2eb2d0789f 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -503,7 +503,7 @@ do_return:
// | XXXX_info |
// +---------------+
//
- // where XXXX_info is one of the stg_gc_unbx_r1_info family.
+ // where XXXX_info is one of the stg_ret_*_info family.
//
// We're only interested in the case when the real return address
// is a BCO; otherwise we'll return to the scheduler.
@@ -512,12 +512,12 @@ do_return_unboxed:
{
int offset;
- ASSERT( Sp[0] == (W_)&stg_gc_unbx_r1_info
- || Sp[0] == (W_)&stg_gc_unpt_r1_info
- || Sp[0] == (W_)&stg_gc_f1_info
- || Sp[0] == (W_)&stg_gc_d1_info
- || Sp[0] == (W_)&stg_gc_l1_info
- || Sp[0] == (W_)&stg_gc_void_info // VoidRep
+ ASSERT( Sp[0] == (W_)&stg_ret_v_info
+ || Sp[0] == (W_)&stg_ret_p_info
+ || Sp[0] == (W_)&stg_ret_n_info
+ || Sp[0] == (W_)&stg_ret_f_info
+ || Sp[0] == (W_)&stg_ret_d_info
+ || Sp[0] == (W_)&stg_ret_l_info
);
// get the offset of the stg_ctoi_ret_XXX itbl
@@ -1336,27 +1336,27 @@ run_BCO:
case bci_RETURN_P:
Sp--;
- Sp[0] = (W_)&stg_gc_unpt_r1_info;
+ Sp[0] = (W_)&stg_ret_p_info;
goto do_return_unboxed;
case bci_RETURN_N:
Sp--;
- Sp[0] = (W_)&stg_gc_unbx_r1_info;
+ Sp[0] = (W_)&stg_ret_n_info;
goto do_return_unboxed;
case bci_RETURN_F:
Sp--;
- Sp[0] = (W_)&stg_gc_f1_info;
+ Sp[0] = (W_)&stg_ret_f_info;
goto do_return_unboxed;
case bci_RETURN_D:
Sp--;
- Sp[0] = (W_)&stg_gc_d1_info;
+ Sp[0] = (W_)&stg_ret_d_info;
goto do_return_unboxed;
case bci_RETURN_L:
Sp--;
- Sp[0] = (W_)&stg_gc_l1_info;
+ Sp[0] = (W_)&stg_ret_l_info;
goto do_return_unboxed;
case bci_RETURN_V:
Sp--;
- Sp[0] = (W_)&stg_gc_void_info;
+ Sp[0] = (W_)&stg_ret_v_info;
goto do_return_unboxed;
case bci_SWIZZLE: {
@@ -1372,9 +1372,6 @@ run_BCO:
int o_itbl = BCO_GET_LARGE_ARG;
int interruptible = BCO_NEXT;
void(*marshall_fn)(void*) = (void (*)(void*))BCO_LIT(o_itbl);
- int ret_dyn_size =
- RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE
- + sizeofW(StgRetDyn);
/* the stack looks like this:
@@ -1405,6 +1402,7 @@ run_BCO:
nat nargs = cif->nargs;
nat ret_size;
nat i;
+ int j;
StgPtr p;
W_ ret[2]; // max needed
W_ *arguments[stk_offset]; // max needed
@@ -1446,17 +1444,19 @@ run_BCO:
//
// We know how many (non-ptr) words there are before the
// next valid stack frame: it is the stk_offset arg to the
- // CCALL instruction. So we build a RET_DYN stack frame
- // on the stack frame to describe this chunk of stack.
- //
- Sp -= ret_dyn_size;
- ((StgRetDyn *)Sp)->liveness = R1_PTR | N_NONPTRS(stk_offset);
- ((StgRetDyn *)Sp)->info = (StgInfoTable *)&stg_gc_gen_info;
+ // CCALL instruction. So we overwrite this area of the
+ // stack with empty stack frames (stg_ret_v_info);
+ //
+ for (j = 0; j < stk_offset; j++) {
+ Sp[j] = (W_)&stg_ret_v_info; /* an empty stack frame */
+ }
// save obj (pointer to the current BCO), since this
- // might move during the call. We use the R1 slot in the
- // RET_DYN frame for this, hence R1_PTR above.
- ((StgRetDyn *)Sp)->payload[0] = (StgClosure *)obj;
+ // might move during the call. We push an stg_ret_p frame
+ // for this.
+ Sp -= 2;
+ Sp[1] = (W_)obj;
+ Sp[0] = (W_)&stg_ret_p_info;
SAVE_STACK_POINTERS;
tok = suspendThread(&cap->r, interruptible ? rtsTrue : rtsFalse);
@@ -1464,11 +1464,11 @@ run_BCO:
// We already made a copy of the arguments above.
ffi_call(cif, fn, ret, argptrs);
- // And restart the thread again, popping the RET_DYN frame.
+ // And restart the thread again, popping the stg_ret_p frame.
cap = (Capability *)((void *)((unsigned char*)resumeThread(tok) - STG_FIELD_OFFSET(Capability,r)));
LOAD_STACK_POINTERS;
- if (Sp[0] != (W_)&stg_gc_gen_info) {
+ if (Sp[0] != (W_)&stg_ret_p_info) {
// the stack is not how we left it. This probably
// means that an exception got raised on exit from the
// foreign call, so we should just continue with
@@ -1476,16 +1476,16 @@ run_BCO:
RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding);
}
- // Re-load the pointer to the BCO from the RET_DYN frame,
+ // Re-load the pointer to the BCO from the stg_ret_p frame,
// it might have moved during the call. Also reload the
// pointers to the components of the BCO.
- obj = ((StgRetDyn *)Sp)->payload[0];
+ obj = (P_)Sp[1];
bco = (StgBCO*)obj;
instrs = (StgWord16*)(bco->instrs->payload);
literals = (StgWord*)(&bco->literals->payload[0]);
ptrs = (StgPtr*)(&bco->ptrs->payload[0]);
- Sp += ret_dyn_size;
+ Sp += 2; // pop the stg_ret_p frame
// Save the Haskell thread's current value of errno
cap->r.rCurrentTSO->saved_errno = errno;