diff options
author | Simon Marlow <marlowsd@gmail.com> | 2012-10-03 09:30:56 +0100 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2012-10-08 09:04:40 +0100 |
commit | a7c0387d20c1c9994d1100b14fbb8fb4e28a259e (patch) | |
tree | b95d0a512f951a4a463f1aa5178b0cd5c4fdb410 /rts/Interpreter.c | |
parent | aed37acd4d157791381800d5de960a2461bcbef3 (diff) | |
download | haskell-a7c0387d20c1c9994d1100b14fbb8fb4e28a259e.tar.gz |
Produce new-style Cmm from the Cmm parser
The main change here is that the Cmm parser now allows high-level cmm
code with argument-passing and function calls. For example:
    foo ( gcptr a, bits32 b )
    {
        if (b > 0) {
            // we can make tail calls passing arguments:
            jump stg_ap_0_fast(a);
        }
        return (x,y);
    }
More details on the new cmm syntax are in Note [Syntax of .cmm files]
in CmmParse.y.
The old syntax is still more-or-less supported for those occasional
code fragments that really need to explicitly manipulate the stack.
However there are a couple of differences: it is now obligatory to
give a list of live GlobalRegs on every jump, e.g.
    jump %ENTRY_CODE(Sp(0)) [R1];
Again, more details in Note [Syntax of .cmm files].
I have rewritten most of the .cmm files in the RTS into the new
syntax, except for AutoApply.cmm which is generated by the genapply
program: this file could be generated in the new syntax instead and
would probably be better off for it, but I ran out of enthusiasm.
Some other changes in this batch:
- The PrimOp calling convention is gone; primops now use the ordinary
NativeNodeCall convention. This means that primops and "foreign
import prim" code must be written in high-level cmm, but they can
now take more than 10 arguments.
- CmmSink now does constant-folding (should fix #7219)
- .cmm files now go through the cmmPipeline, and as a result we
generate better code in many cases. All the object files generated
for the RTS .cmm files are now smaller. Performance should be
better too, but I haven't measured it yet.
- RET_DYN frames are removed from the RTS, so lots of code goes away
- we now have some more canned GC points to cover unboxed-tuples with
2-4 pointers, which will reduce code size a little.
Diffstat (limited to 'rts/Interpreter.c')
-rw-r--r-- | rts/Interpreter.c | 60 |
1 files changed, 30 insertions, 30 deletions
diff --git a/rts/Interpreter.c b/rts/Interpreter.c index 83973e8c9b..2eb2d0789f 100644 --- a/rts/Interpreter.c +++ b/rts/Interpreter.c @@ -503,7 +503,7 @@ do_return: // | XXXX_info | // +---------------+ // - // where XXXX_info is one of the stg_gc_unbx_r1_info family. + // where XXXX_info is one of the stg_ret_*_info family. // // We're only interested in the case when the real return address // is a BCO; otherwise we'll return to the scheduler. @@ -512,12 +512,12 @@ do_return_unboxed: { int offset; - ASSERT( Sp[0] == (W_)&stg_gc_unbx_r1_info - || Sp[0] == (W_)&stg_gc_unpt_r1_info - || Sp[0] == (W_)&stg_gc_f1_info - || Sp[0] == (W_)&stg_gc_d1_info - || Sp[0] == (W_)&stg_gc_l1_info - || Sp[0] == (W_)&stg_gc_void_info // VoidRep + ASSERT( Sp[0] == (W_)&stg_ret_v_info + || Sp[0] == (W_)&stg_ret_p_info + || Sp[0] == (W_)&stg_ret_n_info + || Sp[0] == (W_)&stg_ret_f_info + || Sp[0] == (W_)&stg_ret_d_info + || Sp[0] == (W_)&stg_ret_l_info ); // get the offset of the stg_ctoi_ret_XXX itbl @@ -1336,27 +1336,27 @@ run_BCO: case bci_RETURN_P: Sp--; - Sp[0] = (W_)&stg_gc_unpt_r1_info; + Sp[0] = (W_)&stg_ret_p_info; goto do_return_unboxed; case bci_RETURN_N: Sp--; - Sp[0] = (W_)&stg_gc_unbx_r1_info; + Sp[0] = (W_)&stg_ret_n_info; goto do_return_unboxed; case bci_RETURN_F: Sp--; - Sp[0] = (W_)&stg_gc_f1_info; + Sp[0] = (W_)&stg_ret_f_info; goto do_return_unboxed; case bci_RETURN_D: Sp--; - Sp[0] = (W_)&stg_gc_d1_info; + Sp[0] = (W_)&stg_ret_d_info; goto do_return_unboxed; case bci_RETURN_L: Sp--; - Sp[0] = (W_)&stg_gc_l1_info; + Sp[0] = (W_)&stg_ret_l_info; goto do_return_unboxed; case bci_RETURN_V: Sp--; - Sp[0] = (W_)&stg_gc_void_info; + Sp[0] = (W_)&stg_ret_v_info; goto do_return_unboxed; case bci_SWIZZLE: { @@ -1372,9 +1372,6 @@ run_BCO: int o_itbl = BCO_GET_LARGE_ARG; int interruptible = BCO_NEXT; void(*marshall_fn)(void*) = (void (*)(void*))BCO_LIT(o_itbl); - int ret_dyn_size = - RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE - + sizeofW(StgRetDyn); /* the stack looks 
like this: @@ -1405,6 +1402,7 @@ run_BCO: nat nargs = cif->nargs; nat ret_size; nat i; + int j; StgPtr p; W_ ret[2]; // max needed W_ *arguments[stk_offset]; // max needed @@ -1446,17 +1444,19 @@ run_BCO: // // We know how many (non-ptr) words there are before the // next valid stack frame: it is the stk_offset arg to the - // CCALL instruction. So we build a RET_DYN stack frame - // on the stack frame to describe this chunk of stack. - // - Sp -= ret_dyn_size; - ((StgRetDyn *)Sp)->liveness = R1_PTR | N_NONPTRS(stk_offset); - ((StgRetDyn *)Sp)->info = (StgInfoTable *)&stg_gc_gen_info; + // CCALL instruction. So we overwrite this area of the + // stack with empty stack frames (stg_ret_v_info); + // + for (j = 0; j < stk_offset; j++) { + Sp[j] = (W_)&stg_ret_v_info; /* an empty stack frame */ + } // save obj (pointer to the current BCO), since this - // might move during the call. We use the R1 slot in the - // RET_DYN frame for this, hence R1_PTR above. - ((StgRetDyn *)Sp)->payload[0] = (StgClosure *)obj; + // might move during the call. We push an stg_ret_p frame + // for this. + Sp -= 2; + Sp[1] = (W_)obj; + Sp[0] = (W_)&stg_ret_p_info; SAVE_STACK_POINTERS; tok = suspendThread(&cap->r, interruptible ? rtsTrue : rtsFalse); @@ -1464,11 +1464,11 @@ run_BCO: // We already made a copy of the arguments above. ffi_call(cif, fn, ret, argptrs); - // And restart the thread again, popping the RET_DYN frame. + // And restart the thread again, popping the stg_ret_p frame. cap = (Capability *)((void *)((unsigned char*)resumeThread(tok) - STG_FIELD_OFFSET(Capability,r))); LOAD_STACK_POINTERS; - if (Sp[0] != (W_)&stg_gc_gen_info) { + if (Sp[0] != (W_)&stg_ret_p_info) { // the stack is not how we left it. 
This probably // means that an exception got raised on exit from the // foreign call, so we should just continue with @@ -1476,16 +1476,16 @@ run_BCO: RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding); } - // Re-load the pointer to the BCO from the RET_DYN frame, + // Re-load the pointer to the BCO from the stg_ret_p frame, // it might have moved during the call. Also reload the // pointers to the components of the BCO. - obj = ((StgRetDyn *)Sp)->payload[0]; + obj = (P_)Sp[1]; bco = (StgBCO*)obj; instrs = (StgWord16*)(bco->instrs->payload); literals = (StgWord*)(&bco->literals->payload[0]); ptrs = (StgPtr*)(&bco->ptrs->payload[0]); - Sp += ret_dyn_size; + Sp += 2; // pop the stg_ret_p frame // Save the Haskell thread's current value of errno cap->r.rCurrentTSO->saved_errno = errno; |