diff options
author | Simon Marlow <marlowsd@gmail.com> | 2012-10-03 09:30:56 +0100 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2012-10-08 09:04:40 +0100 |
commit | a7c0387d20c1c9994d1100b14fbb8fb4e28a259e (patch) | |
tree | b95d0a512f951a4a463f1aa5178b0cd5c4fdb410 /rts/StgStartup.cmm | |
parent | aed37acd4d157791381800d5de960a2461bcbef3 (diff) | |
download | haskell-a7c0387d20c1c9994d1100b14fbb8fb4e28a259e.tar.gz |
Produce new-style Cmm from the Cmm parser
The main change here is that the Cmm parser now allows high-level cmm
code with argument-passing and function calls. For example:
    foo ( gcptr a, bits32 b )
    {
      if (b > 0) {
         // we can make tail calls passing arguments:
         jump stg_ap_0_fast(a);
      }
      return (x,y);
    }
More details on the new cmm syntax are in Note [Syntax of .cmm files]
in CmmParse.y.
The old syntax is still more-or-less supported for those occasional
code fragments that really need to explicitly manipulate the stack.
However there are a couple of differences: it is now obligatory to
give a list of live GlobalRegs on every jump, e.g.
    jump %ENTRY_CODE(Sp(0)) [R1];
Again, more details in Note [Syntax of .cmm files].
I have rewritten most of the .cmm files in the RTS into the new
syntax, except for AutoApply.cmm which is generated by the genapply
program: this file could be generated in the new syntax instead and
would probably be better off for it, but I ran out of enthusiasm.
Some other changes in this batch:
- The PrimOp calling convention is gone; primops now use the ordinary
  NativeNodeCall convention. This means that primops and "foreign
  import prim" code must be written in high-level cmm, but they can
  now take more than 10 arguments.
- CmmSink now does constant-folding (should fix #7219)
- .cmm files now go through the cmmPipeline, and as a result we
  generate better code in many cases. All the object files generated
  for the RTS .cmm files are now smaller. Performance should be
  better too, but I haven't measured it yet.
- RET_DYN frames are removed from the RTS, lots of code goes away
- we now have some more canned GC points to cover unboxed-tuples with
  2-4 pointers, which will reduce code size a little.
Diffstat (limited to 'rts/StgStartup.cmm')
-rw-r--r-- | rts/StgStartup.cmm | 56 |
1 file changed, 32 insertions, 24 deletions
diff --git a/rts/StgStartup.cmm b/rts/StgStartup.cmm index 4aace82deb..6793913464 100644 --- a/rts/StgStartup.cmm +++ b/rts/StgStartup.cmm @@ -35,11 +35,9 @@ -------------------------------------------------------------------------- */ INFO_TABLE_RET(stg_stop_thread, STOP_FRAME, -#if defined(PROFILING) - W_ unused, - W_ unused -#endif -) + W_ info_ptr, + PROF_HDR_FIELDS(W_)) +/* no return list: explicit stack layout */ { /* The final exit. @@ -75,7 +73,7 @@ INFO_TABLE_RET(stg_stop_thread, STOP_FRAME, StgRegTable_rRet(BaseReg) = ThreadFinished; R1 = BaseReg; - jump StgReturn; + jump StgReturn [R1]; } /* ----------------------------------------------------------------------------- @@ -87,46 +85,57 @@ INFO_TABLE_RET(stg_stop_thread, STOP_FRAME, the thread's state away nicely. -------------------------------------------------------------------------- */ -stg_returnToStackTop +stg_returnToStackTop /* no args: explicit stack layout */ { LOAD_THREAD_STATE(); CHECK_SENSIBLE_REGS(); - jump %ENTRY_CODE(Sp(0)); + jump %ENTRY_CODE(Sp(0)) []; } -stg_returnToSched +stg_returnToSched /* no args: explicit stack layout */ { + W_ r1; + r1 = R1; // foreign calls may clobber R1 SAVE_THREAD_STATE(); foreign "C" threadPaused(MyCapability() "ptr", CurrentTSO); - jump StgReturn; + R1 = r1; + jump StgReturn [R1]; } // A variant of stg_returnToSched that doesn't call threadPaused() on the // current thread. This is used for switching from compiled execution to the // interpreter, where calling threadPaused() on every switch would be too // expensive. -stg_returnToSchedNotPaused +stg_returnToSchedNotPaused /* no args: explicit stack layout */ { SAVE_THREAD_STATE(); - jump StgReturn; + jump StgReturn [R1]; } // A variant of stg_returnToSched, but instead of returning directly to the // scheduler, we jump to the code fragment pointed to by R2. This lets us // perform some final actions after making the thread safe, such as unlocking // the MVar on which we are about to block in SMP mode. 
-stg_returnToSchedButFirst +stg_returnToSchedButFirst /* no args: explicit stack layout */ { + W_ r1, r2, r3; + r1 = R1; + r2 = R2; + r3 = R3; SAVE_THREAD_STATE(); + // foreign calls may clobber R1/R2/.., so we save them above foreign "C" threadPaused(MyCapability() "ptr", CurrentTSO); - jump R2; + R1 = r1; + R2 = r2; + R3 = r3; + jump R2 [R1,R3]; } -stg_threadFinished +stg_threadFinished /* no args: explicit stack layout */ { StgRegTable_rRet(BaseReg) = ThreadFinished; R1 = BaseReg; - jump StgReturn; + jump StgReturn [R1]; } /* ----------------------------------------------------------------------------- @@ -143,31 +152,30 @@ stg_threadFinished ------------------------------------------------------------------------- */ -INFO_TABLE_RET(stg_forceIO, RET_SMALL) - +INFO_TABLE_RET(stg_forceIO, RET_SMALL, P_ info_ptr) + return (P_ ret) { - Sp_adj(1); - ENTER(); + ENTER(ret); } /* ----------------------------------------------------------------------------- Special STG entry points for module registration. -------------------------------------------------------------------------- */ -stg_init_finish +stg_init_finish /* no args: explicit stack layout */ { - jump StgReturn; + jump StgReturn []; } /* On entry to stg_init: * init_stack[0] = &stg_init_ret; * init_stack[1] = __stginit_Something; */ -stg_init +stg_init /* no args: explicit stack layout */ { W_ next; Sp = W_[BaseReg + OFFSET_StgRegTable_rSp]; next = W_[Sp]; Sp_adj(1); - jump next; + jump next []; } |