diff options
author | Simon Marlow <marlowsd@gmail.com> | 2012-10-03 09:30:56 +0100 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2012-10-08 09:04:40 +0100 |
commit | a7c0387d20c1c9994d1100b14fbb8fb4e28a259e (patch) | |
tree | b95d0a512f951a4a463f1aa5178b0cd5c4fdb410 /includes | |
parent | aed37acd4d157791381800d5de960a2461bcbef3 (diff) | |
download | haskell-a7c0387d20c1c9994d1100b14fbb8fb4e28a259e.tar.gz |
Produce new-style Cmm from the Cmm parser
The main change here is that the Cmm parser now allows high-level cmm
code with argument-passing and function calls. For example:
    foo ( gcptr a, bits32 b )
    {
      if (b > 0) {
        // we can make tail calls passing arguments:
        jump stg_ap_0_fast(a);
      }
      return (x,y);
    }
More details on the new cmm syntax are in Note [Syntax of .cmm files]
in CmmParse.y.
The old syntax is still more-or-less supported for those occasional
code fragments that really need to explicitly manipulate the stack.
However, there are a couple of differences; most notably, it is now
obligatory to give a list of live GlobalRegs on every jump, e.g.
    jump %ENTRY_CODE(Sp(0)) [R1];
Again, more details in Note [Syntax of .cmm files].
I have rewritten most of the .cmm files in the RTS into the new
syntax, except for AutoApply.cmm which is generated by the genapply
program: this file could be generated in the new syntax instead and
would probably be better off for it, but I ran out of enthusiasm.
Some other changes in this batch:
- The PrimOp calling convention is gone; primops now use the ordinary
  NativeNodeCall convention. This means that primops and "foreign
  import prim" code must be written in high-level cmm, but they can
  now take more than 10 arguments.
- CmmSink now does constant-folding (should fix #7219)
- .cmm files now go through the cmmPipeline, and as a result we
generate better code in many cases. All the object files generated
for the RTS .cmm files are now smaller. Performance should be
better too, but I haven't measured it yet.
- RET_DYN frames are removed from the RTS; lots of code goes away
- we now have some more canned GC points to cover unboxed-tuples with
2-4 pointers, which will reduce code size a little.
Diffstat (limited to 'includes')
-rw-r--r-- | includes/Cmm.h | 293 | ||||
-rw-r--r-- | includes/Rts.h | 1 | ||||
-rw-r--r-- | includes/rts/Constants.h | 24 | ||||
-rw-r--r-- | includes/rts/storage/ClosureMacros.h | 8 | ||||
-rw-r--r-- | includes/rts/storage/ClosureTypes.h | 55 | ||||
-rw-r--r-- | includes/rts/storage/Closures.h | 56 | ||||
-rw-r--r-- | includes/rts/storage/Liveness.h | 34 | ||||
-rw-r--r-- | includes/rts/storage/SMPClosureOps.h | 2 | ||||
-rw-r--r-- | includes/stg/MiscClosures.h | 55 | ||||
-rw-r--r-- | includes/stg/Regs.h | 331 |
10 files changed, 264 insertions, 595 deletions
diff --git a/includes/Cmm.h b/includes/Cmm.h index edcf46e7c0..afe08a26a3 100644 --- a/includes/Cmm.h +++ b/includes/Cmm.h @@ -9,36 +9,6 @@ * * For the syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y. * - * If you're used to the old HC file syntax, here's a quick cheat sheet - * for converting HC code: - * - * - Remove FB_/FE_ - * - Remove all type casts - * - Remove '&' - * - STGFUN(foo) { ... } ==> foo { ... } - * - FN_(foo) { ... } ==> foo { ... } - * - JMP_(e) ==> jump e; - * - Remove EXTFUN(foo) - * - Sp[n] ==> Sp(n) - * - Hp[n] ==> Hp(n) - * - Sp += n ==> Sp_adj(n) - * - Hp += n ==> Hp_adj(n) - * - R1.i ==> R1 (similarly for R1.w, R1.cl etc.) - * - You need to explicitly dereference variables; eg. - * alloc_blocks ==> CInt[alloc_blocks] - * - convert all word offsets into byte offsets: - * - e ==> WDS(e) - * - sizeofW(StgFoo) ==> SIZEOF_StgFoo - * - ENTRY_CODE(e) ==> %ENTRY_CODE(e) - * - get_itbl(c) ==> %GET_STD_INFO(c) - * - Change liveness masks in STK_CHK_GEN, HP_CHK_GEN: - * R1_PTR | R2_PTR ==> R1_PTR & R2_PTR - * (NOTE: | becomes &) - * - Declarations like 'StgPtr p;' become just 'W_ p;' - * - e->payload[n] ==> PAYLOAD(e,n) - * - Be very careful with comparisons: the infix versions (>, >=, etc.) - * are unsigned, so use %lt(a,b) to get signed less-than for example. - * * Accessing fields of structures defined in the RTS header files is * done via automatically-generated macros in DerivedConstants.h. 
For * example, where previously we used @@ -136,6 +106,8 @@ Misc useful stuff -------------------------------------------------------------------------- */ +#define ccall foreign "C" + #define NULL (0::W_) #define STRING(name,str) \ @@ -210,7 +182,7 @@ #define Sp(n) W_[Sp + WDS(n)] #define Hp(n) W_[Hp + WDS(n)] -#define Sp_adj(n) Sp = Sp + WDS(n) +#define Sp_adj(n) Sp = Sp + WDS(n) /* pronounced "spadge" */ #define Hp_adj(n) Hp = Hp + WDS(n) /* ----------------------------------------------------------------------------- @@ -278,25 +250,37 @@ #define LOAD_INFO \ info = %INFO_PTR(UNTAG(P1)); -#define UNTAG_R1 \ - P1 = UNTAG(P1); +#define MAYBE_UNTAG(x) UNTAG(x); #else -#define LOAD_INFO \ - if (GETTAG(P1) != 0) { \ - jump %ENTRY_CODE(Sp(0)); \ +#define LOAD_INFO(ret,x) \ + if (GETTAG(x) != 0) { \ + ret(x); \ } \ - info = %INFO_PTR(P1); + info = %INFO_PTR(x); -#define UNTAG_R1 /* nothing */ +#define MAYBE_UNTAG(x) (x) /* already untagged */ #endif -#define ENTER() \ +// We need two versions of ENTER(): +// - ENTER(x) takes the closure as an argument and uses return(), +// for use in civilized code where the stack is handled by GHC +// +// - ENTER_NOSTACK() where the closure is in R1, and returns are +// explicit jumps, for use when we are doing the stack management +// ourselves. + +#define ENTER(x) ENTER_(return,x) +#define ENTER_R1() ENTER_(RET_R1,R1) + +#define RET_R1(x) jump %ENTRY_CODE(Sp(0)) [R1] + +#define ENTER_(ret,x) \ again: \ W_ info; \ - LOAD_INFO \ + LOAD_INFO(ret,x) \ switch [INVALID_OBJECT .. 
N_CLOSURE_TYPES] \ (TO_W_( %INFO_TYPE(%STD_INFO(info)) )) { \ case \ @@ -304,7 +288,7 @@ IND_PERM, \ IND_STATIC: \ { \ - P1 = StgInd_indirectee(P1); \ + x = StgInd_indirectee(x); \ goto again; \ } \ case \ @@ -318,12 +302,12 @@ BCO, \ PAP: \ { \ - jump %ENTRY_CODE(Sp(0)); \ + ret(x); \ } \ default: \ { \ - UNTAG_R1 \ - jump %ENTRY_CODE(info); \ + x = MAYBE_UNTAG(x); \ + jump %ENTRY_CODE(info) (x); \ } \ } @@ -348,7 +332,6 @@ */ #include "stg/RtsMachRegs.h" -#include "rts/storage/Liveness.h" #include "rts/prof/LDV.h" #undef BLOCK_SIZE @@ -359,6 +342,18 @@ #define MyCapability() (BaseReg - OFFSET_Capability_r) /* ------------------------------------------------------------------------- + Info tables + ------------------------------------------------------------------------- */ + +#if defined(PROFILING) +#define PROF_HDR_FIELDS(w_) \ + w_ prof_hdr_1, \ + w_ prof_hdr_2, +#else +#define PROF_HDR_FIELDS(w_) /* nothing */ +#endif + +/* ------------------------------------------------------------------------- Allocation and garbage collection ------------------------------------------------------------------------- */ @@ -371,30 +366,134 @@ * ticky-ticky. It's not clear whether eg. the size field of an array * should be counted as "admin", or the various fields of a BCO. 
*/ -#define ALLOC_PRIM(bytes,liveness,reentry) \ - HP_CHK_GEN_TICKY(bytes,liveness,reentry); \ +#define ALLOC_PRIM(bytes) \ + HP_CHK_GEN_TICKY(bytes); \ TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \ CCCS_ALLOC(bytes); +#define HEAP_CHECK(bytes,failure) \ + Hp = Hp + bytes; \ + if (Hp > HpLim) { HpAlloc = bytes; failure; } \ + TICK_ALLOC_HEAP_NOCTR(bytes); + +#define ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,failure) \ + HEAP_CHECK(bytes,failure) \ + TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \ + CCCS_ALLOC(bytes); + +#define ALLOC_PRIM_P(bytes,fun,arg) \ + ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_P(fun,arg)); + +#define ALLOC_PRIM_N(bytes,fun,arg) \ + ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_N(fun,arg)); + /* CCS_ALLOC wants the size in words, because ccs->mem_alloc is in words */ #define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), CCCS) -#define HP_CHK_GEN_TICKY(alloc,liveness,reentry) \ - HP_CHK_GEN(alloc,liveness,reentry); \ +#define HP_CHK_GEN_TICKY(alloc) \ + HP_CHK_GEN(alloc); \ TICK_ALLOC_HEAP_NOCTR(alloc); +#define HP_CHK_P(bytes, fun, arg) \ + HEAP_CHECK(bytes, GC_PRIM_P(fun,arg)) + +#define ALLOC_P_TICKY(alloc, fun, arg) \ + HP_CHK_P(alloc); \ + TICK_ALLOC_HEAP_NOCTR(alloc); + +#define CHECK_GC() \ + (bdescr_link(CurrentNursery) == NULL || \ + generation_n_new_large_words(W_[g0]) >= TO_W_(CLong[large_alloc_lim])) + // allocate() allocates from the nursery, so we check to see // whether the nursery is nearly empty in any function that uses // allocate() - this includes many of the primops. 
-#define MAYBE_GC(liveness,reentry) \ - if (bdescr_link(CurrentNursery) == NULL || \ - generation_n_new_large_words(W_[g0]) >= TO_W_(CLong[large_alloc_lim])) { \ - R9 = liveness; \ - R10 = reentry; \ - HpAlloc = 0; \ - jump stg_gc_gen_hp; \ +// +// HACK alert: the __L__ stuff is here to coax the common-block +// eliminator into commoning up the call stg_gc_noregs() with the same +// code that gets generated by a STK_CHK_GEN() in the same proc. We +// also need an if (0) { goto __L__; } so that the __L__ label isn't +// optimised away by the control-flow optimiser prior to common-block +// elimination (it will be optimised away later). +// +// This saves some code in gmp-wrappers.cmm where we have lots of +// MAYBE_GC() in the same proc as STK_CHK_GEN(). +// +#define MAYBE_GC(retry) \ + if (CHECK_GC()) { \ + HpAlloc = 0; \ + goto __L__; \ + __L__: \ + call stg_gc_noregs(); \ + goto retry; \ + } \ + if (0) { goto __L__; } + +#define GC_PRIM(fun) \ + R9 = fun; \ + jump stg_gc_prim(); + +#define GC_PRIM_N(fun,arg) \ + R9 = fun; \ + jump stg_gc_prim_n(arg); + +#define GC_PRIM_P(fun,arg) \ + R9 = fun; \ + jump stg_gc_prim_p(arg); + +#define GC_PRIM_PP(fun,arg1,arg2) \ + R9 = fun; \ + jump stg_gc_prim_pp(arg1,arg2); + +#define MAYBE_GC_(fun) \ + if (CHECK_GC()) { \ + HpAlloc = 0; \ + GC_PRIM(fun) \ + } + +#define MAYBE_GC_N(fun,arg) \ + if (CHECK_GC()) { \ + HpAlloc = 0; \ + GC_PRIM_N(fun,arg) \ + } + +#define MAYBE_GC_P(fun,arg) \ + if (CHECK_GC()) { \ + HpAlloc = 0; \ + GC_PRIM_P(fun,arg) \ } +#define MAYBE_GC_PP(fun,arg1,arg2) \ + if (CHECK_GC()) { \ + HpAlloc = 0; \ + GC_PRIM_PP(fun,arg1,arg2) \ + } + +#define STK_CHK(n, fun) \ + if (Sp - n < SpLim) { \ + GC_PRIM(fun) \ + } + +#define STK_CHK_P(n, fun, arg) \ + if (Sp - n < SpLim) { \ + GC_PRIM_P(fun,arg) \ + } + +#define STK_CHK_PP(n, fun, arg1, arg2) \ + if (Sp - n < SpLim) { \ + GC_PRIM_PP(fun,arg1,arg2) \ + } + +#define STK_CHK_ENTER(n, closure) \ + if (Sp - n < SpLim) { \ + jump __stg_gc_enter_1(closure); \ + } + 
+// A funky heap check used by AutoApply.cmm + +#define HP_CHK_NP_ASSIGN_SP0(size,f) \ + HEAP_CHECK(size, Sp(0) = f; jump __stg_gc_enter_1 [R1];) + /* ----------------------------------------------------------------------------- Closure headers -------------------------------------------------------------------------- */ @@ -481,23 +580,6 @@ #endif /* ----------------------------------------------------------------------------- - Voluntary Yields/Blocks - - We only have a generic version of this at the moment - if it turns - out to be slowing us down we can make specialised ones. - -------------------------------------------------------------------------- */ - -#define YIELD(liveness,reentry) \ - R9 = liveness; \ - R10 = reentry; \ - jump stg_gen_yield; - -#define BLOCK(liveness,reentry) \ - R9 = liveness; \ - R10 = reentry; \ - jump stg_gen_block; - -/* ----------------------------------------------------------------------------- Ticky macros -------------------------------------------------------------------------- */ @@ -585,6 +667,63 @@ TICK_BUMP_BY(ALLOC_HEAP_tot,n) /* ----------------------------------------------------------------------------- + Saving and restoring STG registers + + STG registers must be saved around a C call, just in case the STG + register is mapped to a caller-saves machine register. Normally we + don't need to worry about this the code generator has already + loaded any live STG registers into variables for us, but in + hand-written low-level Cmm code where we don't know which registers + are live, we might have to save them all. 
+ -------------------------------------------------------------------------- */ + +#define SAVE_STGREGS \ + W_ r1, r2, r3, r4, r5, r6, r7, r8; \ + F_ f1, f2, f3, f4; \ + D_ d1, d2; \ + L_ l1; \ + \ + r1 = R1; \ + r2 = R2; \ + r3 = R3; \ + r4 = R4; \ + r5 = R5; \ + r6 = R6; \ + r7 = R7; \ + r8 = R8; \ + \ + f1 = F1; \ + f2 = F2; \ + f3 = F3; \ + f4 = F4; \ + \ + d1 = D1; \ + d2 = D2; \ + \ + l1 = L1; + + +#define RESTORE_STGREGS \ + R1 = r1; \ + R2 = r2; \ + R3 = r3; \ + R4 = r4; \ + R5 = r5; \ + R6 = r6; \ + R7 = r7; \ + R8 = r8; \ + \ + F1 = f1; \ + F2 = f2; \ + F3 = f3; \ + F4 = f4; \ + \ + D1 = d1; \ + D2 = d2; \ + \ + L1 = l1; + +/* ----------------------------------------------------------------------------- Misc junk -------------------------------------------------------------------------- */ @@ -592,14 +731,14 @@ #define END_TSO_QUEUE stg_END_TSO_QUEUE_closure #define END_INVARIANT_CHECK_QUEUE stg_END_INVARIANT_CHECK_QUEUE_closure -#define recordMutableCap(p, gen, regs) \ +#define recordMutableCap(p, gen) \ W_ __bd; \ W_ mut_list; \ mut_list = Capability_mut_lists(MyCapability()) + WDS(gen); \ __bd = W_[mut_list]; \ if (bdescr_free(__bd) >= bdescr_start(__bd) + BLOCK_SIZE) { \ W_ __new_bd; \ - ("ptr" __new_bd) = foreign "C" allocBlock_lock() [regs]; \ + ("ptr" __new_bd) = foreign "C" allocBlock_lock(); \ bdescr_link(__new_bd) = __bd; \ __bd = __new_bd; \ W_[mut_list] = __bd; \ @@ -609,13 +748,13 @@ W_[free] = p; \ bdescr_free(__bd) = free + WDS(1); -#define recordMutable(p, regs) \ +#define recordMutable(p) \ P_ __p; \ W_ __bd; \ W_ __gen; \ __p = p; \ __bd = Bdescr(__p); \ __gen = TO_W_(bdescr_gen_no(__bd)); \ - if (__gen > 0) { recordMutableCap(__p, __gen, regs); } + if (__gen > 0) { recordMutableCap(__p, __gen); } #endif /* CMM_H */ diff --git a/includes/Rts.h b/includes/Rts.h index c52fe63d78..b31776828f 100644 --- a/includes/Rts.h +++ b/includes/Rts.h @@ -208,7 +208,6 @@ INLINE_HEADER Time fsecondsToTime (double t) #include "rts/storage/FunTypes.h" 
#include "rts/storage/InfoTables.h" #include "rts/storage/Closures.h" -#include "rts/storage/Liveness.h" #include "rts/storage/ClosureTypes.h" #include "rts/storage/TSO.h" #include "stg/MiscClosures.h" /* InfoTables, closures etc. defined in the RTS */ diff --git a/includes/rts/Constants.h b/includes/rts/Constants.h index cd741be7e0..2fab041c22 100644 --- a/includes/rts/Constants.h +++ b/includes/rts/Constants.h @@ -118,11 +118,6 @@ pushed in one of the heap check fragments in HeapStackCheck.hc (ie. currently the generic heap checks - 3 words for StgRetDyn, 18 words for the saved registers, see StgMacros.h). - - In the event of an unboxed tuple or let-no-escape stack/heap check - failure, there will be other words on the stack which are covered - by the RET_DYN frame. These will have been accounted for by stack - checks however, so we don't need to allow for them here. -------------------------------------------------------------------------- */ #define RESERVED_STACK_WORDS 21 @@ -277,25 +272,6 @@ */ #define TSO_SQUEEZED 128 -/* ----------------------------------------------------------------------------- - RET_DYN stack frames - -------------------------------------------------------------------------- */ - -/* VERY MAGIC CONSTANTS! - * must agree with code in HeapStackCheck.c, stg_gen_chk, and - * RESERVED_STACK_WORDS in Constants.h. - */ -#define RET_DYN_BITMAP_SIZE 8 -#define RET_DYN_NONPTR_REGS_SIZE 10 - -/* Sanity check that RESERVED_STACK_WORDS is reasonable. We can't - * just derive RESERVED_STACK_WORDS because it's used in Haskell code - * too. - */ -#if RESERVED_STACK_WORDS != (3 + RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE) -#error RESERVED_STACK_WORDS may be wrong! -#endif - /* * The number of times we spin in a spin lock before yielding (see * #3758). 
To tune this value, use the benchmark in #3758: run the diff --git a/includes/rts/storage/ClosureMacros.h b/includes/rts/storage/ClosureMacros.h index 6fdd55727a..dd5f428135 100644 --- a/includes/rts/storage/ClosureMacros.h +++ b/includes/rts/storage/ClosureMacros.h @@ -410,14 +410,6 @@ EXTERN_INLINE StgWord stack_frame_sizeW( StgClosure *frame ) info = get_ret_itbl(frame); switch (info->i.type) { - case RET_DYN: - { - StgRetDyn *dyn = (StgRetDyn *)frame; - return sizeofW(StgRetDyn) + RET_DYN_BITMAP_SIZE + - RET_DYN_NONPTR_REGS_SIZE + - RET_DYN_PTRS(dyn->liveness) + RET_DYN_NONPTRS(dyn->liveness); - } - case RET_FUN: return sizeofW(StgRetFun) + ((StgRetFun *)frame)->size; diff --git a/includes/rts/storage/ClosureTypes.h b/includes/rts/storage/ClosureTypes.h index 75ec08bf18..4e3b1e6a72 100644 --- a/includes/rts/storage/ClosureTypes.h +++ b/includes/rts/storage/ClosureTypes.h @@ -52,33 +52,32 @@ #define RET_BCO 31 #define RET_SMALL 32 #define RET_BIG 33 -#define RET_DYN 34 -#define RET_FUN 35 -#define UPDATE_FRAME 36 -#define CATCH_FRAME 37 -#define UNDERFLOW_FRAME 38 -#define STOP_FRAME 39 -#define BLOCKING_QUEUE 40 -#define BLACKHOLE 41 -#define MVAR_CLEAN 42 -#define MVAR_DIRTY 43 -#define ARR_WORDS 44 -#define MUT_ARR_PTRS_CLEAN 45 -#define MUT_ARR_PTRS_DIRTY 46 -#define MUT_ARR_PTRS_FROZEN0 47 -#define MUT_ARR_PTRS_FROZEN 48 -#define MUT_VAR_CLEAN 49 -#define MUT_VAR_DIRTY 50 -#define WEAK 51 -#define PRIM 52 -#define MUT_PRIM 53 -#define TSO 54 -#define STACK 55 -#define TREC_CHUNK 56 -#define ATOMICALLY_FRAME 57 -#define CATCH_RETRY_FRAME 58 -#define CATCH_STM_FRAME 59 -#define WHITEHOLE 60 -#define N_CLOSURE_TYPES 61 +#define RET_FUN 34 +#define UPDATE_FRAME 35 +#define CATCH_FRAME 36 +#define UNDERFLOW_FRAME 37 +#define STOP_FRAME 38 +#define BLOCKING_QUEUE 39 +#define BLACKHOLE 40 +#define MVAR_CLEAN 41 +#define MVAR_DIRTY 42 +#define ARR_WORDS 43 +#define MUT_ARR_PTRS_CLEAN 44 +#define MUT_ARR_PTRS_DIRTY 45 +#define MUT_ARR_PTRS_FROZEN0 46 +#define 
MUT_ARR_PTRS_FROZEN 47 +#define MUT_VAR_CLEAN 48 +#define MUT_VAR_DIRTY 49 +#define WEAK 50 +#define PRIM 51 +#define MUT_PRIM 52 +#define TSO 53 +#define STACK 54 +#define TREC_CHUNK 55 +#define ATOMICALLY_FRAME 56 +#define CATCH_RETRY_FRAME 57 +#define CATCH_STM_FRAME 58 +#define WHITEHOLE 59 +#define N_CLOSURE_TYPES 60 #endif /* RTS_STORAGE_CLOSURETYPES_H */ diff --git a/includes/rts/storage/Closures.h b/includes/rts/storage/Closures.h index 5f4f03541f..fcba1ebeb6 100644 --- a/includes/rts/storage/Closures.h +++ b/includes/rts/storage/Closures.h @@ -240,60 +240,6 @@ typedef struct { #define BCO_BITMAP_SIZEW(bco) ((BCO_BITMAP_SIZE(bco) + BITS_IN(StgWord) - 1) \ / BITS_IN(StgWord)) -/* ----------------------------------------------------------------------------- - Dynamic stack frames for generic heap checks. - - These generic heap checks are slow, but have the advantage of being - usable in a variety of situations. - - The one restriction is that any relevant SRTs must already be pointed - to from the stack. The return address doesn't need to have an info - table attached: hence it can be any old code pointer. - - The liveness mask contains a 1 at bit n, if register Rn contains a - non-pointer. The contents of all 8 vanilla registers are always saved - on the stack; the liveness mask tells the GC which ones contain - pointers. - - Good places to use a generic heap check: - - - case alternatives (the return address with an SRT is already - on the stack). - - - primitives (no SRT required). - - The stack frame layout for a RET_DYN is like this: - - some pointers |-- RET_DYN_PTRS(liveness) words - some nonpointers |-- RET_DYN_NONPTRS(liveness) words - - L1 \ - D1-2 |-- RET_DYN_NONPTR_REGS_SIZE words - F1-4 / - - R1-8 |-- RET_DYN_BITMAP_SIZE words - - return address \ - liveness mask |-- StgRetDyn structure - stg_gen_chk_info / - - we assume that the size of a double is always 2 pointers (wasting a - word when it is only one pointer, but avoiding lots of #ifdefs). 
- - See Liveness.h for the macros (RET_DYN_PTRS() etc.). - - NOTE: if you change the layout of RET_DYN stack frames, then you - might also need to adjust the value of RESERVED_STACK_WORDS in - Constants.h. - -------------------------------------------------------------------------- */ - -typedef struct { - const StgInfoTable* info; - StgWord liveness; - StgWord ret_addr; - StgClosure * payload[FLEXIBLE_ARRAY]; -} StgRetDyn; - /* A function return stack frame: used when saving the state for a * garbage collection at a function entry point. The function * arguments are on the stack, and we also save the function (its @@ -430,7 +376,7 @@ typedef struct { typedef struct { StgHeader header; - StgBool running_alt_code; + StgWord running_alt_code; StgClosure *first_code; StgClosure *alt_code; } StgCatchRetryFrame; diff --git a/includes/rts/storage/Liveness.h b/includes/rts/storage/Liveness.h deleted file mode 100644 index 66c82f3134..0000000000 --- a/includes/rts/storage/Liveness.h +++ /dev/null @@ -1,34 +0,0 @@ -/* ----------------------------------------------------------------------------- - * - * (c) The University of Glasgow 2004 - * - * Building liveness masks for RET_DYN stack frames. - * A few macros that are used in both .cmm and .c sources. 
- * - * A liveness mask is constructed like so: - * - * R1_PTR & R2_PTR & R3_PTR - * - * -------------------------------------------------------------------------- */ - -#ifndef RTS_STORAGE_LIVENESS_H -#define RTS_STORAGE_LIVENESS_H - -#define NO_PTRS 0xff -#define R1_PTR (NO_PTRS ^ (1<<0)) -#define R2_PTR (NO_PTRS ^ (1<<1)) -#define R3_PTR (NO_PTRS ^ (1<<2)) -#define R4_PTR (NO_PTRS ^ (1<<3)) -#define R5_PTR (NO_PTRS ^ (1<<4)) -#define R6_PTR (NO_PTRS ^ (1<<5)) -#define R7_PTR (NO_PTRS ^ (1<<6)) -#define R8_PTR (NO_PTRS ^ (1<<7)) - -#define N_NONPTRS(n) ((n)<<16) -#define N_PTRS(n) ((n)<<24) - -#define RET_DYN_NONPTRS(l) ((l)>>16 & 0xff) -#define RET_DYN_PTRS(l) ((l)>>24 & 0xff) -#define RET_DYN_LIVENESS(l) ((l) & 0xffff) - -#endif /* RTS_STORAGE_LIVENESS_H */ diff --git a/includes/rts/storage/SMPClosureOps.h b/includes/rts/storage/SMPClosureOps.h index 8dee7cbcf9..cd6a789af4 100644 --- a/includes/rts/storage/SMPClosureOps.h +++ b/includes/rts/storage/SMPClosureOps.h @@ -12,7 +12,7 @@ #ifdef CMINUSMINUS #define unlockClosure(ptr,info) \ - prim %write_barrier() []; \ + prim %write_barrier(); \ StgHeader_info(ptr) = info; #else diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h index c93cc319c0..b7b24a8632 100644 --- a/includes/stg/MiscClosures.h +++ b/includes/stg/MiscClosures.h @@ -169,23 +169,6 @@ RTS_RET(stg_noforceIO); /* standard selector thunks */ -RTS_RET(stg_sel_ret_0_upd); -RTS_RET(stg_sel_ret_1_upd); -RTS_RET(stg_sel_ret_2_upd); -RTS_RET(stg_sel_ret_3_upd); -RTS_RET(stg_sel_ret_4_upd); -RTS_RET(stg_sel_ret_5_upd); -RTS_RET(stg_sel_ret_6_upd); -RTS_RET(stg_sel_ret_7_upd); -RTS_RET(stg_sel_ret_8_upd); -RTS_RET(stg_sel_ret_9_upd); -RTS_RET(stg_sel_ret_10_upd); -RTS_RET(stg_sel_ret_11_upd); -RTS_RET(stg_sel_ret_12_upd); -RTS_RET(stg_sel_ret_13_upd); -RTS_RET(stg_sel_ret_14_upd); -RTS_RET(stg_sel_ret_15_upd); - RTS_ENTRY(stg_sel_0_upd); RTS_ENTRY(stg_sel_1_upd); RTS_ENTRY(stg_sel_2_upd); @@ -267,45 +250,39 @@ RTS_FUN_DECL(stg_PAP_apply); /* 
standard GC & stack check entry points, all defined in HeapStackCheck.hc */ -RTS_RET(stg_enter); +RTS_FUN_DECL(stg_gc_noregs); + RTS_RET(stg_enter_checkbh); -RTS_RET(stg_gc_void); +RTS_RET(stg_ret_v); +RTS_RET(stg_ret_p); +RTS_RET(stg_ret_n); +RTS_RET(stg_ret_f); +RTS_RET(stg_ret_d); +RTS_RET(stg_ret_l); +RTS_FUN_DECL(stg_gc_prim_p); +RTS_FUN_DECL(stg_gc_prim_pp); +RTS_FUN_DECL(stg_gc_prim_n); + +RTS_RET(stg_enter); RTS_FUN_DECL(__stg_gc_enter_1); -RTS_FUN_DECL(stg_gc_noregs); -RTS_RET(stg_gc_unpt_r1); RTS_FUN_DECL(stg_gc_unpt_r1); - -RTS_RET(stg_gc_unbx_r1); RTS_FUN_DECL(stg_gc_unbx_r1); - -RTS_RET(stg_gc_f1); RTS_FUN_DECL(stg_gc_f1); - -RTS_RET(stg_gc_d1); RTS_FUN_DECL(stg_gc_d1); - -RTS_RET(stg_gc_l1); RTS_FUN_DECL(stg_gc_l1); +RTS_FUN_DECL(stg_gc_pp); +RTS_FUN_DECL(stg_gc_ppp); +RTS_FUN_DECL(stg_gc_pppp); RTS_RET(stg_gc_fun); RTS_FUN_DECL(__stg_gc_fun); -RTS_RET(stg_gc_gen); -RTS_FUN_DECL(stg_gc_gen); - -RTS_RET(stg_ut_1_0_unreg); - -RTS_FUN_DECL(stg_gc_gen_hp); -RTS_FUN_DECL(stg_gc_ut); -RTS_FUN_DECL(stg_gen_yield); RTS_FUN_DECL(stg_yield_noregs); RTS_FUN_DECL(stg_yield_to_interpreter); -RTS_FUN_DECL(stg_gen_block); RTS_FUN_DECL(stg_block_noregs); -RTS_FUN_DECL(stg_block_1); RTS_FUN_DECL(stg_block_blackhole); RTS_FUN_DECL(stg_block_blackhole_finally); RTS_FUN_DECL(stg_block_takemvar); diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h index bf17b7e825..70e93d3234 100644 --- a/includes/stg/Regs.h +++ b/includes/stg/Regs.h @@ -93,10 +93,10 @@ typedef struct { /* * Registers Hp and HpLim are global across the entire system, and are - * copied into the RegTable before executing a thread. + * copied into the RegTable or registers before executing a thread. * - * Registers Sp and SpLim are saved in the TSO for the - * thread, but are copied into the RegTable before executing a thread. + * Registers Sp and SpLim are saved in the TSO for the thread, but are + * copied into the RegTable or registers before executing a thread. 
* * All other registers are "general purpose", and are used for passing * arguments to functions, and returning values. The code generator @@ -116,45 +116,6 @@ typedef struct { * (pseudo-)registers in those cases. */ -/* - * Locations for saving per-thread registers. - */ - -#define SAVE_Sp (CurrentTSO->sp) -#define SAVE_SpLim (CurrentTSO->splim) - -#define SAVE_Hp (BaseReg->rHp) - -#define SAVE_CurrentTSO (BaseReg->rCurrentTSO) -#define SAVE_CurrentNursery (BaseReg->rCurrentNursery) -#define SAVE_HpAlloc (BaseReg->rHpAlloc) - -/* We sometimes need to save registers across a C-call, eg. if they - * are clobbered in the standard calling convention. We define the - * save locations for all registers in the register table. - */ - -#define SAVE_R1 (BaseReg->rR1) -#define SAVE_R2 (BaseReg->rR2) -#define SAVE_R3 (BaseReg->rR3) -#define SAVE_R4 (BaseReg->rR4) -#define SAVE_R5 (BaseReg->rR5) -#define SAVE_R6 (BaseReg->rR6) -#define SAVE_R7 (BaseReg->rR7) -#define SAVE_R8 (BaseReg->rR8) -#define SAVE_R9 (BaseReg->rR9) -#define SAVE_R10 (BaseReg->rR10) - -#define SAVE_F1 (BaseReg->rF1) -#define SAVE_F2 (BaseReg->rF2) -#define SAVE_F3 (BaseReg->rF3) -#define SAVE_F4 (BaseReg->rF4) - -#define SAVE_D1 (BaseReg->rD1) -#define SAVE_D2 (BaseReg->rD2) - -#define SAVE_L1 (BaseReg->rL1) - /* ----------------------------------------------------------------------------- * Emit the GCC-specific register declarations for each machine * register being used. If any STG register isn't mapped to a machine @@ -163,11 +124,6 @@ typedef struct { * First, the general purpose registers. The idea is, if a particular * general-purpose STG register can't be mapped to a real machine * register, it won't be used at all. Instead, we'll use the stack. 
- * - * This is an improvement on the way things used to be done, when all - * registers were mapped to locations in the register table, and stuff - * was being shifted from the stack to the register table and back - * again for no good reason (on register-poor architectures). */ /* define NO_REGS to omit register declarations - used in RTS C code @@ -402,287 +358,6 @@ GLOBAL_REG_DECL(bdescr *,HpAlloc,REG_HpAlloc) #define stg_gc_enter_1 (FunReg->stgGCEnter1) #define stg_gc_fun (FunReg->stgGCFun) -/* ----------------------------------------------------------------------------- - For any registers which are denoted "caller-saves" by the C calling - convention, we have to emit code to save and restore them across C - calls. - -------------------------------------------------------------------------- */ - -#ifdef CALLER_SAVES_R1 -#define CALLER_SAVE_R1 SAVE_R1 = R1; -#define CALLER_RESTORE_R1 R1 = SAVE_R1; -#else -#define CALLER_SAVE_R1 /* nothing */ -#define CALLER_RESTORE_R1 /* nothing */ -#endif - -#ifdef CALLER_SAVES_R2 -#define CALLER_SAVE_R2 SAVE_R2 = R2; -#define CALLER_RESTORE_R2 R2 = SAVE_R2; -#else -#define CALLER_SAVE_R2 /* nothing */ -#define CALLER_RESTORE_R2 /* nothing */ -#endif - -#ifdef CALLER_SAVES_R3 -#define CALLER_SAVE_R3 SAVE_R3 = R3; -#define CALLER_RESTORE_R3 R3 = SAVE_R3; -#else -#define CALLER_SAVE_R3 /* nothing */ -#define CALLER_RESTORE_R3 /* nothing */ -#endif - -#ifdef CALLER_SAVES_R4 -#define CALLER_SAVE_R4 SAVE_R4 = R4; -#define CALLER_RESTORE_R4 R4 = SAVE_R4; -#else -#define CALLER_SAVE_R4 /* nothing */ -#define CALLER_RESTORE_R4 /* nothing */ -#endif - -#ifdef CALLER_SAVES_R5 -#define CALLER_SAVE_R5 SAVE_R5 = R5; -#define CALLER_RESTORE_R5 R5 = SAVE_R5; -#else -#define CALLER_SAVE_R5 /* nothing */ -#define CALLER_RESTORE_R5 /* nothing */ -#endif - -#ifdef CALLER_SAVES_R6 -#define CALLER_SAVE_R6 SAVE_R6 = R6; -#define CALLER_RESTORE_R6 R6 = SAVE_R6; -#else -#define CALLER_SAVE_R6 /* nothing */ -#define CALLER_RESTORE_R6 /* nothing */ 
-#endif - -#ifdef CALLER_SAVES_R7 -#define CALLER_SAVE_R7 SAVE_R7 = R7; -#define CALLER_RESTORE_R7 R7 = SAVE_R7; -#else -#define CALLER_SAVE_R7 /* nothing */ -#define CALLER_RESTORE_R7 /* nothing */ -#endif - -#ifdef CALLER_SAVES_R8 -#define CALLER_SAVE_R8 SAVE_R8 = R8; -#define CALLER_RESTORE_R8 R8 = SAVE_R8; -#else -#define CALLER_SAVE_R8 /* nothing */ -#define CALLER_RESTORE_R8 /* nothing */ -#endif - -#ifdef CALLER_SAVES_R9 -#define CALLER_SAVE_R9 SAVE_R9 = R9; -#define CALLER_RESTORE_R9 R9 = SAVE_R9; -#else -#define CALLER_SAVE_R9 /* nothing */ -#define CALLER_RESTORE_R9 /* nothing */ -#endif - -#ifdef CALLER_SAVES_R10 -#define CALLER_SAVE_R10 SAVE_R10 = R10; -#define CALLER_RESTORE_R10 R10 = SAVE_R10; -#else -#define CALLER_SAVE_R10 /* nothing */ -#define CALLER_RESTORE_R10 /* nothing */ -#endif - -#ifdef CALLER_SAVES_F1 -#define CALLER_SAVE_F1 SAVE_F1 = F1; -#define CALLER_RESTORE_F1 F1 = SAVE_F1; -#else -#define CALLER_SAVE_F1 /* nothing */ -#define CALLER_RESTORE_F1 /* nothing */ -#endif - -#ifdef CALLER_SAVES_F2 -#define CALLER_SAVE_F2 SAVE_F2 = F2; -#define CALLER_RESTORE_F2 F2 = SAVE_F2; -#else -#define CALLER_SAVE_F2 /* nothing */ -#define CALLER_RESTORE_F2 /* nothing */ -#endif - -#ifdef CALLER_SAVES_F3 -#define CALLER_SAVE_F3 SAVE_F3 = F3; -#define CALLER_RESTORE_F3 F3 = SAVE_F3; -#else -#define CALLER_SAVE_F3 /* nothing */ -#define CALLER_RESTORE_F3 /* nothing */ -#endif - -#ifdef CALLER_SAVES_F4 -#define CALLER_SAVE_F4 SAVE_F4 = F4; -#define CALLER_RESTORE_F4 F4 = SAVE_F4; -#else -#define CALLER_SAVE_F4 /* nothing */ -#define CALLER_RESTORE_F4 /* nothing */ -#endif - -#ifdef CALLER_SAVES_D1 -#define CALLER_SAVE_D1 SAVE_D1 = D1; -#define CALLER_RESTORE_D1 D1 = SAVE_D1; -#else -#define CALLER_SAVE_D1 /* nothing */ -#define CALLER_RESTORE_D1 /* nothing */ -#endif - -#ifdef CALLER_SAVES_D2 -#define CALLER_SAVE_D2 SAVE_D2 = D2; -#define CALLER_RESTORE_D2 D2 = SAVE_D2; -#else -#define CALLER_SAVE_D2 /* nothing */ -#define CALLER_RESTORE_D2 /* nothing */ 
-#endif - -#ifdef CALLER_SAVES_L1 -#define CALLER_SAVE_L1 SAVE_L1 = L1; -#define CALLER_RESTORE_L1 L1 = SAVE_L1; -#else -#define CALLER_SAVE_L1 /* nothing */ -#define CALLER_RESTORE_L1 /* nothing */ -#endif - -#ifdef CALLER_SAVES_Sp -#define CALLER_SAVE_Sp SAVE_Sp = Sp; -#define CALLER_RESTORE_Sp Sp = SAVE_Sp; -#else -#define CALLER_SAVE_Sp /* nothing */ -#define CALLER_RESTORE_Sp /* nothing */ -#endif - -#ifdef CALLER_SAVES_SpLim -#define CALLER_SAVE_SpLim SAVE_SpLim = SpLim; -#define CALLER_RESTORE_SpLim SpLim = SAVE_SpLim; -#else -#define CALLER_SAVE_SpLim /* nothing */ -#define CALLER_RESTORE_SpLim /* nothing */ -#endif - -#ifdef CALLER_SAVES_Hp -#define CALLER_SAVE_Hp SAVE_Hp = Hp; -#define CALLER_RESTORE_Hp Hp = SAVE_Hp; -#else -#define CALLER_SAVE_Hp /* nothing */ -#define CALLER_RESTORE_Hp /* nothing */ -#endif - -#ifdef CALLER_SAVES_Base -#ifdef THREADED_RTS -#error "Can't have caller-saved BaseReg with THREADED_RTS" -#endif -#define CALLER_SAVE_Base /* nothing */ -#define CALLER_RESTORE_Base BaseReg = &MainRegTable; -#else -#define CALLER_SAVE_Base /* nothing */ -#define CALLER_RESTORE_Base /* nothing */ -#endif - -#ifdef CALLER_SAVES_CurrentTSO -#define CALLER_SAVE_CurrentTSO SAVE_CurrentTSO = CurrentTSO; -#define CALLER_RESTORE_CurrentTSO CurrentTSO = SAVE_CurrentTSO; -#else -#define CALLER_SAVE_CurrentTSO /* nothing */ -#define CALLER_RESTORE_CurrentTSO /* nothing */ -#endif - -#ifdef CALLER_SAVES_CurrentNursery -#define CALLER_SAVE_CurrentNursery SAVE_CurrentNursery = CurrentNursery; -#define CALLER_RESTORE_CurrentNursery CurrentNursery = SAVE_CurrentNursery; -#else -#define CALLER_SAVE_CurrentNursery /* nothing */ -#define CALLER_RESTORE_CurrentNursery /* nothing */ -#endif - -#ifdef CALLER_SAVES_HpAlloc -#define CALLER_SAVE_HpAlloc SAVE_HpAlloc = HpAlloc; -#define CALLER_RESTORE_HpAlloc HpAlloc = SAVE_HpAlloc; -#else -#define CALLER_SAVE_HpAlloc /* nothing */ -#define CALLER_RESTORE_HpAlloc /* nothing */ -#endif - #endif /* IN_STG_CODE */ -/* 
---------------------------------------------------------------------------- - Handy bunches of saves/restores - ------------------------------------------------------------------------ */ - -#if IN_STG_CODE - -#define CALLER_SAVE_USER \ - CALLER_SAVE_R1 \ - CALLER_SAVE_R2 \ - CALLER_SAVE_R3 \ - CALLER_SAVE_R4 \ - CALLER_SAVE_R5 \ - CALLER_SAVE_R6 \ - CALLER_SAVE_R7 \ - CALLER_SAVE_R8 \ - CALLER_SAVE_R9 \ - CALLER_SAVE_R10 \ - CALLER_SAVE_F1 \ - CALLER_SAVE_F2 \ - CALLER_SAVE_F3 \ - CALLER_SAVE_F4 \ - CALLER_SAVE_D1 \ - CALLER_SAVE_D2 \ - CALLER_SAVE_L1 - - /* Save Base last, since the others may - be addressed relative to it */ -#define CALLER_SAVE_SYSTEM \ - CALLER_SAVE_Sp \ - CALLER_SAVE_SpLim \ - CALLER_SAVE_Hp \ - CALLER_SAVE_CurrentTSO \ - CALLER_SAVE_CurrentNursery \ - CALLER_SAVE_Base - -#define CALLER_RESTORE_USER \ - CALLER_RESTORE_R1 \ - CALLER_RESTORE_R2 \ - CALLER_RESTORE_R3 \ - CALLER_RESTORE_R4 \ - CALLER_RESTORE_R5 \ - CALLER_RESTORE_R6 \ - CALLER_RESTORE_R7 \ - CALLER_RESTORE_R8 \ - CALLER_RESTORE_R9 \ - CALLER_RESTORE_R10 \ - CALLER_RESTORE_F1 \ - CALLER_RESTORE_F2 \ - CALLER_RESTORE_F3 \ - CALLER_RESTORE_F4 \ - CALLER_RESTORE_D1 \ - CALLER_RESTORE_D2 \ - CALLER_RESTORE_L1 - - /* Restore Base first, since the others may - be addressed relative to it */ -#define CALLER_RESTORE_SYSTEM \ - CALLER_RESTORE_Base \ - CALLER_RESTORE_Sp \ - CALLER_RESTORE_SpLim \ - CALLER_RESTORE_Hp \ - CALLER_RESTORE_CurrentTSO \ - CALLER_RESTORE_CurrentNursery - -#else /* not IN_STG_CODE */ - -#define CALLER_SAVE_USER /* nothing */ -#define CALLER_SAVE_SYSTEM /* nothing */ -#define CALLER_RESTORE_USER /* nothing */ -#define CALLER_RESTORE_SYSTEM /* nothing */ - -#endif /* IN_STG_CODE */ -#define CALLER_SAVE_ALL \ - CALLER_SAVE_SYSTEM \ - CALLER_SAVE_USER - -#define CALLER_RESTORE_ALL \ - CALLER_RESTORE_SYSTEM \ - CALLER_RESTORE_USER - #endif /* REGS_H */ |