summaryrefslogtreecommitdiff
path: root/includes
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2012-10-03 09:30:56 +0100
committerSimon Marlow <marlowsd@gmail.com>2012-10-08 09:04:40 +0100
commita7c0387d20c1c9994d1100b14fbb8fb4e28a259e (patch)
treeb95d0a512f951a4a463f1aa5178b0cd5c4fdb410 /includes
parentaed37acd4d157791381800d5de960a2461bcbef3 (diff)
downloadhaskell-a7c0387d20c1c9994d1100b14fbb8fb4e28a259e.tar.gz
Produce new-style Cmm from the Cmm parser
The main change here is that the Cmm parser now allows high-level cmm code with argument-passing and function calls. For example: foo ( gcptr a, bits32 b ) { if (b > 0) { // we can make tail calls passing arguments: jump stg_ap_0_fast(a); } return (x,y); } More details on the new cmm syntax are in Note [Syntax of .cmm files] in CmmParse.y. The old syntax is still more-or-less supported for those occasional code fragments that really need to explicitly manipulate the stack. However there are a couple of differences: it is now obligatory to give a list of live GlobalRegs on every jump, e.g. jump %ENTRY_CODE(Sp(0)) [R1]; Again, more details in Note [Syntax of .cmm files]. I have rewritten most of the .cmm files in the RTS into the new syntax, except for AutoApply.cmm which is generated by the genapply program: this file could be generated in the new syntax instead and would probably be better off for it, but I ran out of enthusiasm. Some other changes in this batch: - The PrimOp calling convention is gone, primops now use the ordinary NativeNodeCall convention. This means that primops and "foreign import prim" code must be written in high-level cmm, but they can now take more than 10 arguments. - CmmSink now does constant-folding (should fix #7219) - .cmm files now go through the cmmPipeline, and as a result we generate better code in many cases. All the object files generated for the RTS .cmm files are now smaller. Performance should be better too, but I haven't measured it yet. - RET_DYN frames are removed from the RTS, lots of code goes away - we now have some more canned GC points to cover unboxed-tuples with 2-4 pointers, which will reduce code size a little.
Diffstat (limited to 'includes')
-rw-r--r--includes/Cmm.h293
-rw-r--r--includes/Rts.h1
-rw-r--r--includes/rts/Constants.h24
-rw-r--r--includes/rts/storage/ClosureMacros.h8
-rw-r--r--includes/rts/storage/ClosureTypes.h55
-rw-r--r--includes/rts/storage/Closures.h56
-rw-r--r--includes/rts/storage/Liveness.h34
-rw-r--r--includes/rts/storage/SMPClosureOps.h2
-rw-r--r--includes/stg/MiscClosures.h55
-rw-r--r--includes/stg/Regs.h331
10 files changed, 264 insertions, 595 deletions
diff --git a/includes/Cmm.h b/includes/Cmm.h
index edcf46e7c0..afe08a26a3 100644
--- a/includes/Cmm.h
+++ b/includes/Cmm.h
@@ -9,36 +9,6 @@
*
* For the syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
*
- * If you're used to the old HC file syntax, here's a quick cheat sheet
- * for converting HC code:
- *
- * - Remove FB_/FE_
- * - Remove all type casts
- * - Remove '&'
- * - STGFUN(foo) { ... } ==> foo { ... }
- * - FN_(foo) { ... } ==> foo { ... }
- * - JMP_(e) ==> jump e;
- * - Remove EXTFUN(foo)
- * - Sp[n] ==> Sp(n)
- * - Hp[n] ==> Hp(n)
- * - Sp += n ==> Sp_adj(n)
- * - Hp += n ==> Hp_adj(n)
- * - R1.i ==> R1 (similarly for R1.w, R1.cl etc.)
- * - You need to explicitly dereference variables; eg.
- * alloc_blocks ==> CInt[alloc_blocks]
- * - convert all word offsets into byte offsets:
- * - e ==> WDS(e)
- * - sizeofW(StgFoo) ==> SIZEOF_StgFoo
- * - ENTRY_CODE(e) ==> %ENTRY_CODE(e)
- * - get_itbl(c) ==> %GET_STD_INFO(c)
- * - Change liveness masks in STK_CHK_GEN, HP_CHK_GEN:
- * R1_PTR | R2_PTR ==> R1_PTR & R2_PTR
- * (NOTE: | becomes &)
- * - Declarations like 'StgPtr p;' become just 'W_ p;'
- * - e->payload[n] ==> PAYLOAD(e,n)
- * - Be very careful with comparisons: the infix versions (>, >=, etc.)
- * are unsigned, so use %lt(a,b) to get signed less-than for example.
- *
* Accessing fields of structures defined in the RTS header files is
* done via automatically-generated macros in DerivedConstants.h. For
* example, where previously we used
@@ -136,6 +106,8 @@
Misc useful stuff
-------------------------------------------------------------------------- */
+#define ccall foreign "C"
+
#define NULL (0::W_)
#define STRING(name,str) \
@@ -210,7 +182,7 @@
#define Sp(n) W_[Sp + WDS(n)]
#define Hp(n) W_[Hp + WDS(n)]
-#define Sp_adj(n) Sp = Sp + WDS(n)
+#define Sp_adj(n) Sp = Sp + WDS(n) /* pronounced "spadge" */
#define Hp_adj(n) Hp = Hp + WDS(n)
/* -----------------------------------------------------------------------------
@@ -278,25 +250,37 @@
-#define LOAD_INFO \
- info = %INFO_PTR(UNTAG(P1));
-#define UNTAG_R1 \
- P1 = UNTAG(P1);
+// LOAD_INFO(ret,x): load the info pointer of closure x into 'info'.
+// This variant always untags x first and never uses 'ret'; the parameter
+// list is kept identical to the #else variant because ENTER_() expands
+// LOAD_INFO(ret,x) in both configurations.
+#define LOAD_INFO(ret,x) \
+ info = %INFO_PTR(UNTAG(x));
+// MAYBE_UNTAG(x): in this configuration x may still be tagged, so strip
+// the tag. No trailing ';' here: ENTER_() uses the macro as an expression
+// (x = MAYBE_UNTAG(x);), matching the #else variant.
+#define MAYBE_UNTAG(x) UNTAG(x)
#else
-#define LOAD_INFO \
- if (GETTAG(P1) != 0) { \
- jump %ENTRY_CODE(Sp(0)); \
+#define LOAD_INFO(ret,x) \
+ if (GETTAG(x) != 0) { \
+ ret(x); \
} \
- info = %INFO_PTR(P1);
+ info = %INFO_PTR(x);
-#define UNTAG_R1 /* nothing */
+#define MAYBE_UNTAG(x) (x) /* already untagged */
#endif
-#define ENTER() \
+// We need two versions of ENTER():
+// - ENTER(x) takes the closure as an argument and uses return(),
+// for use in civilized code where the stack is handled by GHC
+//
+// - ENTER_R1() where the closure is in R1, and returns are
+// explicit jumps, for use when we are doing the stack management
+// ourselves.
+
+#define ENTER(x) ENTER_(return,x)
+#define ENTER_R1() ENTER_(RET_R1,R1)
+
+#define RET_R1(x) jump %ENTRY_CODE(Sp(0)) [R1]
+
+#define ENTER_(ret,x) \
again: \
W_ info; \
- LOAD_INFO \
+ LOAD_INFO(ret,x) \
switch [INVALID_OBJECT .. N_CLOSURE_TYPES] \
(TO_W_( %INFO_TYPE(%STD_INFO(info)) )) { \
case \
@@ -304,7 +288,7 @@
IND_PERM, \
IND_STATIC: \
{ \
- P1 = StgInd_indirectee(P1); \
+ x = StgInd_indirectee(x); \
goto again; \
} \
case \
@@ -318,12 +302,12 @@
BCO, \
PAP: \
{ \
- jump %ENTRY_CODE(Sp(0)); \
+ ret(x); \
} \
default: \
{ \
- UNTAG_R1 \
- jump %ENTRY_CODE(info); \
+ x = MAYBE_UNTAG(x); \
+ jump %ENTRY_CODE(info) (x); \
} \
}
@@ -348,7 +332,6 @@
*/
#include "stg/RtsMachRegs.h"
-#include "rts/storage/Liveness.h"
#include "rts/prof/LDV.h"
#undef BLOCK_SIZE
@@ -359,6 +342,18 @@
#define MyCapability() (BaseReg - OFFSET_Capability_r)
/* -------------------------------------------------------------------------
+ Info tables
+ ------------------------------------------------------------------------- */
+
+#if defined(PROFILING)
+#define PROF_HDR_FIELDS(w_) \
+ w_ prof_hdr_1, \
+ w_ prof_hdr_2,
+#else
+#define PROF_HDR_FIELDS(w_) /* nothing */
+#endif
+
+/* -------------------------------------------------------------------------
Allocation and garbage collection
------------------------------------------------------------------------- */
@@ -371,30 +366,134 @@
* ticky-ticky. It's not clear whether eg. the size field of an array
* should be counted as "admin", or the various fields of a BCO.
*/
-#define ALLOC_PRIM(bytes,liveness,reentry) \
- HP_CHK_GEN_TICKY(bytes,liveness,reentry); \
+#define ALLOC_PRIM(bytes) \
+ HP_CHK_GEN_TICKY(bytes); \
TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \
CCCS_ALLOC(bytes);
+// HEAP_CHECK(bytes,failure): advance Hp by 'bytes'; if Hp then exceeds
+// HpLim, store the requested size in HpAlloc and run the 'failure'
+// statement(s). Ends with TICK_ALLOC_HEAP_NOCTR(bytes).
+// 'bytes' is parenthesised at each arithmetic/assignment use so that an
+// operator expression passed as the size is evaluated as a unit.
+#define HEAP_CHECK(bytes,failure) \
+ Hp = Hp + (bytes); \
+ if (Hp > HpLim) { HpAlloc = (bytes); failure; } \
+ TICK_ALLOC_HEAP_NOCTR(bytes);
+
+#define ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,failure) \
+ HEAP_CHECK(bytes,failure) \
+ TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \
+ CCCS_ALLOC(bytes);
+
+#define ALLOC_PRIM_P(bytes,fun,arg) \
+ ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_P(fun,arg));
+
+#define ALLOC_PRIM_N(bytes,fun,arg) \
+ ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_N(fun,arg));
+
/* CCS_ALLOC wants the size in words, because ccs->mem_alloc is in words */
#define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), CCCS)
-#define HP_CHK_GEN_TICKY(alloc,liveness,reentry) \
- HP_CHK_GEN(alloc,liveness,reentry); \
+#define HP_CHK_GEN_TICKY(alloc) \
+ HP_CHK_GEN(alloc); \
TICK_ALLOC_HEAP_NOCTR(alloc);
+#define HP_CHK_P(bytes, fun, arg) \
+ HEAP_CHECK(bytes, GC_PRIM_P(fun,arg))
+
+// ALLOC_P_TICKY(alloc, fun, arg): heap check for 'alloc' bytes via
+// HP_CHK_P, which runs GC_PRIM_P(fun,arg) on failure, followed by a
+// TICK_ALLOC_HEAP_NOCTR(alloc) tick. HP_CHK_P takes three parameters, so
+// 'fun' and 'arg' must be forwarded.
+// NOTE(review): HEAP_CHECK (used by HP_CHK_P) already ends with
+// TICK_ALLOC_HEAP_NOCTR(bytes), so the tick below looks like a second
+// count of the same allocation -- confirm whether that is intended.
+#define ALLOC_P_TICKY(alloc, fun, arg) \
+ HP_CHK_P(alloc, fun, arg); \
+ TICK_ALLOC_HEAP_NOCTR(alloc);
+
+#define CHECK_GC() \
+ (bdescr_link(CurrentNursery) == NULL || \
+ generation_n_new_large_words(W_[g0]) >= TO_W_(CLong[large_alloc_lim]))
+
// allocate() allocates from the nursery, so we check to see
// whether the nursery is nearly empty in any function that uses
// allocate() - this includes many of the primops.
-#define MAYBE_GC(liveness,reentry) \
- if (bdescr_link(CurrentNursery) == NULL || \
- generation_n_new_large_words(W_[g0]) >= TO_W_(CLong[large_alloc_lim])) { \
- R9 = liveness; \
- R10 = reentry; \
- HpAlloc = 0; \
- jump stg_gc_gen_hp; \
+//
+// HACK alert: the __L__ stuff is here to coax the common-block
+// eliminator into commoning up the call stg_gc_noregs() with the same
+// code that gets generated by a STK_CHK_GEN() in the same proc. We
+// also need an if (0) { goto __L__; } so that the __L__ label isn't
+// optimised away by the control-flow optimiser prior to common-block
+// elimination (it will be optimised away later).
+//
+// This saves some code in gmp-wrappers.cmm where we have lots of
+// MAYBE_GC() in the same proc as STK_CHK_GEN().
+//
+#define MAYBE_GC(retry) \
+ if (CHECK_GC()) { \
+ HpAlloc = 0; \
+ goto __L__; \
+ __L__: \
+ call stg_gc_noregs(); \
+ goto retry; \
+ } \
+ if (0) { goto __L__; }
+
+#define GC_PRIM(fun) \
+ R9 = fun; \
+ jump stg_gc_prim();
+
+#define GC_PRIM_N(fun,arg) \
+ R9 = fun; \
+ jump stg_gc_prim_n(arg);
+
+#define GC_PRIM_P(fun,arg) \
+ R9 = fun; \
+ jump stg_gc_prim_p(arg);
+
+#define GC_PRIM_PP(fun,arg1,arg2) \
+ R9 = fun; \
+ jump stg_gc_prim_pp(arg1,arg2);
+
+#define MAYBE_GC_(fun) \
+ if (CHECK_GC()) { \
+ HpAlloc = 0; \
+ GC_PRIM(fun) \
+ }
+
+#define MAYBE_GC_N(fun,arg) \
+ if (CHECK_GC()) { \
+ HpAlloc = 0; \
+ GC_PRIM_N(fun,arg) \
+ }
+
+#define MAYBE_GC_P(fun,arg) \
+ if (CHECK_GC()) { \
+ HpAlloc = 0; \
+ GC_PRIM_P(fun,arg) \
}
+#define MAYBE_GC_PP(fun,arg1,arg2) \
+ if (CHECK_GC()) { \
+ HpAlloc = 0; \
+ GC_PRIM_PP(fun,arg1,arg2) \
+ }
+
+// STK_CHK(n, fun): if Sp - n would fall below SpLim, run GC_PRIM(fun).
+// n is subtracted from Sp as given (no WDS() scaling is applied here) and
+// is parenthesised so that an argument such as a + b is subtracted whole.
+#define STK_CHK(n, fun) \
+ if (Sp - (n) < SpLim) { \
+ GC_PRIM(fun) \
+ }
+
+// STK_CHK_P(n, fun, arg): if Sp - n would fall below SpLim, run
+// GC_PRIM_P(fun,arg). n is subtracted as given (no WDS() scaling here)
+// and is parenthesised so that an operator expression is subtracted whole.
+#define STK_CHK_P(n, fun, arg) \
+ if (Sp - (n) < SpLim) { \
+ GC_PRIM_P(fun,arg) \
+ }
+
+// STK_CHK_PP(n, fun, arg1, arg2): if Sp - n would fall below SpLim, run
+// GC_PRIM_PP(fun,arg1,arg2). n is subtracted as given (no WDS() scaling
+// here) and is parenthesised so that an operator expression is
+// subtracted whole.
+#define STK_CHK_PP(n, fun, arg1, arg2) \
+ if (Sp - (n) < SpLim) { \
+ GC_PRIM_PP(fun,arg1,arg2) \
+ }
+
+// STK_CHK_ENTER(n, closure): if Sp - n would fall below SpLim, jump to
+// __stg_gc_enter_1 passing 'closure'. n is subtracted as given (no WDS()
+// scaling here) and is parenthesised so that an operator expression is
+// subtracted whole.
+#define STK_CHK_ENTER(n, closure) \
+ if (Sp - (n) < SpLim) { \
+ jump __stg_gc_enter_1(closure); \
+ }
+
+// A funky heap check used by AutoApply.cmm
+
+#define HP_CHK_NP_ASSIGN_SP0(size,f) \
+ HEAP_CHECK(size, Sp(0) = f; jump __stg_gc_enter_1 [R1];)
+
/* -----------------------------------------------------------------------------
Closure headers
-------------------------------------------------------------------------- */
@@ -481,23 +580,6 @@
#endif
/* -----------------------------------------------------------------------------
- Voluntary Yields/Blocks
-
- We only have a generic version of this at the moment - if it turns
- out to be slowing us down we can make specialised ones.
- -------------------------------------------------------------------------- */
-
-#define YIELD(liveness,reentry) \
- R9 = liveness; \
- R10 = reentry; \
- jump stg_gen_yield;
-
-#define BLOCK(liveness,reentry) \
- R9 = liveness; \
- R10 = reentry; \
- jump stg_gen_block;
-
-/* -----------------------------------------------------------------------------
Ticky macros
-------------------------------------------------------------------------- */
@@ -585,6 +667,63 @@
TICK_BUMP_BY(ALLOC_HEAP_tot,n)
/* -----------------------------------------------------------------------------
+ Saving and restoring STG registers
+
+ STG registers must be saved around a C call, just in case the STG
+ register is mapped to a caller-saves machine register. Normally we
+ don't need to worry about this, because the code generator has already
+ loaded any live STG registers into variables for us, but in
+ hand-written low-level Cmm code where we don't know which registers
+ are live, we might have to save them all.
+ -------------------------------------------------------------------------- */
+
+#define SAVE_STGREGS \
+ W_ r1, r2, r3, r4, r5, r6, r7, r8; \
+ F_ f1, f2, f3, f4; \
+ D_ d1, d2; \
+ L_ l1; \
+ \
+ r1 = R1; \
+ r2 = R2; \
+ r3 = R3; \
+ r4 = R4; \
+ r5 = R5; \
+ r6 = R6; \
+ r7 = R7; \
+ r8 = R8; \
+ \
+ f1 = F1; \
+ f2 = F2; \
+ f3 = F3; \
+ f4 = F4; \
+ \
+ d1 = D1; \
+ d2 = D2; \
+ \
+ l1 = L1;
+
+
+#define RESTORE_STGREGS \
+ R1 = r1; \
+ R2 = r2; \
+ R3 = r3; \
+ R4 = r4; \
+ R5 = r5; \
+ R6 = r6; \
+ R7 = r7; \
+ R8 = r8; \
+ \
+ F1 = f1; \
+ F2 = f2; \
+ F3 = f3; \
+ F4 = f4; \
+ \
+ D1 = d1; \
+ D2 = d2; \
+ \
+ L1 = l1;
+
+/* -----------------------------------------------------------------------------
Misc junk
-------------------------------------------------------------------------- */
@@ -592,14 +731,14 @@
#define END_TSO_QUEUE stg_END_TSO_QUEUE_closure
#define END_INVARIANT_CHECK_QUEUE stg_END_INVARIANT_CHECK_QUEUE_closure
-#define recordMutableCap(p, gen, regs) \
+#define recordMutableCap(p, gen) \
W_ __bd; \
W_ mut_list; \
mut_list = Capability_mut_lists(MyCapability()) + WDS(gen); \
__bd = W_[mut_list]; \
if (bdescr_free(__bd) >= bdescr_start(__bd) + BLOCK_SIZE) { \
W_ __new_bd; \
- ("ptr" __new_bd) = foreign "C" allocBlock_lock() [regs]; \
+ ("ptr" __new_bd) = foreign "C" allocBlock_lock(); \
bdescr_link(__new_bd) = __bd; \
__bd = __new_bd; \
W_[mut_list] = __bd; \
@@ -609,13 +748,13 @@
W_[free] = p; \
bdescr_free(__bd) = free + WDS(1);
-#define recordMutable(p, regs) \
+#define recordMutable(p) \
P_ __p; \
W_ __bd; \
W_ __gen; \
__p = p; \
__bd = Bdescr(__p); \
__gen = TO_W_(bdescr_gen_no(__bd)); \
- if (__gen > 0) { recordMutableCap(__p, __gen, regs); }
+ if (__gen > 0) { recordMutableCap(__p, __gen); }
#endif /* CMM_H */
diff --git a/includes/Rts.h b/includes/Rts.h
index c52fe63d78..b31776828f 100644
--- a/includes/Rts.h
+++ b/includes/Rts.h
@@ -208,7 +208,6 @@ INLINE_HEADER Time fsecondsToTime (double t)
#include "rts/storage/FunTypes.h"
#include "rts/storage/InfoTables.h"
#include "rts/storage/Closures.h"
-#include "rts/storage/Liveness.h"
#include "rts/storage/ClosureTypes.h"
#include "rts/storage/TSO.h"
#include "stg/MiscClosures.h" /* InfoTables, closures etc. defined in the RTS */
diff --git a/includes/rts/Constants.h b/includes/rts/Constants.h
index cd741be7e0..2fab041c22 100644
--- a/includes/rts/Constants.h
+++ b/includes/rts/Constants.h
@@ -118,11 +118,6 @@
pushed in one of the heap check fragments in HeapStackCheck.hc
(ie. currently the generic heap checks - 3 words for StgRetDyn,
18 words for the saved registers, see StgMacros.h).
-
- In the event of an unboxed tuple or let-no-escape stack/heap check
- failure, there will be other words on the stack which are covered
- by the RET_DYN frame. These will have been accounted for by stack
- checks however, so we don't need to allow for them here.
-------------------------------------------------------------------------- */
#define RESERVED_STACK_WORDS 21
@@ -277,25 +272,6 @@
*/
#define TSO_SQUEEZED 128
-/* -----------------------------------------------------------------------------
- RET_DYN stack frames
- -------------------------------------------------------------------------- */
-
-/* VERY MAGIC CONSTANTS!
- * must agree with code in HeapStackCheck.c, stg_gen_chk, and
- * RESERVED_STACK_WORDS in Constants.h.
- */
-#define RET_DYN_BITMAP_SIZE 8
-#define RET_DYN_NONPTR_REGS_SIZE 10
-
-/* Sanity check that RESERVED_STACK_WORDS is reasonable. We can't
- * just derive RESERVED_STACK_WORDS because it's used in Haskell code
- * too.
- */
-#if RESERVED_STACK_WORDS != (3 + RET_DYN_BITMAP_SIZE + RET_DYN_NONPTR_REGS_SIZE)
-#error RESERVED_STACK_WORDS may be wrong!
-#endif
-
/*
* The number of times we spin in a spin lock before yielding (see
* #3758). To tune this value, use the benchmark in #3758: run the
diff --git a/includes/rts/storage/ClosureMacros.h b/includes/rts/storage/ClosureMacros.h
index 6fdd55727a..dd5f428135 100644
--- a/includes/rts/storage/ClosureMacros.h
+++ b/includes/rts/storage/ClosureMacros.h
@@ -410,14 +410,6 @@ EXTERN_INLINE StgWord stack_frame_sizeW( StgClosure *frame )
info = get_ret_itbl(frame);
switch (info->i.type) {
- case RET_DYN:
- {
- StgRetDyn *dyn = (StgRetDyn *)frame;
- return sizeofW(StgRetDyn) + RET_DYN_BITMAP_SIZE +
- RET_DYN_NONPTR_REGS_SIZE +
- RET_DYN_PTRS(dyn->liveness) + RET_DYN_NONPTRS(dyn->liveness);
- }
-
case RET_FUN:
return sizeofW(StgRetFun) + ((StgRetFun *)frame)->size;
diff --git a/includes/rts/storage/ClosureTypes.h b/includes/rts/storage/ClosureTypes.h
index 75ec08bf18..4e3b1e6a72 100644
--- a/includes/rts/storage/ClosureTypes.h
+++ b/includes/rts/storage/ClosureTypes.h
@@ -52,33 +52,32 @@
#define RET_BCO 31
#define RET_SMALL 32
#define RET_BIG 33
-#define RET_DYN 34
-#define RET_FUN 35
-#define UPDATE_FRAME 36
-#define CATCH_FRAME 37
-#define UNDERFLOW_FRAME 38
-#define STOP_FRAME 39
-#define BLOCKING_QUEUE 40
-#define BLACKHOLE 41
-#define MVAR_CLEAN 42
-#define MVAR_DIRTY 43
-#define ARR_WORDS 44
-#define MUT_ARR_PTRS_CLEAN 45
-#define MUT_ARR_PTRS_DIRTY 46
-#define MUT_ARR_PTRS_FROZEN0 47
-#define MUT_ARR_PTRS_FROZEN 48
-#define MUT_VAR_CLEAN 49
-#define MUT_VAR_DIRTY 50
-#define WEAK 51
-#define PRIM 52
-#define MUT_PRIM 53
-#define TSO 54
-#define STACK 55
-#define TREC_CHUNK 56
-#define ATOMICALLY_FRAME 57
-#define CATCH_RETRY_FRAME 58
-#define CATCH_STM_FRAME 59
-#define WHITEHOLE 60
-#define N_CLOSURE_TYPES 61
+#define RET_FUN 34
+#define UPDATE_FRAME 35
+#define CATCH_FRAME 36
+#define UNDERFLOW_FRAME 37
+#define STOP_FRAME 38
+#define BLOCKING_QUEUE 39
+#define BLACKHOLE 40
+#define MVAR_CLEAN 41
+#define MVAR_DIRTY 42
+#define ARR_WORDS 43
+#define MUT_ARR_PTRS_CLEAN 44
+#define MUT_ARR_PTRS_DIRTY 45
+#define MUT_ARR_PTRS_FROZEN0 46
+#define MUT_ARR_PTRS_FROZEN 47
+#define MUT_VAR_CLEAN 48
+#define MUT_VAR_DIRTY 49
+#define WEAK 50
+#define PRIM 51
+#define MUT_PRIM 52
+#define TSO 53
+#define STACK 54
+#define TREC_CHUNK 55
+#define ATOMICALLY_FRAME 56
+#define CATCH_RETRY_FRAME 57
+#define CATCH_STM_FRAME 58
+#define WHITEHOLE 59
+#define N_CLOSURE_TYPES 60
#endif /* RTS_STORAGE_CLOSURETYPES_H */
diff --git a/includes/rts/storage/Closures.h b/includes/rts/storage/Closures.h
index 5f4f03541f..fcba1ebeb6 100644
--- a/includes/rts/storage/Closures.h
+++ b/includes/rts/storage/Closures.h
@@ -240,60 +240,6 @@ typedef struct {
#define BCO_BITMAP_SIZEW(bco) ((BCO_BITMAP_SIZE(bco) + BITS_IN(StgWord) - 1) \
/ BITS_IN(StgWord))
-/* -----------------------------------------------------------------------------
- Dynamic stack frames for generic heap checks.
-
- These generic heap checks are slow, but have the advantage of being
- usable in a variety of situations.
-
- The one restriction is that any relevant SRTs must already be pointed
- to from the stack. The return address doesn't need to have an info
- table attached: hence it can be any old code pointer.
-
- The liveness mask contains a 1 at bit n, if register Rn contains a
- non-pointer. The contents of all 8 vanilla registers are always saved
- on the stack; the liveness mask tells the GC which ones contain
- pointers.
-
- Good places to use a generic heap check:
-
- - case alternatives (the return address with an SRT is already
- on the stack).
-
- - primitives (no SRT required).
-
- The stack frame layout for a RET_DYN is like this:
-
- some pointers |-- RET_DYN_PTRS(liveness) words
- some nonpointers |-- RET_DYN_NONPTRS(liveness) words
-
- L1 \
- D1-2 |-- RET_DYN_NONPTR_REGS_SIZE words
- F1-4 /
-
- R1-8 |-- RET_DYN_BITMAP_SIZE words
-
- return address \
- liveness mask |-- StgRetDyn structure
- stg_gen_chk_info /
-
- we assume that the size of a double is always 2 pointers (wasting a
- word when it is only one pointer, but avoiding lots of #ifdefs).
-
- See Liveness.h for the macros (RET_DYN_PTRS() etc.).
-
- NOTE: if you change the layout of RET_DYN stack frames, then you
- might also need to adjust the value of RESERVED_STACK_WORDS in
- Constants.h.
- -------------------------------------------------------------------------- */
-
-typedef struct {
- const StgInfoTable* info;
- StgWord liveness;
- StgWord ret_addr;
- StgClosure * payload[FLEXIBLE_ARRAY];
-} StgRetDyn;
-
/* A function return stack frame: used when saving the state for a
* garbage collection at a function entry point. The function
* arguments are on the stack, and we also save the function (its
@@ -430,7 +376,7 @@ typedef struct {
typedef struct {
StgHeader header;
- StgBool running_alt_code;
+ StgWord running_alt_code;
StgClosure *first_code;
StgClosure *alt_code;
} StgCatchRetryFrame;
diff --git a/includes/rts/storage/Liveness.h b/includes/rts/storage/Liveness.h
deleted file mode 100644
index 66c82f3134..0000000000
--- a/includes/rts/storage/Liveness.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* -----------------------------------------------------------------------------
- *
- * (c) The University of Glasgow 2004
- *
- * Building liveness masks for RET_DYN stack frames.
- * A few macros that are used in both .cmm and .c sources.
- *
- * A liveness mask is constructed like so:
- *
- * R1_PTR & R2_PTR & R3_PTR
- *
- * -------------------------------------------------------------------------- */
-
-#ifndef RTS_STORAGE_LIVENESS_H
-#define RTS_STORAGE_LIVENESS_H
-
-#define NO_PTRS 0xff
-#define R1_PTR (NO_PTRS ^ (1<<0))
-#define R2_PTR (NO_PTRS ^ (1<<1))
-#define R3_PTR (NO_PTRS ^ (1<<2))
-#define R4_PTR (NO_PTRS ^ (1<<3))
-#define R5_PTR (NO_PTRS ^ (1<<4))
-#define R6_PTR (NO_PTRS ^ (1<<5))
-#define R7_PTR (NO_PTRS ^ (1<<6))
-#define R8_PTR (NO_PTRS ^ (1<<7))
-
-#define N_NONPTRS(n) ((n)<<16)
-#define N_PTRS(n) ((n)<<24)
-
-#define RET_DYN_NONPTRS(l) ((l)>>16 & 0xff)
-#define RET_DYN_PTRS(l) ((l)>>24 & 0xff)
-#define RET_DYN_LIVENESS(l) ((l) & 0xffff)
-
-#endif /* RTS_STORAGE_LIVENESS_H */
diff --git a/includes/rts/storage/SMPClosureOps.h b/includes/rts/storage/SMPClosureOps.h
index 8dee7cbcf9..cd6a789af4 100644
--- a/includes/rts/storage/SMPClosureOps.h
+++ b/includes/rts/storage/SMPClosureOps.h
@@ -12,7 +12,7 @@
#ifdef CMINUSMINUS
#define unlockClosure(ptr,info) \
- prim %write_barrier() []; \
+ prim %write_barrier(); \
StgHeader_info(ptr) = info;
#else
diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h
index c93cc319c0..b7b24a8632 100644
--- a/includes/stg/MiscClosures.h
+++ b/includes/stg/MiscClosures.h
@@ -169,23 +169,6 @@ RTS_RET(stg_noforceIO);
/* standard selector thunks */
-RTS_RET(stg_sel_ret_0_upd);
-RTS_RET(stg_sel_ret_1_upd);
-RTS_RET(stg_sel_ret_2_upd);
-RTS_RET(stg_sel_ret_3_upd);
-RTS_RET(stg_sel_ret_4_upd);
-RTS_RET(stg_sel_ret_5_upd);
-RTS_RET(stg_sel_ret_6_upd);
-RTS_RET(stg_sel_ret_7_upd);
-RTS_RET(stg_sel_ret_8_upd);
-RTS_RET(stg_sel_ret_9_upd);
-RTS_RET(stg_sel_ret_10_upd);
-RTS_RET(stg_sel_ret_11_upd);
-RTS_RET(stg_sel_ret_12_upd);
-RTS_RET(stg_sel_ret_13_upd);
-RTS_RET(stg_sel_ret_14_upd);
-RTS_RET(stg_sel_ret_15_upd);
-
RTS_ENTRY(stg_sel_0_upd);
RTS_ENTRY(stg_sel_1_upd);
RTS_ENTRY(stg_sel_2_upd);
@@ -267,45 +250,39 @@ RTS_FUN_DECL(stg_PAP_apply);
/* standard GC & stack check entry points, all defined in HeapStackCheck.hc */
-RTS_RET(stg_enter);
+RTS_FUN_DECL(stg_gc_noregs);
+
RTS_RET(stg_enter_checkbh);
-RTS_RET(stg_gc_void);
+RTS_RET(stg_ret_v);
+RTS_RET(stg_ret_p);
+RTS_RET(stg_ret_n);
+RTS_RET(stg_ret_f);
+RTS_RET(stg_ret_d);
+RTS_RET(stg_ret_l);
+RTS_FUN_DECL(stg_gc_prim_p);
+RTS_FUN_DECL(stg_gc_prim_pp);
+RTS_FUN_DECL(stg_gc_prim_n);
+
+RTS_RET(stg_enter);
RTS_FUN_DECL(__stg_gc_enter_1);
-RTS_FUN_DECL(stg_gc_noregs);
-RTS_RET(stg_gc_unpt_r1);
RTS_FUN_DECL(stg_gc_unpt_r1);
-
-RTS_RET(stg_gc_unbx_r1);
RTS_FUN_DECL(stg_gc_unbx_r1);
-
-RTS_RET(stg_gc_f1);
RTS_FUN_DECL(stg_gc_f1);
-
-RTS_RET(stg_gc_d1);
RTS_FUN_DECL(stg_gc_d1);
-
-RTS_RET(stg_gc_l1);
RTS_FUN_DECL(stg_gc_l1);
+RTS_FUN_DECL(stg_gc_pp);
+RTS_FUN_DECL(stg_gc_ppp);
+RTS_FUN_DECL(stg_gc_pppp);
RTS_RET(stg_gc_fun);
RTS_FUN_DECL(__stg_gc_fun);
-RTS_RET(stg_gc_gen);
-RTS_FUN_DECL(stg_gc_gen);
-
-RTS_RET(stg_ut_1_0_unreg);
-
-RTS_FUN_DECL(stg_gc_gen_hp);
-RTS_FUN_DECL(stg_gc_ut);
-RTS_FUN_DECL(stg_gen_yield);
RTS_FUN_DECL(stg_yield_noregs);
RTS_FUN_DECL(stg_yield_to_interpreter);
-RTS_FUN_DECL(stg_gen_block);
RTS_FUN_DECL(stg_block_noregs);
-RTS_FUN_DECL(stg_block_1);
RTS_FUN_DECL(stg_block_blackhole);
RTS_FUN_DECL(stg_block_blackhole_finally);
RTS_FUN_DECL(stg_block_takemvar);
diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h
index bf17b7e825..70e93d3234 100644
--- a/includes/stg/Regs.h
+++ b/includes/stg/Regs.h
@@ -93,10 +93,10 @@ typedef struct {
/*
* Registers Hp and HpLim are global across the entire system, and are
- * copied into the RegTable before executing a thread.
+ * copied into the RegTable or registers before executing a thread.
*
- * Registers Sp and SpLim are saved in the TSO for the
- * thread, but are copied into the RegTable before executing a thread.
+ * Registers Sp and SpLim are saved in the TSO for the thread, but are
+ * copied into the RegTable or registers before executing a thread.
*
* All other registers are "general purpose", and are used for passing
* arguments to functions, and returning values. The code generator
@@ -116,45 +116,6 @@ typedef struct {
* (pseudo-)registers in those cases.
*/
-/*
- * Locations for saving per-thread registers.
- */
-
-#define SAVE_Sp (CurrentTSO->sp)
-#define SAVE_SpLim (CurrentTSO->splim)
-
-#define SAVE_Hp (BaseReg->rHp)
-
-#define SAVE_CurrentTSO (BaseReg->rCurrentTSO)
-#define SAVE_CurrentNursery (BaseReg->rCurrentNursery)
-#define SAVE_HpAlloc (BaseReg->rHpAlloc)
-
-/* We sometimes need to save registers across a C-call, eg. if they
- * are clobbered in the standard calling convention. We define the
- * save locations for all registers in the register table.
- */
-
-#define SAVE_R1 (BaseReg->rR1)
-#define SAVE_R2 (BaseReg->rR2)
-#define SAVE_R3 (BaseReg->rR3)
-#define SAVE_R4 (BaseReg->rR4)
-#define SAVE_R5 (BaseReg->rR5)
-#define SAVE_R6 (BaseReg->rR6)
-#define SAVE_R7 (BaseReg->rR7)
-#define SAVE_R8 (BaseReg->rR8)
-#define SAVE_R9 (BaseReg->rR9)
-#define SAVE_R10 (BaseReg->rR10)
-
-#define SAVE_F1 (BaseReg->rF1)
-#define SAVE_F2 (BaseReg->rF2)
-#define SAVE_F3 (BaseReg->rF3)
-#define SAVE_F4 (BaseReg->rF4)
-
-#define SAVE_D1 (BaseReg->rD1)
-#define SAVE_D2 (BaseReg->rD2)
-
-#define SAVE_L1 (BaseReg->rL1)
-
/* -----------------------------------------------------------------------------
* Emit the GCC-specific register declarations for each machine
* register being used. If any STG register isn't mapped to a machine
@@ -163,11 +124,6 @@ typedef struct {
* First, the general purpose registers. The idea is, if a particular
* general-purpose STG register can't be mapped to a real machine
* register, it won't be used at all. Instead, we'll use the stack.
- *
- * This is an improvement on the way things used to be done, when all
- * registers were mapped to locations in the register table, and stuff
- * was being shifted from the stack to the register table and back
- * again for no good reason (on register-poor architectures).
*/
/* define NO_REGS to omit register declarations - used in RTS C code
@@ -402,287 +358,6 @@ GLOBAL_REG_DECL(bdescr *,HpAlloc,REG_HpAlloc)
#define stg_gc_enter_1 (FunReg->stgGCEnter1)
#define stg_gc_fun (FunReg->stgGCFun)
-/* -----------------------------------------------------------------------------
- For any registers which are denoted "caller-saves" by the C calling
- convention, we have to emit code to save and restore them across C
- calls.
- -------------------------------------------------------------------------- */
-
-#ifdef CALLER_SAVES_R1
-#define CALLER_SAVE_R1 SAVE_R1 = R1;
-#define CALLER_RESTORE_R1 R1 = SAVE_R1;
-#else
-#define CALLER_SAVE_R1 /* nothing */
-#define CALLER_RESTORE_R1 /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_R2
-#define CALLER_SAVE_R2 SAVE_R2 = R2;
-#define CALLER_RESTORE_R2 R2 = SAVE_R2;
-#else
-#define CALLER_SAVE_R2 /* nothing */
-#define CALLER_RESTORE_R2 /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_R3
-#define CALLER_SAVE_R3 SAVE_R3 = R3;
-#define CALLER_RESTORE_R3 R3 = SAVE_R3;
-#else
-#define CALLER_SAVE_R3 /* nothing */
-#define CALLER_RESTORE_R3 /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_R4
-#define CALLER_SAVE_R4 SAVE_R4 = R4;
-#define CALLER_RESTORE_R4 R4 = SAVE_R4;
-#else
-#define CALLER_SAVE_R4 /* nothing */
-#define CALLER_RESTORE_R4 /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_R5
-#define CALLER_SAVE_R5 SAVE_R5 = R5;
-#define CALLER_RESTORE_R5 R5 = SAVE_R5;
-#else
-#define CALLER_SAVE_R5 /* nothing */
-#define CALLER_RESTORE_R5 /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_R6
-#define CALLER_SAVE_R6 SAVE_R6 = R6;
-#define CALLER_RESTORE_R6 R6 = SAVE_R6;
-#else
-#define CALLER_SAVE_R6 /* nothing */
-#define CALLER_RESTORE_R6 /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_R7
-#define CALLER_SAVE_R7 SAVE_R7 = R7;
-#define CALLER_RESTORE_R7 R7 = SAVE_R7;
-#else
-#define CALLER_SAVE_R7 /* nothing */
-#define CALLER_RESTORE_R7 /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_R8
-#define CALLER_SAVE_R8 SAVE_R8 = R8;
-#define CALLER_RESTORE_R8 R8 = SAVE_R8;
-#else
-#define CALLER_SAVE_R8 /* nothing */
-#define CALLER_RESTORE_R8 /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_R9
-#define CALLER_SAVE_R9 SAVE_R9 = R9;
-#define CALLER_RESTORE_R9 R9 = SAVE_R9;
-#else
-#define CALLER_SAVE_R9 /* nothing */
-#define CALLER_RESTORE_R9 /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_R10
-#define CALLER_SAVE_R10 SAVE_R10 = R10;
-#define CALLER_RESTORE_R10 R10 = SAVE_R10;
-#else
-#define CALLER_SAVE_R10 /* nothing */
-#define CALLER_RESTORE_R10 /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_F1
-#define CALLER_SAVE_F1 SAVE_F1 = F1;
-#define CALLER_RESTORE_F1 F1 = SAVE_F1;
-#else
-#define CALLER_SAVE_F1 /* nothing */
-#define CALLER_RESTORE_F1 /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_F2
-#define CALLER_SAVE_F2 SAVE_F2 = F2;
-#define CALLER_RESTORE_F2 F2 = SAVE_F2;
-#else
-#define CALLER_SAVE_F2 /* nothing */
-#define CALLER_RESTORE_F2 /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_F3
-#define CALLER_SAVE_F3 SAVE_F3 = F3;
-#define CALLER_RESTORE_F3 F3 = SAVE_F3;
-#else
-#define CALLER_SAVE_F3 /* nothing */
-#define CALLER_RESTORE_F3 /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_F4
-#define CALLER_SAVE_F4 SAVE_F4 = F4;
-#define CALLER_RESTORE_F4 F4 = SAVE_F4;
-#else
-#define CALLER_SAVE_F4 /* nothing */
-#define CALLER_RESTORE_F4 /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_D1
-#define CALLER_SAVE_D1 SAVE_D1 = D1;
-#define CALLER_RESTORE_D1 D1 = SAVE_D1;
-#else
-#define CALLER_SAVE_D1 /* nothing */
-#define CALLER_RESTORE_D1 /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_D2
-#define CALLER_SAVE_D2 SAVE_D2 = D2;
-#define CALLER_RESTORE_D2 D2 = SAVE_D2;
-#else
-#define CALLER_SAVE_D2 /* nothing */
-#define CALLER_RESTORE_D2 /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_L1
-#define CALLER_SAVE_L1 SAVE_L1 = L1;
-#define CALLER_RESTORE_L1 L1 = SAVE_L1;
-#else
-#define CALLER_SAVE_L1 /* nothing */
-#define CALLER_RESTORE_L1 /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_Sp
-#define CALLER_SAVE_Sp SAVE_Sp = Sp;
-#define CALLER_RESTORE_Sp Sp = SAVE_Sp;
-#else
-#define CALLER_SAVE_Sp /* nothing */
-#define CALLER_RESTORE_Sp /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_SpLim
-#define CALLER_SAVE_SpLim SAVE_SpLim = SpLim;
-#define CALLER_RESTORE_SpLim SpLim = SAVE_SpLim;
-#else
-#define CALLER_SAVE_SpLim /* nothing */
-#define CALLER_RESTORE_SpLim /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_Hp
-#define CALLER_SAVE_Hp SAVE_Hp = Hp;
-#define CALLER_RESTORE_Hp Hp = SAVE_Hp;
-#else
-#define CALLER_SAVE_Hp /* nothing */
-#define CALLER_RESTORE_Hp /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_Base
-#ifdef THREADED_RTS
-#error "Can't have caller-saved BaseReg with THREADED_RTS"
-#endif
-#define CALLER_SAVE_Base /* nothing */
-#define CALLER_RESTORE_Base BaseReg = &MainRegTable;
-#else
-#define CALLER_SAVE_Base /* nothing */
-#define CALLER_RESTORE_Base /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_CurrentTSO
-#define CALLER_SAVE_CurrentTSO SAVE_CurrentTSO = CurrentTSO;
-#define CALLER_RESTORE_CurrentTSO CurrentTSO = SAVE_CurrentTSO;
-#else
-#define CALLER_SAVE_CurrentTSO /* nothing */
-#define CALLER_RESTORE_CurrentTSO /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_CurrentNursery
-#define CALLER_SAVE_CurrentNursery SAVE_CurrentNursery = CurrentNursery;
-#define CALLER_RESTORE_CurrentNursery CurrentNursery = SAVE_CurrentNursery;
-#else
-#define CALLER_SAVE_CurrentNursery /* nothing */
-#define CALLER_RESTORE_CurrentNursery /* nothing */
-#endif
-
-#ifdef CALLER_SAVES_HpAlloc
-#define CALLER_SAVE_HpAlloc SAVE_HpAlloc = HpAlloc;
-#define CALLER_RESTORE_HpAlloc HpAlloc = SAVE_HpAlloc;
-#else
-#define CALLER_SAVE_HpAlloc /* nothing */
-#define CALLER_RESTORE_HpAlloc /* nothing */
-#endif
-
#endif /* IN_STG_CODE */
-/* ----------------------------------------------------------------------------
- Handy bunches of saves/restores
- ------------------------------------------------------------------------ */
-
-#if IN_STG_CODE
-
-#define CALLER_SAVE_USER \
- CALLER_SAVE_R1 \
- CALLER_SAVE_R2 \
- CALLER_SAVE_R3 \
- CALLER_SAVE_R4 \
- CALLER_SAVE_R5 \
- CALLER_SAVE_R6 \
- CALLER_SAVE_R7 \
- CALLER_SAVE_R8 \
- CALLER_SAVE_R9 \
- CALLER_SAVE_R10 \
- CALLER_SAVE_F1 \
- CALLER_SAVE_F2 \
- CALLER_SAVE_F3 \
- CALLER_SAVE_F4 \
- CALLER_SAVE_D1 \
- CALLER_SAVE_D2 \
- CALLER_SAVE_L1
-
- /* Save Base last, since the others may
- be addressed relative to it */
-#define CALLER_SAVE_SYSTEM \
- CALLER_SAVE_Sp \
- CALLER_SAVE_SpLim \
- CALLER_SAVE_Hp \
- CALLER_SAVE_CurrentTSO \
- CALLER_SAVE_CurrentNursery \
- CALLER_SAVE_Base
-
-#define CALLER_RESTORE_USER \
- CALLER_RESTORE_R1 \
- CALLER_RESTORE_R2 \
- CALLER_RESTORE_R3 \
- CALLER_RESTORE_R4 \
- CALLER_RESTORE_R5 \
- CALLER_RESTORE_R6 \
- CALLER_RESTORE_R7 \
- CALLER_RESTORE_R8 \
- CALLER_RESTORE_R9 \
- CALLER_RESTORE_R10 \
- CALLER_RESTORE_F1 \
- CALLER_RESTORE_F2 \
- CALLER_RESTORE_F3 \
- CALLER_RESTORE_F4 \
- CALLER_RESTORE_D1 \
- CALLER_RESTORE_D2 \
- CALLER_RESTORE_L1
-
- /* Restore Base first, since the others may
- be addressed relative to it */
-#define CALLER_RESTORE_SYSTEM \
- CALLER_RESTORE_Base \
- CALLER_RESTORE_Sp \
- CALLER_RESTORE_SpLim \
- CALLER_RESTORE_Hp \
- CALLER_RESTORE_CurrentTSO \
- CALLER_RESTORE_CurrentNursery
-
-#else /* not IN_STG_CODE */
-
-#define CALLER_SAVE_USER /* nothing */
-#define CALLER_SAVE_SYSTEM /* nothing */
-#define CALLER_RESTORE_USER /* nothing */
-#define CALLER_RESTORE_SYSTEM /* nothing */
-
-#endif /* IN_STG_CODE */
-#define CALLER_SAVE_ALL \
- CALLER_SAVE_SYSTEM \
- CALLER_SAVE_USER
-
-#define CALLER_RESTORE_ALL \
- CALLER_RESTORE_SYSTEM \
- CALLER_RESTORE_USER
-
#endif /* REGS_H */