diff options
Diffstat (limited to 'rts')
86 files changed, 12173 insertions, 20 deletions
diff --git a/rts/.tsan-suppressions b/rts/.tsan-suppressions index 734faff5a6..dab647227e 100644 --- a/rts/.tsan-suppressions +++ b/rts/.tsan-suppressions @@ -1,5 +1,5 @@ # ThreadSanitizer suppressions. -# See Note [ThreadSanitizer] in includes/rts/TSANUtils.h. +# See Note [ThreadSanitizer] in rts/include/rts/TSANUtils.h. # This is a known race but is benign race:capability_is_busy diff --git a/rts/AdjustorAsm.S b/rts/AdjustorAsm.S index 2795b83b63..59ac2b49cb 100644 --- a/rts/AdjustorAsm.S +++ b/rts/AdjustorAsm.S @@ -1,4 +1,4 @@ -#include "../includes/ghcconfig.h" +#include "include/ghcconfig.h" /* ******************************** PowerPC ******************************** */ diff --git a/rts/Capability.h b/rts/Capability.h index 14ba9ef2d7..4a27e618fe 100644 --- a/rts/Capability.h +++ b/rts/Capability.h @@ -253,10 +253,10 @@ INLINE_HEADER void releaseCapability_ (Capability* cap STG_UNUSED, bool always_wakeup STG_UNUSED) {}; #endif -// declared in includes/rts/Threads.h: +// declared in rts/include/rts/Threads.h: // extern Capability MainCapability; -// declared in includes/rts/Threads.h: +// declared in rts/include/rts/Threads.h: // extern uint32_t n_capabilities; // extern uint32_t enabled_capabilities; diff --git a/rts/LdvProfile.c b/rts/LdvProfile.c index aef1703c4a..b56c4d07bb 100644 --- a/rts/LdvProfile.c +++ b/rts/LdvProfile.c @@ -70,7 +70,7 @@ processHeapClosureForDead( const StgClosure *c ) if (IS_FORWARDING_PTR(info)) { // The size of the evacuated closure is currently stored in // the LDV field. See SET_EVACUAEE_FOR_LDV() in - // includes/StgLdvProf.h. + // rts/include/StgLdvProf.h. 
return LDVW(c); } info = INFO_PTR_TO_STRUCT(info); diff --git a/rts/Printer.c b/rts/Printer.c index 90a40f2626..d2a5c67ca4 100644 --- a/rts/Printer.c +++ b/rts/Printer.c @@ -996,7 +996,7 @@ void printObj( StgClosure *obj ) Closure types NOTE: must be kept in sync with the closure types in - includes/rts/storage/ClosureTypes.h + rts/include/rts/storage/ClosureTypes.h -------------------------------------------------------------------------- */ const char *closure_type_names[] = { diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c index 68e412c9c7..964cb99e9f 100644 --- a/rts/RetainerProfile.c +++ b/rts/RetainerProfile.c @@ -49,7 +49,7 @@ information about the retainers is still applicable. /* Note: what to change in order to plug-in a new retainer profiling scheme? - (1) type retainer in ../includes/StgRetainerProf.h + (1) type retainer in include/StgRetainerProf.h (2) retainer function R(), i.e., getRetainerFrom() (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(), in RetainerSet.h, if needed. 
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c index 38e1b8071c..0632dfa6df 100644 --- a/rts/RtsSymbols.c +++ b/rts/RtsSymbols.c @@ -397,7 +397,7 @@ #endif /* Modules compiled with -ticky may mention ticky counters */ -/* This list should marry up with the one in $(TOP)/includes/stg/Ticky.h */ +/* This list should marry up with the one in $(TOP)/rts/include/stg/Ticky.h */ #define RTS_TICKY_SYMBOLS \ SymI_NeedsDataProto(ticky_entry_ctrs) \ SymI_NeedsDataProto(top_ct) \ diff --git a/rts/Schedule.c b/rts/Schedule.c index 4a4b2d494a..3306f00ff3 100644 --- a/rts/Schedule.c +++ b/rts/Schedule.c @@ -2621,7 +2621,7 @@ scheduleThreadOn(Capability *cap, StgWord cpu USED_IF_THREADS, StgTSO *tso) #endif } -// See includes/rts/Threads.h +// See rts/include/rts/Threads.h void scheduleWaitThread (StgTSO* tso, /*[out]*/HaskellObj* ret, Capability **pcap) { diff --git a/rts/Schedule.h b/rts/Schedule.h index 4c692842e7..5aaafd9d88 100644 --- a/rts/Schedule.h +++ b/rts/Schedule.h @@ -130,7 +130,7 @@ void resurrectThreads (StgTSO *); #if !IN_STG_CODE -/* END_TSO_QUEUE and friends now defined in includes/stg/MiscClosures.h */ +/* END_TSO_QUEUE and friends now defined in rts/include/stg/MiscClosures.h */ /* Add a thread to the end of the run queue. * NOTE: tso->link should be END_TSO_QUEUE before calling this macro. diff --git a/rts/StgCRun.c b/rts/StgCRun.c index b439b7e36a..25a931994d 100644 --- a/rts/StgCRun.c +++ b/rts/StgCRun.c @@ -880,7 +880,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg) { those regs not used in Thumb mode. Hard to judge if this is needed, but certainly Haskell code is using them for placing GHC's virtual registers there. 
See - includes/stg/MachRegs.h Please note that Haskell code is + rts/include/stg/MachRegs.h Please note that Haskell code is compiled by GHC/LLVM into ARM code (not Thumb!), at least as of February 2012 */ : "%r4", "%r5", "%r6", "%r8", "%r9", "%r10", "%11", "%ip", "%lr" diff --git a/rts/StgPrimFloat.h b/rts/StgPrimFloat.h index 70ece8f074..4c77b3d995 100644 --- a/rts/StgPrimFloat.h +++ b/rts/StgPrimFloat.h @@ -16,6 +16,6 @@ void __decodeDouble_2Int (I_ *man_sign, W_ *man_high, W_ *man_low, I_ *exp, void __decodeFloat_Int (I_ *man, I_ *exp, StgFloat flt); // __{int,word}_encode{Float,Double} are public, declared in -// includes/rts/PrimFloat.h. +// rts/include/rts/PrimFloat.h. #include "EndPrivate.h" diff --git a/rts/include/.dir-locals.el b/rts/include/.dir-locals.el new file mode 100644 index 0000000000..9e13ffa6ba --- /dev/null +++ b/rts/include/.dir-locals.el @@ -0,0 +1,13 @@ +;;; Directory Local Variables +;;; See Info node `(emacs) Directory Variables' for more information. + +;; Default mode settings: no tabs, 80 column, UTF8 +((nil + (indent-tabs-mode . nil) + (fill-column . 80) + (buffer-file-coding-system . utf-8-unix)) + + ;; c-mode settings: 'Allman' BSD style, 4 space indents + (c-mode + (c-file-style . "BSD") + (c-basic-offset . 4))) diff --git a/rts/include/Cmm.h b/rts/include/Cmm.h new file mode 100644 index 0000000000..94951bc9f8 --- /dev/null +++ b/rts/include/Cmm.h @@ -0,0 +1,924 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The University of Glasgow 2004-2013 + * + * This file is included at the top of all .cmm source files (and + * *only* .cmm files). It defines a collection of useful macros for + * making .cmm code a bit less error-prone to write, and a bit easier + * on the eye for the reader. + * + * For the syntax of .cmm files, see the parser in ghc/compiler/GHC/Cmm/Parser.y. 
+ * + * Accessing fields of structures defined in the RTS header files is + * done via automatically-generated macros in DerivedConstants.h. For + * example, where previously we used + * + * CurrentTSO->what_next = x + * + * in C-- we now use + * + * StgTSO_what_next(CurrentTSO) = x + * + * where the StgTSO_what_next() macro is automatically generated by + * utils/deriveConstants. If you need to access a field that doesn't + * already have a macro, edit that program (it's pretty self-explanatory). + * + * -------------------------------------------------------------------------- */ + +#pragma once + +/* + * In files that are included into both C and C-- (and perhaps + * Haskell) sources, we sometimes need to conditionally compile bits + * depending on the language. CMINUSMINUS==1 in .cmm sources: + */ +#define CMINUSMINUS 1 + +#include "ghcconfig.h" + +/* ----------------------------------------------------------------------------- + Types + + The following synonyms for C-- types are declared here: + + I8, I16, I32, I64 MachRep-style names for convenience + + W_ is shorthand for the word type (== StgWord) + F_ shorthand for float (F_ == StgFloat == C's float) + D_ shorthand for double (D_ == StgDouble == C's double) + + CInt has the same size as an int in C on this platform + CLong has the same size as a long in C on this platform + CBool has the same size as a bool in C on this platform + + --------------------------------------------------------------------------- */ + +#define I8 bits8 +#define I16 bits16 +#define I32 bits32 +#define I64 bits64 +#define P_ gcptr + +#if SIZEOF_VOID_P == 4 +#define W_ bits32 +/* Maybe it's better to include MachDeps.h */ +#define TAG_BITS 2 +#elif SIZEOF_VOID_P == 8 +#define W_ bits64 +/* Maybe it's better to include MachDeps.h */ +#define TAG_BITS 3 +#else +#error Unknown word size +#endif + +/* + * The RTS must sometimes UNTAG a pointer before dereferencing it. 
+ * See the wiki page commentary/rts/haskell-execution/pointer-tagging + */ +#define TAG_MASK ((1 << TAG_BITS) - 1) +#define UNTAG(p) (p & ~TAG_MASK) +#define GETTAG(p) (p & TAG_MASK) + +#if SIZEOF_INT == 4 +#define CInt bits32 +#elif SIZEOF_INT == 8 +#define CInt bits64 +#else +#error Unknown int size +#endif + +#if SIZEOF_LONG == 4 +#define CLong bits32 +#elif SIZEOF_LONG == 8 +#define CLong bits64 +#else +#error Unknown long size +#endif + +#define CBool bits8 + +#define F_ float32 +#define D_ float64 +#define L_ bits64 +#define V16_ bits128 +#define V32_ bits256 +#define V64_ bits512 + +#define SIZEOF_StgDouble 8 +#define SIZEOF_StgWord64 8 + +/* ----------------------------------------------------------------------------- + Misc useful stuff + -------------------------------------------------------------------------- */ + +#define ccall foreign "C" + +#define NULL (0::W_) + +#define STRING(name,str) \ + section "rodata" { \ + name : bits8[] str; \ + } \ + +#if defined(TABLES_NEXT_TO_CODE) +#define RET_LBL(f) f##_info +#else +#define RET_LBL(f) f##_ret +#endif + +#if defined(TABLES_NEXT_TO_CODE) +#define ENTRY_LBL(f) f##_info +#else +#define ENTRY_LBL(f) f##_entry +#endif + +/* ----------------------------------------------------------------------------- + Byte/word macros + + Everything in C-- is in byte offsets (well, most things). We use + some macros to allow us to express offsets in words and to try to + avoid byte/word confusion. + -------------------------------------------------------------------------- */ + +#define SIZEOF_W SIZEOF_VOID_P +#define W_MASK (SIZEOF_W-1) + +#if SIZEOF_W == 4 +#define W_SHIFT 2 +#elif SIZEOF_W == 8 +#define W_SHIFT 3 +#endif + +/* Converting quantities of words to bytes */ +#define WDS(n) ((n)*SIZEOF_W) + +/* + * Converting quantities of bytes to words + * NB. 
these work on *unsigned* values only + */ +#define BYTES_TO_WDS(n) ((n) / SIZEOF_W) +#define ROUNDUP_BYTES_TO_WDS(n) (((n) + SIZEOF_W - 1) / SIZEOF_W) + +/* + * TO_W_(n) and TO_ZXW_(n) convert n to W_ type from a smaller type, + * with and without sign extension respectively + */ +#if SIZEOF_W == 4 +#define TO_I64(x) %sx64(x) +#define TO_W_(x) %sx32(x) +#define TO_ZXW_(x) %zx32(x) +#define HALF_W_(x) %lobits16(x) +#elif SIZEOF_W == 8 +#define TO_I64(x) (x) +#define TO_W_(x) %sx64(x) +#define TO_ZXW_(x) %zx64(x) +#define HALF_W_(x) %lobits32(x) +#endif + +#if SIZEOF_INT == 4 && SIZEOF_W == 8 +#define W_TO_INT(x) %lobits32(x) +#elif SIZEOF_INT == SIZEOF_W +#define W_TO_INT(x) (x) +#endif + +#if SIZEOF_LONG == 4 && SIZEOF_W == 8 +#define W_TO_LONG(x) %lobits32(x) +#elif SIZEOF_LONG == SIZEOF_W +#define W_TO_LONG(x) (x) +#endif + +/* ----------------------------------------------------------------------------- + Atomic memory operations. + -------------------------------------------------------------------------- */ + +#if SIZEOF_W == 4 +#define cmpxchgW cmpxchg32 +#elif SIZEOF_W == 8 +#define cmpxchgW cmpxchg64 +#endif + +/* ----------------------------------------------------------------------------- + Heap/stack access, and adjusting the heap/stack pointers. 
+ -------------------------------------------------------------------------- */ + +#define Sp(n) W_[Sp + WDS(n)] +#define Hp(n) W_[Hp + WDS(n)] + +#define Sp_adj(n) Sp = Sp + WDS(n) /* pronounced "spadge" */ +#define Hp_adj(n) Hp = Hp + WDS(n) + +/* ----------------------------------------------------------------------------- + Assertions and Debuggery + -------------------------------------------------------------------------- */ + +#if defined(DEBUG) || defined(USE_ASSERTS_ALL_WAYS) +#define ASSERTS_ENABLED 1 +#else +#undef ASSERTS_ENABLED +#endif + +#if defined(ASSERTS_ENABLED) +#define ASSERT(predicate) \ + if (predicate) { \ + /*null*/; \ + } else { \ + foreign "C" _assertFail(__FILE__, __LINE__) never returns; \ + } +#else +#define ASSERT(p) /* nothing */ +#endif + +#if defined(DEBUG) +#define DEBUG_ONLY(s) s +#else +#define DEBUG_ONLY(s) /* nothing */ +#endif + +/* + * The IF_DEBUG macro is useful for debug messages that depend on one + * of the RTS debug options. For example: + * + * IF_DEBUG(RtsFlags_DebugFlags_apply, + * foreign "C" fprintf(stderr, stg_ap_0_ret_str)); + * + * Note the syntax is slightly different to the C version of this macro. + */ +#if defined(DEBUG) +#define IF_DEBUG(c,s) if (RtsFlags_DebugFlags_##c(RtsFlags) != 0::CBool) { s; } +#else +#define IF_DEBUG(c,s) /* nothing */ +#endif + +/* ----------------------------------------------------------------------------- + Entering + + It isn't safe to "enter" every closure. Functions in particular + have no entry code as such; their entry point contains the code to + apply the function. + + ToDo: range should end in N_CLOSURE_TYPES-1, not N_CLOSURE_TYPES, + but switch doesn't allow us to use exprs there yet. + + If R1 points to a tagged object it points either to + * A constructor. + * A function with arity <= TAG_MASK. + In both cases the right thing to do is to return. + Note: it is rather lucky that we can use the tag bits to do this + for both objects. Maybe it points to a brittle design? 
+ + Indirections can contain tagged pointers, so their tag is checked. + -------------------------------------------------------------------------- */ + +#if defined(PROFILING) + +// When profiling, we cannot shortcut ENTER() by checking the tag, +// because LDV profiling relies on entering closures to mark them as +// "used". + +#define LOAD_INFO(ret,x) \ + info = %INFO_PTR(UNTAG(x)); + +#define UNTAG_IF_PROF(x) UNTAG(x) + +#else + +#define LOAD_INFO(ret,x) \ + if (GETTAG(x) != 0) { \ + ret(x); \ + } \ + info = %INFO_PTR(x); + +#define UNTAG_IF_PROF(x) (x) /* already untagged */ + +#endif + +// We need two versions of ENTER(): +// - ENTER(x) takes the closure as an argument and uses return(), +// for use in civilized code where the stack is handled by GHC +// +// - ENTER_NOSTACK() where the closure is in R1, and returns are +// explicit jumps, for use when we are doing the stack management +// ourselves. + +#if defined(PROFILING) +// See Note [Evaluating functions with profiling] in rts/Apply.cmm +#define ENTER(x) jump stg_ap_0_fast(x); +#else +#define ENTER(x) ENTER_(return,x) +#endif + +#define ENTER_R1() ENTER_(RET_R1,R1) + +#define RET_R1(x) jump %ENTRY_CODE(Sp(0)) [R1] + +#define ENTER_(ret,x) \ + again: \ + W_ info; \ + LOAD_INFO(ret,x) \ + /* See Note [Heap memory barriers] in SMP.h */ \ + prim_read_barrier; \ + switch [INVALID_OBJECT .. N_CLOSURE_TYPES] \ + (TO_W_( %INFO_TYPE(%STD_INFO(info)) )) { \ + case \ + IND, \ + IND_STATIC: \ + { \ + x = StgInd_indirectee(x); \ + goto again; \ + } \ + case \ + FUN, \ + FUN_1_0, \ + FUN_0_1, \ + FUN_2_0, \ + FUN_1_1, \ + FUN_0_2, \ + FUN_STATIC, \ + BCO, \ + PAP: \ + { \ + ret(x); \ + } \ + default: \ + { \ + x = UNTAG_IF_PROF(x); \ + jump %ENTRY_CODE(info) (x); \ + } \ + } + +// The FUN cases almost never happen: a pointer to a non-static FUN +// should always be tagged. This unfortunately isn't true for the +// interpreter right now, which leaves untagged FUNs on the stack. 
+ +/* ----------------------------------------------------------------------------- + Constants. + -------------------------------------------------------------------------- */ + +#include "rts/Constants.h" +#include "DerivedConstants.h" +#include "rts/storage/ClosureTypes.h" +#include "rts/storage/FunTypes.h" +#include "rts/OSThreads.h" + +/* + * Need MachRegs, because some of the RTS code is conditionally + * compiled based on REG_R1, REG_R2, etc. + */ +#include "stg/MachRegsForHost.h" + +#include "rts/prof/LDV.h" + +#undef BLOCK_SIZE +#undef MBLOCK_SIZE +#include "rts/storage/Block.h" /* For Bdescr() */ + + +#define MyCapability() (BaseReg - OFFSET_Capability_r) + +/* ------------------------------------------------------------------------- + Info tables + ------------------------------------------------------------------------- */ + +#if defined(PROFILING) +#define PROF_HDR_FIELDS(w_,hdr1,hdr2) \ + w_ hdr1, \ + w_ hdr2, +#else +#define PROF_HDR_FIELDS(w_,hdr1,hdr2) /* nothing */ +#endif + +/* ------------------------------------------------------------------------- + Allocation and garbage collection + ------------------------------------------------------------------------- */ + +/* + * ALLOC_PRIM is for allocating memory on the heap for a primitive + * object. It is used all over PrimOps.cmm. + * + * We make the simplifying assumption that the "admin" part of a + * primitive closure is just the header when calculating sizes for + * ticky-ticky. It's not clear whether eg. the size field of an array + * should be counted as "admin", or the various fields of a BCO. 
+ */ +#define ALLOC_PRIM(bytes) \ + HP_CHK_GEN_TICKY(bytes); \ + TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \ + CCCS_ALLOC(bytes); + +#define HEAP_CHECK(bytes,failure) \ + TICK_BUMP(HEAP_CHK_ctr); \ + Hp = Hp + (bytes); \ + if (Hp > HpLim) { HpAlloc = (bytes); failure; } \ + TICK_ALLOC_HEAP_NOCTR(bytes); + +#define ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,failure) \ + HEAP_CHECK(bytes,failure) \ + TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \ + CCCS_ALLOC(bytes); + +#define ALLOC_PRIM_(bytes,fun) \ + ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM(fun)); + +#define ALLOC_PRIM_P(bytes,fun,arg) \ + ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_P(fun,arg)); + +#define ALLOC_PRIM_N(bytes,fun,arg) \ + ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_N(fun,arg)); + +/* CCS_ALLOC wants the size in words, because ccs->mem_alloc is in words */ +#define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), CCCS) + +#define HP_CHK_GEN_TICKY(bytes) \ + HP_CHK_GEN(bytes); \ + TICK_ALLOC_HEAP_NOCTR(bytes); + +#define HP_CHK_P(bytes, fun, arg) \ + HEAP_CHECK(bytes, GC_PRIM_P(fun,arg)) + +// TODO I'm not seeing where ALLOC_P_TICKY is used; can it be removed? +// -NSF March 2013 +#define ALLOC_P_TICKY(bytes, fun, arg) \ + HP_CHK_P(bytes); \ + TICK_ALLOC_HEAP_NOCTR(bytes); + +#define CHECK_GC() \ + (bdescr_link(CurrentNursery) == NULL || \ + generation_n_new_large_words(W_[g0]) >= TO_W_(CLong[large_alloc_lim])) + +// allocate() allocates from the nursery, so we check to see +// whether the nursery is nearly empty in any function that uses +// allocate() - this includes many of the primops. +// +// HACK alert: the __L__ stuff is here to coax the common-block +// eliminator into commoning up the call stg_gc_noregs() with the same +// code that gets generated by a STK_CHK_GEN() in the same proc. 
We +// also need an if (0) { goto __L__; } so that the __L__ label isn't +// optimised away by the control-flow optimiser prior to common-block +// elimination (it will be optimised away later). +// +// This saves some code in gmp-wrappers.cmm where we have lots of +// MAYBE_GC() in the same proc as STK_CHK_GEN(). +// +#define MAYBE_GC(retry) \ + if (CHECK_GC()) { \ + HpAlloc = 0; \ + goto __L__; \ + __L__: \ + call stg_gc_noregs(); \ + goto retry; \ + } \ + if (0) { goto __L__; } + +#define GC_PRIM(fun) \ + jump stg_gc_prim(fun); + +// Version of GC_PRIM for use in low-level Cmm. We can call +// stg_gc_prim, because it takes one argument and therefore has a +// platform-independent calling convention (Note [Syntax of .cmm +// files] in GHC.Cmm.Parser). +#define GC_PRIM_LL(fun) \ + R1 = fun; \ + jump stg_gc_prim [R1]; + +// We pass the fun as the second argument, because the arg is +// usually already in the first argument position (R1), so this +// avoids moving it to a different register / stack slot. 
+#define GC_PRIM_N(fun,arg) \ + jump stg_gc_prim_n(arg,fun); + +#define GC_PRIM_P(fun,arg) \ + jump stg_gc_prim_p(arg,fun); + +#define GC_PRIM_P_LL(fun,arg) \ + R1 = arg; \ + R2 = fun; \ + jump stg_gc_prim_p_ll [R1,R2]; + +#define GC_PRIM_PP(fun,arg1,arg2) \ + jump stg_gc_prim_pp(arg1,arg2,fun); + +#define MAYBE_GC_(fun) \ + if (CHECK_GC()) { \ + HpAlloc = 0; \ + GC_PRIM(fun) \ + } + +#define MAYBE_GC_N(fun,arg) \ + if (CHECK_GC()) { \ + HpAlloc = 0; \ + GC_PRIM_N(fun,arg) \ + } + +#define MAYBE_GC_P(fun,arg) \ + if (CHECK_GC()) { \ + HpAlloc = 0; \ + GC_PRIM_P(fun,arg) \ + } + +#define MAYBE_GC_PP(fun,arg1,arg2) \ + if (CHECK_GC()) { \ + HpAlloc = 0; \ + GC_PRIM_PP(fun,arg1,arg2) \ + } + +#define STK_CHK_LL(n, fun) \ + TICK_BUMP(STK_CHK_ctr); \ + if (Sp - (n) < SpLim) { \ + GC_PRIM_LL(fun) \ + } + +#define STK_CHK_P_LL(n, fun, arg) \ + TICK_BUMP(STK_CHK_ctr); \ + if (Sp - (n) < SpLim) { \ + GC_PRIM_P_LL(fun,arg) \ + } + +#define STK_CHK_PP(n, fun, arg1, arg2) \ + TICK_BUMP(STK_CHK_ctr); \ + if (Sp - (n) < SpLim) { \ + GC_PRIM_PP(fun,arg1,arg2) \ + } + +#define STK_CHK_ENTER(n, closure) \ + TICK_BUMP(STK_CHK_ctr); \ + if (Sp - (n) < SpLim) { \ + jump __stg_gc_enter_1(closure); \ + } + +// A funky heap check used by AutoApply.cmm + +#define HP_CHK_NP_ASSIGN_SP0(size,f) \ + HEAP_CHECK(size, Sp(0) = f; jump __stg_gc_enter_1 [R1];) + +/* ----------------------------------------------------------------------------- + Closure headers + -------------------------------------------------------------------------- */ + +/* + * This is really ugly, since we don't do the rest of StgHeader this + * way. The problem is that values from DerivedConstants.h cannot be + * dependent on the way (SMP, PROF etc.). For SIZEOF_StgHeader we get + * the value from GHC, but it seems like too much trouble to do that + * for StgThunkHeader. 
+ */ +#define SIZEOF_StgThunkHeader SIZEOF_StgHeader+SIZEOF_StgSMPThunkHeader + +#define StgThunk_payload(__ptr__,__ix__) \ + W_[__ptr__+SIZEOF_StgThunkHeader+ WDS(__ix__)] + +/* ----------------------------------------------------------------------------- + Closures + -------------------------------------------------------------------------- */ + +/* The offset of the payload of an array */ +#define BYTE_ARR_CTS(arr) ((arr) + SIZEOF_StgArrBytes) + +/* The number of words allocated in an array payload */ +#define BYTE_ARR_WDS(arr) ROUNDUP_BYTES_TO_WDS(StgArrBytes_bytes(arr)) + +/* Getting/setting the info pointer of a closure */ +#define SET_INFO(p,info) StgHeader_info(p) = info +#define GET_INFO(p) StgHeader_info(p) + +/* Determine the size of an ordinary closure from its info table */ +#define sizeW_fromITBL(itbl) \ + SIZEOF_StgHeader + WDS(%INFO_PTRS(itbl)) + WDS(%INFO_NPTRS(itbl)) + +/* NB. duplicated from InfoTables.h! */ +#define BITMAP_SIZE(bitmap) ((bitmap) & BITMAP_SIZE_MASK) +#define BITMAP_BITS(bitmap) ((bitmap) >> BITMAP_BITS_SHIFT) + +/* Debugging macros */ +#define LOOKS_LIKE_INFO_PTR(p) \ + ((p) != NULL && \ + LOOKS_LIKE_INFO_PTR_NOT_NULL(p)) + +#define LOOKS_LIKE_INFO_PTR_NOT_NULL(p) \ + ( (TO_W_(%INFO_TYPE(%STD_INFO(p))) != INVALID_OBJECT) && \ + (TO_W_(%INFO_TYPE(%STD_INFO(p))) < N_CLOSURE_TYPES)) + +#define LOOKS_LIKE_CLOSURE_PTR(p) (LOOKS_LIKE_INFO_PTR(GET_INFO(UNTAG(p)))) + +/* + * The layout of the StgFunInfoExtra part of an info table changes + * depending on TABLES_NEXT_TO_CODE. So we define field access + * macros which use the appropriate version here: + */ +#if defined(TABLES_NEXT_TO_CODE) +/* + * when TABLES_NEXT_TO_CODE, slow_apply is stored as an offset + * instead of the normal pointer. 
+ */ + +#define StgFunInfoExtra_slow_apply(fun_info) \ + (TO_W_(StgFunInfoExtraRev_slow_apply_offset(fun_info)) \ + + (fun_info) + SIZEOF_StgFunInfoExtraRev + SIZEOF_StgInfoTable) + +#define StgFunInfoExtra_fun_type(i) StgFunInfoExtraRev_fun_type(i) +#define StgFunInfoExtra_arity(i) StgFunInfoExtraRev_arity(i) +#define StgFunInfoExtra_bitmap(i) StgFunInfoExtraRev_bitmap(i) +#else +#define StgFunInfoExtra_slow_apply(i) StgFunInfoExtraFwd_slow_apply(i) +#define StgFunInfoExtra_fun_type(i) StgFunInfoExtraFwd_fun_type(i) +#define StgFunInfoExtra_arity(i) StgFunInfoExtraFwd_arity(i) +#define StgFunInfoExtra_bitmap(i) StgFunInfoExtraFwd_bitmap(i) +#endif + +#define mutArrCardMask ((1 << MUT_ARR_PTRS_CARD_BITS) - 1) +#define mutArrPtrCardDown(i) ((i) >> MUT_ARR_PTRS_CARD_BITS) +#define mutArrPtrCardUp(i) (((i) + mutArrCardMask) >> MUT_ARR_PTRS_CARD_BITS) +#define mutArrPtrsCardWords(n) ROUNDUP_BYTES_TO_WDS(mutArrPtrCardUp(n)) + +#if defined(PROFILING) || defined(DEBUG) +#define OVERWRITING_CLOSURE_SIZE(c, size) foreign "C" overwritingClosureSize(c "ptr", size) +#define OVERWRITING_CLOSURE(c) foreign "C" overwritingClosure(c "ptr") +#define OVERWRITING_CLOSURE_MUTABLE(c, off) foreign "C" overwritingMutableClosureOfs(c "ptr", off) +#else +#define OVERWRITING_CLOSURE_SIZE(c, size) /* nothing */ +#define OVERWRITING_CLOSURE(c) /* nothing */ +/* This is used to zero slop after shrunk arrays. It is important that we do + * this whenever profiling is enabled as described in Note [slop on the heap] + * in Storage.c. */ +#define OVERWRITING_CLOSURE_MUTABLE(c, off) \ + if (TO_W_(RtsFlags_ProfFlags_doHeapProfile(RtsFlags)) != 0) { foreign "C" overwritingMutableClosureOfs(c "ptr", off); } +#endif + +// Memory barriers. +// For discussion of how these are used to fence heap object +// accesses see Note [Heap memory barriers] in SMP.h. 
+#if defined(THREADED_RTS) +#define prim_read_barrier prim %read_barrier() +#else +#define prim_read_barrier /* nothing */ +#endif +#if defined(THREADED_RTS) +#define prim_write_barrier prim %write_barrier() +#else +#define prim_write_barrier /* nothing */ +#endif + +/* ----------------------------------------------------------------------------- + Ticky macros + -------------------------------------------------------------------------- */ + +#if defined(TICKY_TICKY) +#define TICK_BUMP_BY(ctr,n) W_[ctr] = W_[ctr] + n +#else +#define TICK_BUMP_BY(ctr,n) /* nothing */ +#endif + +#define TICK_BUMP(ctr) TICK_BUMP_BY(ctr,1) + +#define TICK_ENT_DYN_IND() TICK_BUMP(ENT_DYN_IND_ctr) +// ENT_DYN_THK_ctr doesn't exist anymore. Could be ENT_DYN_THK_SINGLE_ctr or +// ENT_DYN_THK_MANY_ctr +// #define TICK_ENT_DYN_THK() TICK_BUMP(ENT_DYN_THK_ctr) +#define TICK_ENT_DYN_THK() + +#define TICK_ENT_VIA_NODE() TICK_BUMP(ENT_VIA_NODE_ctr) +#define TICK_ENT_STATIC_IND() TICK_BUMP(ENT_STATIC_IND_ctr) +#define TICK_ENT_PERM_IND() TICK_BUMP(ENT_PERM_IND_ctr) +#define TICK_ENT_PAP() TICK_BUMP(ENT_PAP_ctr) +#define TICK_ENT_AP() TICK_BUMP(ENT_AP_ctr) +#define TICK_ENT_AP_STACK() TICK_BUMP(ENT_AP_STACK_ctr) +#define TICK_ENT_BH() TICK_BUMP(ENT_BH_ctr) +#define TICK_ENT_LNE() TICK_BUMP(ENT_LNE_ctr) +#define TICK_UNKNOWN_CALL() TICK_BUMP(UNKNOWN_CALL_ctr) +#define TICK_UPDF_PUSHED() TICK_BUMP(UPDF_PUSHED_ctr) +#define TICK_CATCHF_PUSHED() TICK_BUMP(CATCHF_PUSHED_ctr) +#define TICK_UPDF_OMITTED() TICK_BUMP(UPDF_OMITTED_ctr) +#define TICK_UPD_NEW_IND() TICK_BUMP(UPD_NEW_IND_ctr) +#define TICK_UPD_NEW_PERM_IND() TICK_BUMP(UPD_NEW_PERM_IND_ctr) +#define TICK_UPD_OLD_IND() TICK_BUMP(UPD_OLD_IND_ctr) +#define TICK_UPD_OLD_PERM_IND() TICK_BUMP(UPD_OLD_PERM_IND_ctr) + +#define TICK_SLOW_CALL_FUN_TOO_FEW() TICK_BUMP(SLOW_CALL_FUN_TOO_FEW_ctr) +#define TICK_SLOW_CALL_FUN_CORRECT() TICK_BUMP(SLOW_CALL_FUN_CORRECT_ctr) +#define TICK_SLOW_CALL_FUN_TOO_MANY() TICK_BUMP(SLOW_CALL_FUN_TOO_MANY_ctr) +#define 
TICK_SLOW_CALL_PAP_TOO_FEW() TICK_BUMP(SLOW_CALL_PAP_TOO_FEW_ctr) +#define TICK_SLOW_CALL_PAP_CORRECT() TICK_BUMP(SLOW_CALL_PAP_CORRECT_ctr) +#define TICK_SLOW_CALL_PAP_TOO_MANY() TICK_BUMP(SLOW_CALL_PAP_TOO_MANY_ctr) + +#define TICK_SLOW_CALL_fast_v16() TICK_BUMP(SLOW_CALL_fast_v16_ctr) +#define TICK_SLOW_CALL_fast_v() TICK_BUMP(SLOW_CALL_fast_v_ctr) +#define TICK_SLOW_CALL_fast_p() TICK_BUMP(SLOW_CALL_fast_p_ctr) +#define TICK_SLOW_CALL_fast_pv() TICK_BUMP(SLOW_CALL_fast_pv_ctr) +#define TICK_SLOW_CALL_fast_pp() TICK_BUMP(SLOW_CALL_fast_pp_ctr) +#define TICK_SLOW_CALL_fast_ppv() TICK_BUMP(SLOW_CALL_fast_ppv_ctr) +#define TICK_SLOW_CALL_fast_ppp() TICK_BUMP(SLOW_CALL_fast_ppp_ctr) +#define TICK_SLOW_CALL_fast_pppv() TICK_BUMP(SLOW_CALL_fast_pppv_ctr) +#define TICK_SLOW_CALL_fast_pppp() TICK_BUMP(SLOW_CALL_fast_pppp_ctr) +#define TICK_SLOW_CALL_fast_ppppp() TICK_BUMP(SLOW_CALL_fast_ppppp_ctr) +#define TICK_SLOW_CALL_fast_pppppp() TICK_BUMP(SLOW_CALL_fast_pppppp_ctr) +#define TICK_VERY_SLOW_CALL() TICK_BUMP(VERY_SLOW_CALL_ctr) + +/* NOTE: TICK_HISTO_BY and TICK_HISTO + currently have no effect. + The old code for it didn't typecheck and I + just commented it out to get ticky to work. + - krc 1/2007 */ + +#define TICK_HISTO_BY(histo,n,i) /* nothing */ + +#define TICK_HISTO(histo,n) TICK_HISTO_BY(histo,n,1) + +/* An unboxed tuple with n components. */ +#define TICK_RET_UNBOXED_TUP(n) \ + TICK_BUMP(RET_UNBOXED_TUP_ctr++); \ + TICK_HISTO(RET_UNBOXED_TUP,n) + +/* + * A slow call with n arguments. In the unevald case, this call has + * already been counted once, so don't count it again. + */ +#define TICK_SLOW_CALL(n) \ + TICK_BUMP(SLOW_CALL_ctr); \ + TICK_HISTO(SLOW_CALL,n) + +/* + * This slow call was found to be to an unevaluated function; undo the + * ticks we did in TICK_SLOW_CALL. 
+ */ +#define TICK_SLOW_CALL_UNEVALD(n) \ + TICK_BUMP(SLOW_CALL_UNEVALD_ctr); \ + TICK_BUMP_BY(SLOW_CALL_ctr,-1); \ + TICK_HISTO_BY(SLOW_CALL,n,-1); + +/* Updating a closure with a new CON */ +#define TICK_UPD_CON_IN_NEW(n) \ + TICK_BUMP(UPD_CON_IN_NEW_ctr); \ + TICK_HISTO(UPD_CON_IN_NEW,n) + +#define TICK_ALLOC_HEAP_NOCTR(bytes) \ + TICK_BUMP(ALLOC_RTS_ctr); \ + TICK_BUMP_BY(ALLOC_RTS_tot,bytes) + +/* ----------------------------------------------------------------------------- + Misc junk + -------------------------------------------------------------------------- */ + +#define NO_TREC stg_NO_TREC_closure +#define END_TSO_QUEUE stg_END_TSO_QUEUE_closure +#define STM_AWOKEN stg_STM_AWOKEN_closure + +#define recordMutableCap(p, gen) \ + W_ __bd; \ + W_ mut_list; \ + mut_list = Capability_mut_lists(MyCapability()) + WDS(gen); \ + __bd = W_[mut_list]; \ + if (bdescr_free(__bd) >= bdescr_start(__bd) + BLOCK_SIZE) { \ + W_ __new_bd; \ + ("ptr" __new_bd) = foreign "C" allocBlock_lock(); \ + bdescr_link(__new_bd) = __bd; \ + __bd = __new_bd; \ + W_[mut_list] = __bd; \ + } \ + W_ free; \ + free = bdescr_free(__bd); \ + W_[free] = p; \ + bdescr_free(__bd) = free + WDS(1); + +#define recordMutable(p) \ + P_ __p; \ + W_ __bd; \ + W_ __gen; \ + __p = p; \ + __bd = Bdescr(__p); \ + __gen = TO_W_(bdescr_gen_no(__bd)); \ + if (__gen > 0) { recordMutableCap(__p, __gen); } + +/* ----------------------------------------------------------------------------- + Update remembered set write barrier + -------------------------------------------------------------------------- */ + +/* ----------------------------------------------------------------------------- + Arrays + -------------------------------------------------------------------------- */ + +/* Complete function body for the clone family of (mutable) array ops. + Defined as a macro to avoid function call overhead or code + duplication. 
*/ +#define cloneArray(info, src, offset, n) \ + W_ words, size; \ + gcptr dst, dst_p, src_p; \ + \ + again: MAYBE_GC(again); \ + \ + size = n + mutArrPtrsCardWords(n); \ + words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size; \ + ("ptr" dst) = ccall allocate(MyCapability() "ptr", words); \ + TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0); \ + \ + SET_HDR(dst, info, CCCS); \ + StgMutArrPtrs_ptrs(dst) = n; \ + StgMutArrPtrs_size(dst) = size; \ + \ + dst_p = dst + SIZEOF_StgMutArrPtrs; \ + src_p = src + SIZEOF_StgMutArrPtrs + WDS(offset); \ + prim %memcpy(dst_p, src_p, n * SIZEOF_W, SIZEOF_W); \ + \ + return (dst); + +#define copyArray(src, src_off, dst, dst_off, n) \ + W_ dst_elems_p, dst_p, src_p, bytes; \ + \ + if ((n) != 0) { \ + SET_HDR(dst, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); \ + \ + dst_elems_p = (dst) + SIZEOF_StgMutArrPtrs; \ + dst_p = dst_elems_p + WDS(dst_off); \ + src_p = (src) + SIZEOF_StgMutArrPtrs + WDS(src_off); \ + bytes = WDS(n); \ + \ + prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); \ + \ + setCards(dst, dst_off, n); \ + } \ + \ + return (); + +#define copyMutableArray(src, src_off, dst, dst_off, n) \ + W_ dst_elems_p, dst_p, src_p, bytes; \ + \ + if ((n) != 0) { \ + SET_HDR(dst, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); \ + \ + dst_elems_p = (dst) + SIZEOF_StgMutArrPtrs; \ + dst_p = dst_elems_p + WDS(dst_off); \ + src_p = (src) + SIZEOF_StgMutArrPtrs + WDS(src_off); \ + bytes = WDS(n); \ + \ + if ((src) == (dst)) { \ + prim %memmove(dst_p, src_p, bytes, SIZEOF_W); \ + } else { \ + prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); \ + } \ + \ + setCards(dst, dst_off, n); \ + } \ + \ + return (); + +/* + * Set the cards in the array pointed to by arr for an + * update to n elements, starting at element dst_off. + */ +#define setCards(arr, dst_off, n) \ + setCardsValue(arr, dst_off, n, 1) + +/* + * Set the cards in the array pointed to by arr for an + * update to n elements, starting at element dst_off to value (0 to indicate + * clean, 1 to indicate dirty). 
+ */ +#define setCardsValue(arr, dst_off, n, value) \ + W_ __start_card, __end_card, __cards, __dst_cards_p; \ + __dst_cards_p = (arr) + SIZEOF_StgMutArrPtrs + WDS(StgMutArrPtrs_ptrs(arr)); \ + __start_card = mutArrPtrCardDown(dst_off); \ + __end_card = mutArrPtrCardDown((dst_off) + (n) - 1); \ + __cards = __end_card - __start_card + 1; \ + prim %memset(__dst_cards_p + __start_card, (value), __cards, 1) + +/* Complete function body for the clone family of small (mutable) + array ops. Defined as a macro to avoid function call overhead or + code duplication. */ +#define cloneSmallArray(info, src, offset, n) \ + W_ words, size; \ + gcptr dst, dst_p, src_p; \ + \ + again: MAYBE_GC(again); \ + \ + words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n; \ + ("ptr" dst) = ccall allocate(MyCapability() "ptr", words); \ + TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0); \ + \ + SET_HDR(dst, info, CCCS); \ + StgSmallMutArrPtrs_ptrs(dst) = n; \ + \ + dst_p = dst + SIZEOF_StgSmallMutArrPtrs; \ + src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(offset); \ + prim %memcpy(dst_p, src_p, n * SIZEOF_W, SIZEOF_W); \ + \ + return (dst); + + +// +// Nonmoving write barrier helpers +// +// See Note [Update remembered set] in NonMovingMark.c. + +#if defined(THREADED_RTS) +#define IF_NONMOVING_WRITE_BARRIER_ENABLED \ + if (W_[nonmoving_write_barrier_enabled] != 0) (likely: False) +#else +// A similar measure is also taken in rts/NonMoving.h, but that isn't visible from C-- +#define IF_NONMOVING_WRITE_BARRIER_ENABLED \ + if (0) +#define nonmoving_write_barrier_enabled 0 +#endif + +// A useful helper for pushing a pointer to the update remembered set. 
+#define updateRemembSetPushPtr(p) \ + IF_NONMOVING_WRITE_BARRIER_ENABLED { \ + ccall updateRemembSetPushClosure_(BaseReg "ptr", p "ptr"); \ + } diff --git a/rts/include/HsFFI.h b/rts/include/HsFFI.h new file mode 100644 index 0000000000..7c25599f83 --- /dev/null +++ b/rts/include/HsFFI.h @@ -0,0 +1,148 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2000 + * + * A mapping for Haskell types to C types, including the corresponding bounds. + * Intended to be used in conjunction with the FFI. + * + * WARNING: Keep this file and StgTypes.h in synch! + * + * ---------------------------------------------------------------------------*/ + +/* N.B. Only use C-style multi-line comments in this file to avoid upsetting + * dtrace on SmartOS, which doesn't support C++-style single-line comments. + */ + +#pragma once + +#if defined(__cplusplus) +extern "C" { +#endif + +/* get types from GHC's runtime system */ +#include "ghcconfig.h" +#include "stg/Types.h" + +/* get limits for floating point types */ +#include <float.h> + +typedef StgChar HsChar; +typedef StgInt HsInt; +typedef StgInt8 HsInt8; +typedef StgInt16 HsInt16; +typedef StgInt32 HsInt32; +typedef StgInt64 HsInt64; +typedef StgWord HsWord; +typedef StgWord8 HsWord8; +typedef StgWord16 HsWord16; +typedef StgWord32 HsWord32; +typedef StgWord64 HsWord64; +typedef StgFloat HsFloat; +typedef StgDouble HsDouble; +typedef StgInt HsBool; +typedef void* HsPtr; /* this should better match StgAddr */ +typedef void (*HsFunPtr)(void); /* this should better match StgAddr */ +typedef void* HsStablePtr; + +/* this should correspond to the type of StgChar in StgTypes.h */ +#define HS_CHAR_MIN 0 +#define HS_CHAR_MAX 0x10FFFF + +/* is it true or not? 
*/ +#define HS_BOOL_FALSE 0 +#define HS_BOOL_TRUE 1 + +#define HS_BOOL_MIN HS_BOOL_FALSE +#define HS_BOOL_MAX HS_BOOL_TRUE + + +#define HS_INT_MIN STG_INT_MIN +#define HS_INT_MAX STG_INT_MAX +#define HS_WORD_MAX STG_WORD_MAX + +#define HS_INT8_MIN STG_INT8_MIN +#define HS_INT8_MAX STG_INT8_MAX +#define HS_INT16_MIN STG_INT16_MIN +#define HS_INT16_MAX STG_INT16_MAX +#define HS_INT32_MIN STG_INT32_MIN +#define HS_INT32_MAX STG_INT32_MAX +#define HS_INT64_MIN STG_INT64_MIN +#define HS_INT64_MAX STG_INT64_MAX +#define HS_WORD8_MAX STG_WORD8_MAX +#define HS_WORD16_MAX STG_WORD16_MAX +#define HS_WORD32_MAX STG_WORD32_MAX +#define HS_WORD64_MAX STG_WORD64_MAX + +#define HS_FLOAT_RADIX FLT_RADIX +#define HS_FLOAT_ROUNDS FLT_ROUNDS +#define HS_FLOAT_EPSILON FLT_EPSILON +#define HS_FLOAT_DIG FLT_DIG +#define HS_FLOAT_MANT_DIG FLT_MANT_DIG +#define HS_FLOAT_MIN FLT_MIN +#define HS_FLOAT_MIN_EXP FLT_MIN_EXP +#define HS_FLOAT_MIN_10_EXP FLT_MIN_10_EXP +#define HS_FLOAT_MAX FLT_MAX +#define HS_FLOAT_MAX_EXP FLT_MAX_EXP +#define HS_FLOAT_MAX_10_EXP FLT_MAX_10_EXP + +#define HS_DOUBLE_RADIX DBL_RADIX +#define HS_DOUBLE_ROUNDS DBL_ROUNDS +#define HS_DOUBLE_EPSILON DBL_EPSILON +#define HS_DOUBLE_DIG DBL_DIG +#define HS_DOUBLE_MANT_DIG DBL_MANT_DIG +#define HS_DOUBLE_MIN DBL_MIN +#define HS_DOUBLE_MIN_EXP DBL_MIN_EXP +#define HS_DOUBLE_MIN_10_EXP DBL_MIN_10_EXP +#define HS_DOUBLE_MAX DBL_MAX +#define HS_DOUBLE_MAX_EXP DBL_MAX_EXP +#define HS_DOUBLE_MAX_10_EXP DBL_MAX_10_EXP + +extern void hs_init (int *argc, char **argv[]); +extern void hs_exit (void); +extern void hs_exit_nowait(void); +extern void hs_set_argv (int argc, char *argv[]); +extern void hs_thread_done (void); +extern void hs_restoreConsoleCP (void); + +extern void hs_perform_gc (void); + +/* Lock the stable pointer table. The table must be unlocked + * again before calling any Haskell functions, even if those + * functions do not manipulate stable pointers. 
The Haskell + * garbage collector will not be able to run until this lock + * is released! It is also forbidden to call hs_free_fun_ptr + * or any stable pointer-related FFI functions other than + * hs_free_stable_ptr_unsafe while the table is locked. + */ +extern void hs_lock_stable_ptr_table (void); + +/* A deprecated synonym. */ +extern void hs_lock_stable_tables (void); + +/* Unlock the stable pointer table. */ +extern void hs_unlock_stable_ptr_table (void); + +/* A deprecated synonym. */ +extern void hs_unlock_stable_tables (void); + +/* Free a stable pointer assuming that the stable pointer + * table is already locked. + */ +extern void hs_free_stable_ptr_unsafe (HsStablePtr sp); + +extern void hs_free_stable_ptr (HsStablePtr sp); +extern void hs_free_fun_ptr (HsFunPtr fp); + +extern StgPtr hs_spt_lookup(StgWord64 key[2]); +extern int hs_spt_keys(StgPtr keys[], int szKeys); +extern int hs_spt_key_count (void); + +extern void hs_try_putmvar (int capability, HsStablePtr sp); + +/* -------------------------------------------------------------------------- */ + + + +#if defined(__cplusplus) +} +#endif diff --git a/rts/include/MachDeps.h b/rts/include/MachDeps.h new file mode 100644 index 0000000000..98a90814d9 --- /dev/null +++ b/rts/include/MachDeps.h @@ -0,0 +1,119 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The University of Glasgow 2002 + * + * Definitions that characterise machine specific properties of basic + * types (C & Haskell) of a target platform. + * + * NB: Keep in sync with HsFFI.h and StgTypes.h. + * NB: THIS FILE IS INCLUDED IN HASKELL SOURCE! + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +/* Don't allow stage1 (cross-)compiler embed assumptions about target + * platform. 
When ghc-stage1 is being built by ghc-stage0 it should not + * refer to target defines. A few past examples: + * - https://gitlab.haskell.org/ghc/ghc/issues/13491 + * - https://phabricator.haskell.org/D3122 + * - https://phabricator.haskell.org/D3405 + * + * In those cases code change assumed target defines like SIZEOF_HSINT + * are applied to host platform, not target platform. + * + * So what should be used instead in GHC_STAGE=1? + * + * To get host's equivalent of SIZEOF_HSINT you can use Bits instances: + * Data.Bits.finiteBitSize (0 :: Int) + * + * To get target's values it is preferred to use runtime target + * configuration from 'targetPlatform :: DynFlags -> Platform' + * record. + * + * Hence we hide these macros from GHC_STAGE=1 + */ + +/* Sizes of C types come from here... */ +#include "ghcautoconf.h" + +/* Sizes of Haskell types follow. These sizes correspond to: + * - the number of bytes in the primitive type (eg. Int#) + * - the number of bytes in the external representation (eg. HsInt) + * - the scale offset used by writeFooOffAddr# + * + * In the heap, the type may take up more space: eg. SIZEOF_INT8 == 1, + * but it takes up SIZEOF_HSWORD (4 or 8) bytes in the heap. 
+ */ + +#define SIZEOF_HSCHAR SIZEOF_WORD32 +#define ALIGNMENT_HSCHAR ALIGNMENT_WORD32 + +#define SIZEOF_HSINT SIZEOF_VOID_P +#define ALIGNMENT_HSINT ALIGNMENT_VOID_P + +#define SIZEOF_HSWORD SIZEOF_VOID_P +#define ALIGNMENT_HSWORD ALIGNMENT_VOID_P + +#define SIZEOF_HSDOUBLE SIZEOF_DOUBLE +#define ALIGNMENT_HSDOUBLE ALIGNMENT_DOUBLE + +#define SIZEOF_HSFLOAT SIZEOF_FLOAT +#define ALIGNMENT_HSFLOAT ALIGNMENT_FLOAT + +#define SIZEOF_HSPTR SIZEOF_VOID_P +#define ALIGNMENT_HSPTR ALIGNMENT_VOID_P + +#define SIZEOF_HSFUNPTR SIZEOF_VOID_P +#define ALIGNMENT_HSFUNPTR ALIGNMENT_VOID_P + +#define SIZEOF_HSSTABLEPTR SIZEOF_VOID_P +#define ALIGNMENT_HSSTABLEPTR ALIGNMENT_VOID_P + +#define SIZEOF_INT8 SIZEOF_INT8_T +#define ALIGNMENT_INT8 ALIGNMENT_INT8_T + +#define SIZEOF_WORD8 SIZEOF_UINT8_T +#define ALIGNMENT_WORD8 ALIGNMENT_UINT8_T + +#define SIZEOF_INT16 SIZEOF_INT16_T +#define ALIGNMENT_INT16 ALIGNMENT_INT16_T + +#define SIZEOF_WORD16 SIZEOF_UINT16_T +#define ALIGNMENT_WORD16 ALIGNMENT_UINT16_T + +#define SIZEOF_INT32 SIZEOF_INT32_T +#define ALIGNMENT_INT32 ALIGNMENT_INT32_T + +#define SIZEOF_WORD32 SIZEOF_UINT32_T +#define ALIGNMENT_WORD32 ALIGNMENT_UINT32_T + +#define SIZEOF_INT64 SIZEOF_INT64_T +#define ALIGNMENT_INT64 ALIGNMENT_INT64_T + +#define SIZEOF_WORD64 SIZEOF_UINT64_T +#define ALIGNMENT_WORD64 ALIGNMENT_UINT64_T + +#if !defined(WORD_SIZE_IN_BITS) +#if SIZEOF_HSWORD == 4 +#define WORD_SIZE_IN_BITS 32 +#define WORD_SIZE_IN_BITS_FLOAT 32.0 +#else +#define WORD_SIZE_IN_BITS 64 +#define WORD_SIZE_IN_BITS_FLOAT 64.0 +#endif +#endif + +#if !defined(TAG_BITS) +#if SIZEOF_HSWORD == 4 +#define TAG_BITS 2 +#else +#define TAG_BITS 3 +#endif +#endif + +#define TAG_MASK ((1 << TAG_BITS) - 1) + diff --git a/rts/include/Makefile b/rts/include/Makefile new file mode 100644 index 0000000000..d0dc985cbe --- /dev/null +++ b/rts/include/Makefile @@ -0,0 +1,15 @@ +# ----------------------------------------------------------------------------- +# +# (c) 2009 The University of 
Glasgow +# +# This file is part of the GHC build system. +# +# To understand how the build system works and how to modify it, see +# https://gitlab.haskell.org/ghc/ghc/wikis/building/architecture +# https://gitlab.haskell.org/ghc/ghc/wikis/building/modifying +# +# ----------------------------------------------------------------------------- + +dir = rts/include +TOP = ../.. +include $(TOP)/mk/sub-makefile.mk diff --git a/rts/include/Rts.h b/rts/include/Rts.h new file mode 100644 index 0000000000..e3471cf333 --- /dev/null +++ b/rts/include/Rts.h @@ -0,0 +1,372 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * RTS external APIs. This file declares everything that the GHC RTS + * exposes externally. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#if defined(__cplusplus) +extern "C" { +#endif + +/* get types from GHC's runtime system */ +#include "ghcconfig.h" +/* We have to include Types.h before everything else as this defines some + macros that will change the behaviour of system headers. */ +#include "stg/Types.h" + +/* We include windows.h very early, as on Win64 the CONTEXT type has + fields "R8", "R9" and "R10", which goes bad if we've already + #define'd those names for our own purposes (in stg/Regs.h) */ +#if defined(HAVE_WINDOWS_H) +#include <windows.h> +#endif + +#if !defined(IN_STG_CODE) +#define IN_STG_CODE 0 +#endif +#include "Stg.h" + +#include "HsFFI.h" +#include "RtsAPI.h" + +// Disencourage gcc from inlining when debugging - it obfuscates things +#if defined(DEBUG) +# undef STATIC_INLINE +# define STATIC_INLINE static +#endif + +// Fine grained inlining control helpers. 
+#define ATTR_ALWAYS_INLINE __attribute__((always_inline)) +#define ATTR_NOINLINE __attribute__((noinline)) + + +#include "rts/Types.h" +#include "rts/Time.h" + +#if __GNUC__ >= 3 +#define ATTRIBUTE_ALIGNED(n) __attribute__((aligned(n))) +#else +#define ATTRIBUTE_ALIGNED(n) /*nothing*/ +#endif + +// Symbols that are extern, but private to the RTS, are declared +// with visibility "hidden" to hide them outside the RTS shared +// library. +#if defined(HAS_VISIBILITY_HIDDEN) +#define RTS_PRIVATE GNUC3_ATTRIBUTE(visibility("hidden")) +#else +#define RTS_PRIVATE /* disabled: RTS_PRIVATE */ +#endif + +#if __GNUC__ >= 4 +#define RTS_UNLIKELY(p) __builtin_expect((p),0) +#else +#define RTS_UNLIKELY(p) (p) +#endif + +#if __GNUC__ >= 4 +#define RTS_LIKELY(p) __builtin_expect(!!(p), 1) +#else +#define RTS_LIKELY(p) (p) +#endif + +/* __builtin_unreachable is supported since GNU C 4.5 */ +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) +#define RTS_UNREACHABLE __builtin_unreachable() +#else +#define RTS_UNREACHABLE abort() +#endif + +/* Prefetch primitives */ +#define prefetchForRead(ptr) __builtin_prefetch(ptr, 0) +#define prefetchForWrite(ptr) __builtin_prefetch(ptr, 1) + +/* Fix for mingw stat problem (done here so it's early enough) */ +#if defined(mingw32_HOST_OS) +#define __MSVCRT__ 1 +#endif + +/* Needed to get the macro version of errno on some OSs, and also to + get prototypes for the _r versions of C library functions. */ +#if !defined(_REENTRANT) +#define _REENTRANT 1 +#endif + +/* + * We often want to know the size of something in units of an + * StgWord... (rounded up, of course!) + */ +#define ROUNDUP_BYTES_TO_WDS(n) (((n) + sizeof(W_) - 1) / sizeof(W_)) + +#define sizeofW(t) ROUNDUP_BYTES_TO_WDS(sizeof(t)) + +/* ----------------------------------------------------------------------------- + Assertions and Debuggery + + CHECK(p) evaluates p and terminates with an error if p is false + ASSERT(p) like CHECK(p) a no-op, unless ASSERTS_ENABLED is on. 
Either + because we're building in the DEBUG way or USE_ASSERTS_ALL_WAYS + (aka --enable-asserts-all-ways) was enabled at ./configure time. + -------------------------------------------------------------------------- */ + +void _assertFail(const char *filename, unsigned int linenum) + GNUC3_ATTRIBUTE(__noreturn__); + +#define CHECK(predicate) \ + if (RTS_LIKELY(predicate)) \ + /*null*/; \ + else \ + _assertFail(__FILE__, __LINE__) + +#define CHECKM(predicate, msg, ...) \ + if (RTS_LIKELY(predicate)) \ + /*null*/; \ + else \ + barf(msg, ##__VA_ARGS__) + +#if defined(DEBUG) || defined(USE_ASSERTS_ALL_WAYS) +#define ASSERTS_ENABLED 1 +#else +#undef ASSERTS_ENABLED +#endif + +#if defined(ASSERTS_ENABLED) +#define ASSERT(predicate) \ + do { CHECK(predicate); } while(0) +#define ASSERTM(predicate,msg,...) \ + do { CHECKM(predicate, msg, ##__VA_ARGS__); } while(0) +#else +#define ASSERT(predicate) \ + do { (void) sizeof(predicate); } while(0) +#define ASSERTM(predicate,msg,...) \ + do { (void) sizeof(predicate); (void) sizeof(msg); } while(0) +#endif /* DEBUG */ + +/* + * Use this on the RHS of macros which expand to nothing + * to make sure that the macro can be used in a context which + * demands a non-empty statement. 
+ */ + +#define doNothing() do { } while (0) + +#if defined(DEBUG) +#define USED_IF_DEBUG +#define USED_IF_NOT_DEBUG STG_UNUSED +#else +#define USED_IF_DEBUG STG_UNUSED +#define USED_IF_NOT_DEBUG +#endif + +#if defined(THREADED_RTS) +#define USED_IF_THREADS +#define USED_IF_NOT_THREADS STG_UNUSED +#else +#define USED_IF_THREADS STG_UNUSED +#define USED_IF_NOT_THREADS +#endif + +#if defined(PROFILING) +#define USED_IF_PROFILING +#define USED_IF_NOT_PROFILING STG_UNUSED +#else +#define USED_IF_PROFILING STG_UNUSED +#define USED_IF_NOT_PROFILING +#endif + +#define FMT_SizeT "zu" +#define FMT_HexSizeT "zx" + +/* ----------------------------------------------------------------------------- + Include everything STG-ish + -------------------------------------------------------------------------- */ + +/* System headers: stdlib.h is needed so that we can use NULL. It must + * come after MachRegs.h, because stdlib.h might define some inline + * functions which may only be defined after register variables have + * been declared. + */ +#include <stdlib.h> + +#include "rts/Config.h" + +/* Global constraints */ +#include "rts/Constants.h" + +/* Runtime flags */ +#include "rts/Flags.h" + +/* Profiling information */ +#include "rts/prof/CCS.h" +#include "rts/prof/Heap.h" +#include "rts/prof/LDV.h" + +/* Parallel information */ +#include "rts/OSThreads.h" +#include "rts/TSANUtils.h" +#include "rts/SpinLock.h" + +#include "rts/Messages.h" +#include "rts/Threads.h" + +/* Storage format definitions */ +#include "rts/storage/FunTypes.h" +#include "rts/storage/InfoTables.h" +#include "rts/storage/Closures.h" +#include "rts/storage/Heap.h" +#include "rts/storage/ClosureTypes.h" +#include "rts/storage/TSO.h" +#include "stg/MiscClosures.h" /* InfoTables, closures etc. 
defined in the RTS */ +#include "rts/storage/Block.h" +#include "rts/storage/ClosureMacros.h" +#include "rts/storage/MBlock.h" +#include "rts/storage/GC.h" +#include "rts/NonMoving.h" + +/* Foreign exports */ +#include "rts/ForeignExports.h" + +/* Other RTS external APIs */ +#include "rts/ExecPage.h" +#include "rts/Parallel.h" +#include "rts/Signals.h" +#include "rts/BlockSignals.h" +#include "rts/Hpc.h" +#include "rts/Adjustor.h" +#include "rts/FileLock.h" +#include "rts/GetTime.h" +#include "rts/Globals.h" +#include "rts/IOInterface.h" +#include "rts/Linker.h" +#include "rts/Ticky.h" +#include "rts/Timer.h" +#include "rts/StablePtr.h" +#include "rts/StableName.h" +#include "rts/TTY.h" +#include "rts/Utils.h" +#include "rts/PrimFloat.h" +#include "rts/Main.h" +#include "rts/Profiling.h" +#include "rts/IPE.h" +#include "rts/StaticPtrTable.h" +#include "rts/Libdw.h" +#include "rts/LibdwPool.h" + +/* Misc stuff without a home */ +DLL_IMPORT_RTS extern char **prog_argv; /* so we can get at these from Haskell */ +DLL_IMPORT_RTS extern int prog_argc; +DLL_IMPORT_RTS extern char *prog_name; + +void reportStackOverflow(StgTSO* tso); +void reportHeapOverflow(void); + +void stg_exit(int n) GNU_ATTRIBUTE(__noreturn__); + +#if !defined(mingw32_HOST_OS) +int stg_sig_install (int, int, void *); +#endif + +/* ----------------------------------------------------------------------------- + Ways + -------------------------------------------------------------------------- */ + +// Returns non-zero if the RTS is a profiling version +int rts_isProfiled(void); + +// Returns non-zero if the RTS is a dynamically-linked version +int rts_isDynamic(void); + +// Returns non-zero if the RTS is a threaded version +int rts_isThreaded(void); + +// Returns non-zero if the RTS is a debugged version +int rts_isDebugged(void); + +// Returns non-zero if the RTS is a tracing version (event log) +int rts_isTracing(void); + +/* 
----------------------------------------------------------------------------- + RTS Exit codes + -------------------------------------------------------------------------- */ + +/* 255 is allegedly used by dynamic linkers to report linking failure */ +#define EXIT_INTERNAL_ERROR 254 +#define EXIT_DEADLOCK 253 +#define EXIT_INTERRUPTED 252 +#define EXIT_HEAPOVERFLOW 251 +#define EXIT_KILLED 250 + +/* ----------------------------------------------------------------------------- + Miscellaneous garbage + -------------------------------------------------------------------------- */ + +#if defined(DEBUG) +#define TICK_VAR(arity) \ + extern StgInt SLOW_CALLS_##arity; \ + extern StgInt RIGHT_ARITY_##arity; \ + extern StgInt TAGGED_PTR_##arity; + +extern StgInt TOTAL_CALLS; + +TICK_VAR(1) +TICK_VAR(2) +#endif + +/* ----------------------------------------------------------------------------- + Assertions and Debuggery + -------------------------------------------------------------------------- */ + +#define IF_RTSFLAGS(c,s) if (RtsFlags.c) { s; } doNothing() + +#if defined(DEBUG) +/* See Note [RtsFlags is a pointer in STG code] */ +#if IN_STG_CODE +#define IF_DEBUG(c,s) if (RtsFlags[0].DebugFlags.c) { s; } doNothing() +#else +#define IF_DEBUG(c,s) if (RtsFlags.DebugFlags.c) { s; } doNothing() +#endif /* IN_STG_CODE */ +#else +#define IF_DEBUG(c,s) doNothing() +#endif /* DEBUG */ + +#if defined(DEBUG) +#define DEBUG_ONLY(s) s +#else +#define DEBUG_ONLY(s) doNothing() +#endif /* DEBUG */ + +#if defined(DEBUG) +#define DEBUG_IS_ON 1 +#else +#define DEBUG_IS_ON 0 +#endif /* DEBUG */ + +/* ----------------------------------------------------------------------------- + Useful macros and inline functions + -------------------------------------------------------------------------- */ + +#if defined(__GNUC__) +#define SUPPORTS_TYPEOF +#endif + +#if defined(SUPPORTS_TYPEOF) +#define stg_min(a,b) ({typeof(a) _a = (a), _b = (b); _a <= _b ? 
_a : _b; }) +#define stg_max(a,b) ({typeof(a) _a = (a), _b = (b); _a <= _b ? _b : _a; }) +#else +#define stg_min(a,b) ((a) <= (b) ? (a) : (b)) +#define stg_max(a,b) ((a) <= (b) ? (b) : (a)) +#endif + +/* -------------------------------------------------------------------------- */ + +#if defined(__cplusplus) +} +#endif diff --git a/rts/include/RtsAPI.h b/rts/include/RtsAPI.h new file mode 100644 index 0000000000..e2d1845819 --- /dev/null +++ b/rts/include/RtsAPI.h @@ -0,0 +1,607 @@ +/* ---------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2004 + * + * API for invoking Haskell functions via the RTS + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * --------------------------------------------------------------------------*/ + +#pragma once + +#if defined(__cplusplus) +extern "C" { +#endif + +#include "HsFFI.h" +#include "rts/Time.h" +#include "rts/Types.h" + +/* + * Running the scheduler + */ +typedef enum { + NoStatus, /* not finished yet */ + Success, /* completed successfully */ + Killed, /* uncaught exception */ + Interrupted, /* stopped in response to a call to interruptStgRts */ + HeapExhausted /* out of memory */ +} SchedulerStatus; + +typedef struct StgClosure_ *HaskellObj; + +/* + * An abstract type representing the token returned by rts_lock() and + * used when allocating objects and threads in the RTS. + */ +typedef struct Capability_ Capability; + +/* + * An abstract type representing the token returned by rts_pause(). + */ +typedef struct PauseToken_ PauseToken; + +/* + * From a PauseToken, get a Capability token used when allocating objects and + * threads in the RTS. + */ +Capability *pauseTokenCapability(PauseToken *pauseToken); + +/* + * The public view of a Capability: we can be sure it starts with + * these two components (but it may have more private fields). 
+ */ +typedef struct CapabilityPublic_ { + StgFunTable f; + StgRegTable r; +} CapabilityPublic; + +/* N.B. this needs the Capability declaration above. */ +#include "rts/EventLogWriter.h" + +/* ---------------------------------------------------------------------------- + RTS configuration settings, for passing to hs_init_ghc() + ------------------------------------------------------------------------- */ + +typedef enum { + RtsOptsNone, // +RTS causes an error + RtsOptsIgnore, // Ignore command line arguments + RtsOptsIgnoreAll, // Ignore command line and Environment arguments + RtsOptsSafeOnly, // safe RTS options allowed; others cause an error + RtsOptsAll // all RTS options allowed + } RtsOptsEnabledEnum; + +struct GCDetails_; + +// The RtsConfig struct is passed (by value) to hs_init_ghc(). The +// reason for using a struct is extensibility: we can add more +// fields to this later without breaking existing client code. +typedef struct { + + // Whether to interpret +RTS options on the command line + RtsOptsEnabledEnum rts_opts_enabled; + + // Whether to give RTS flag suggestions + HsBool rts_opts_suggestions; + + // additional RTS options + const char *rts_opts; + + // True if GHC was not passed -no-hs-main + HsBool rts_hs_main; + + // Whether to retain CAFs (default: false) + HsBool keep_cafs; + + // Writer a for eventlog. + const EventLogWriter *eventlog_writer; + + // Called before processing command-line flags, so that default + // settings for RtsFlags can be provided. 
+ void (* defaultsHook) (void); + + // Called just before exiting + void (* onExitHook) (void); + + // Called on a stack overflow, before exiting + void (* stackOverflowHook) (W_ stack_size); + + // Called on heap overflow, before exiting + void (* outOfHeapHook) (W_ request_size, W_ heap_size); + + // Called when malloc() fails, before exiting + void (* mallocFailHook) (W_ request_size /* in bytes */, const char *msg); + + // Called for every GC + void (* gcDoneHook) (const struct GCDetails_ *stats); + + // Called when GC sync takes too long (+RTS --long-gc-sync=<time>) + void (* longGCSync) (uint32_t this_cap, Time time_ns); + void (* longGCSyncEnd) (Time time_ns); +} RtsConfig; + +// Clients should start with defaultRtsConfig and then customise it. +// Bah, I really wanted this to be a const struct value, but it seems +// you can't do that in C (it generates code). +extern const RtsConfig defaultRtsConfig; + +/* ----------------------------------------------------------------------------- + Statistics + -------------------------------------------------------------------------- */ + +// +// Stats about a single GC +// +typedef struct GCDetails_ { + // The generation number of this GC + uint32_t gen; + // Number of threads used in this GC + uint32_t threads; + // Number of bytes allocated since the previous GC + uint64_t allocated_bytes; + // Total amount of live data in the heap (includes large + compact data). + // Updated after every GC. Data in uncollected generations (in minor GCs) + // are considered live. 
+ uint64_t live_bytes; + // Total amount of live data in large objects + uint64_t large_objects_bytes; + // Total amount of live data in compact regions + uint64_t compact_bytes; + // Total amount of slop (wasted memory) + uint64_t slop_bytes; + // Total amount of memory in use by the RTS + uint64_t mem_in_use_bytes; + // Total amount of data copied during this GC + uint64_t copied_bytes; + // In parallel GC, the max amount of data copied by any one thread + uint64_t par_max_copied_bytes; + // In parallel GC, the amount of balanced data copied by all threads + uint64_t par_balanced_copied_bytes; + // The time elapsed during synchronisation before GC + Time sync_elapsed_ns; + // The CPU time used during GC itself + Time cpu_ns; + // The time elapsed during GC itself + Time elapsed_ns; + + // + // Concurrent garbage collector + // + + // The CPU time used during the post-mark pause phase of the concurrent + // nonmoving GC. + Time nonmoving_gc_sync_cpu_ns; + // The time elapsed during the post-mark pause phase of the concurrent + // nonmoving GC. + Time nonmoving_gc_sync_elapsed_ns; + // The CPU time used during the post-mark pause phase of the concurrent + // nonmoving GC. + Time nonmoving_gc_cpu_ns; + // The time elapsed during the post-mark pause phase of the concurrent + // nonmoving GC. + Time nonmoving_gc_elapsed_ns; +} GCDetails; + +// +// Stats about the RTS currently, and since the start of execution +// +typedef struct _RTSStats { + + // ----------------------------------- + // Cumulative stats about memory use + + // Total number of GCs + uint32_t gcs; + // Total number of major (oldest generation) GCs + uint32_t major_gcs; + // Total bytes allocated + uint64_t allocated_bytes; + // Maximum live data (including large objects + compact regions) in the + // heap. Updated after a major GC. 
+ uint64_t max_live_bytes; + // Maximum live data in large objects + uint64_t max_large_objects_bytes; + // Maximum live data in compact regions + uint64_t max_compact_bytes; + // Maximum slop + uint64_t max_slop_bytes; + // Maximum memory in use by the RTS + uint64_t max_mem_in_use_bytes; + // Sum of live bytes across all major GCs. Divided by major_gcs + // gives the average live data over the lifetime of the program. + uint64_t cumulative_live_bytes; + // Sum of copied_bytes across all GCs + uint64_t copied_bytes; + // Sum of copied_bytes across all parallel GCs + uint64_t par_copied_bytes; + // Sum of par_max_copied_bytes across all parallel GCs + uint64_t cumulative_par_max_copied_bytes; + // Sum of par_balanced_copied_byes across all parallel GCs. + uint64_t cumulative_par_balanced_copied_bytes; + + // ----------------------------------- + // Cumulative stats about time use + // (we use signed values here because due to inaccuracies in timers + // the values can occasionally go slightly negative) + + // Total CPU time used by the init phase + Time init_cpu_ns; + // Total elapsed time used by the init phase + Time init_elapsed_ns; + // Total CPU time used by the mutator + Time mutator_cpu_ns; + // Total elapsed time used by the mutator + Time mutator_elapsed_ns; + // Total CPU time used by the GC + Time gc_cpu_ns; + // Total elapsed time used by the GC + Time gc_elapsed_ns; + // Total CPU time (at the previous GC) + Time cpu_ns; + // Total elapsed time (at the previous GC) + Time elapsed_ns; + + // ----------------------------------- + // Stats about the most recent GC + + GCDetails gc; + + // ----------------------------------- + // Internal Counters + + uint64_t any_work; + // The number of times a GC thread has iterated it's outer loop across all + // parallel GCs + uint64_t scav_find_work; + + uint64_t max_n_todo_overflow; + + // ---------------------------------- + // Concurrent garbage collector + + // The CPU time used during the post-mark pause phase 
of the concurrent + // nonmoving GC. + Time nonmoving_gc_sync_cpu_ns; + // The time elapsed during the post-mark pause phase of the concurrent + // nonmoving GC. + Time nonmoving_gc_sync_elapsed_ns; + // The maximum time elapsed during the post-mark pause phase of the + // concurrent nonmoving GC. + Time nonmoving_gc_sync_max_elapsed_ns; + // The CPU time used during the post-mark pause phase of the concurrent + // nonmoving GC. + Time nonmoving_gc_cpu_ns; + // The time elapsed during the post-mark pause phase of the concurrent + // nonmoving GC. + Time nonmoving_gc_elapsed_ns; + // The maximum time elapsed during the post-mark pause phase of the + // concurrent nonmoving GC. + Time nonmoving_gc_max_elapsed_ns; +} RTSStats; + +void getRTSStats (RTSStats *s); +int getRTSStatsEnabled (void); + +// Returns the total number of bytes allocated since the start of the program. +// TODO: can we remove this? +uint64_t getAllocations (void); + +/* ---------------------------------------------------------------------------- + Starting up and shutting down the Haskell RTS. + ------------------------------------------------------------------------- */ + +/* DEPRECATED, use hs_init() or hs_init_ghc() instead */ +extern void startupHaskell ( int argc, char *argv[], + void (*init_root)(void) ); + +/* DEPRECATED, use hs_exit() instead */ +extern void shutdownHaskell ( void ); + +/* Like hs_init(), but allows rtsopts. For more complicated usage, + * use hs_init_ghc. */ +extern void hs_init_with_rtsopts (int *argc, char **argv[]); + +/* + * GHC-specific version of hs_init() that allows specifying whether + * +RTS ... -RTS options are allowed or not (default: only "safe" + * options are allowed), and allows passing an option string that is + * to be interpreted by the RTS only, not passed to the program. 
+ */ +extern void hs_init_ghc (int *argc, char **argv[], // program arguments + RtsConfig rts_config); // RTS configuration + +extern void shutdownHaskellAndExit (int exitCode, int fastExit) + GNUC3_ATTRIBUTE(__noreturn__); + +#if !defined(mingw32_HOST_OS) +extern void shutdownHaskellAndSignal (int sig, int fastExit) + GNUC3_ATTRIBUTE(__noreturn__); +#endif + +extern void getProgArgv ( int *argc, char **argv[] ); +extern void setProgArgv ( int argc, char *argv[] ); +extern void getFullProgArgv ( int *argc, char **argv[] ); +extern void setFullProgArgv ( int argc, char *argv[] ); +extern void freeFullProgArgv ( void ) ; + +/* exit() override */ +extern void (*exitFn)(int); + +/* Note [Locking and Pausing the RTS] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You have to surround all access to the RtsAPI with rts_lock/rts_unlock or +with rts_pause/rts_resume. + + +# rts_lock / rts_unlock + +Use `rts_lock` to acquire a token which may be used to call other RtsAPI +functions and call `rts_unlock` to return the token. When locked, garbage +collection will not occur. As long as 1 or more capabilities are not locked, +haskell threads will continue to execute. If you want to pause execution of +all haskell threads then use rts_pause/rts_resume instead. + +The implementation of `rts_lock` acquires a capability for this thread. Hence, +at most n locks can be held simultaneously, where n is the number of +capabilities. It is an error to call `rts_lock` when the rts is already +paused by the current OS thread (see rts_pause/rts_resume below). + + +# rts_pause / rts_resume + +Use `rts_pause` to pause execution of all Haskell threads and `rts_resume` to +resume them. The implementation acquires all capabilities. `rts_resume` +must be called on the same thread as `rts_pause`. `rts_pause`, much like +rts_lock, returns a token. A `Capability` can be extracted from that token using +`pauseTokenCapability()`. The `Capability` can then be used to call other RtsAPI +functions. 
+ +* With the RTS paused, garbage collections will not occur and haskell threads + will not execute, allocate, nor mutate their stacks. +* Non-Haskell (i.e. non-worker) threads such as those running safe FFI calls + will NOT be paused and can still mutate pinned mutable data such as pinned + `MutableByteArray#`s. +* You may call `rts_pause` from within a non-worker OS thread. +* You may call `rts_pause` from within a *safe* FFI call. In this case, make + sure to call `rts_resume` within the same FFI call or the RTS will deadlock. +* Calling `rts_pause` from an *unsafe* FFI call will cause an error. +* On return, the rts will be fully paused: all haskell threads are stopped + and all capabilities are acquired by the current OS thread. +* Calling `rts_pause` in between rts_lock/rts_unlock on the same thread will + cause an error. +* Calling `rts_pause` results in an error if the RTS is already paused by the + current OS thread. +* Only one OS thread at a time can keep the rts paused. +* `rts_pause` will block while another thread is pausing the RTS, and + continue when the current thread is given exclusive permission to pause the + RTS. + +## Note on implementation. + +Thread safety is achieved almost entirely by the mechanism of acquiring and +releasing Capabilities, resulting in a sort of mutex / critical section pattern. +This has the following consequences: + +* There are at most `n_capabilities` threads currently in a + rts_lock/rts_unlock section. +* There is at most 1 thread in a rts_pause/rts_resume section. In that case + there will be no threads in a rts_lock/rts_unlock section. +* rts_pause and rts_lock may block in order to enforce the above 2 + invariants. + +*/ + +// Acquires a token which may be used to create new objects and evaluate them. +// See Note [Locking and Pausing the RTS] for correct usage. +Capability *rts_lock (void); + +// releases the token acquired with rts_lock(). +// See Note [Locking and Pausing the RTS] for correct usage. 
+void rts_unlock (Capability *token); + +// If you are in a context where you know you have a current capability but +// do not know what it is, then use this to get it. Basically this only +// applies to "unsafe" foreign calls (as unsafe foreign calls are made with +// the capability held). +// +// WARNING: There is *no* guarantee this returns anything sensible (eg NULL) +// when there is no current capability. +Capability *rts_unsafeGetMyCapability (void); + +/* ---------------------------------------------------------------------------- + Which cpu should the OS thread and Haskell thread run on? + + 1. Run the current thread on the given capability: + rts_setInCallCapability(cap, 0); + + 2. Run the current thread on the given capability and set the cpu affinity + for this thread: + rts_setInCallCapability(cap, 1); + + 3. Run the current thread on the given numa node: + rts_pinThreadToNumaNode(node); + + 4. Run the current thread on the given capability and on the given numa node: + rts_setInCallCapability(cap, 0); + rts_pinThreadToNumaNode(cap); + ------------------------------------------------------------------------- */ + +// Specify the Capability that the current OS thread should run on when it calls +// into Haskell. The actual capability will be calculated as the supplied +// value modulo the number of enabled Capabilities. +// +// If affinity is non-zero, the current thread will be bound to +// specific CPUs according to the prevailing affinity policy for the +// specified capability, set by either +RTS -qa or +RTS --numa. +void rts_setInCallCapability (int preferred_capability, int affinity); + +// Specify the CPU Node that the current OS thread should run on when it calls +// into Haskell. The argument can be either a node number or capability number. +// The actual node will be calculated as the supplied value modulo the number +// of numa nodes. 
+void rts_pinThreadToNumaNode (int node); + +/* ---------------------------------------------------------------------------- + Building Haskell objects from C datatypes. + ------------------------------------------------------------------------- */ +HaskellObj rts_mkChar ( Capability *, HsChar c ); +HaskellObj rts_mkInt ( Capability *, HsInt i ); +HaskellObj rts_mkInt8 ( Capability *, HsInt8 i ); +HaskellObj rts_mkInt16 ( Capability *, HsInt16 i ); +HaskellObj rts_mkInt32 ( Capability *, HsInt32 i ); +HaskellObj rts_mkInt64 ( Capability *, HsInt64 i ); +HaskellObj rts_mkWord ( Capability *, HsWord w ); +HaskellObj rts_mkWord8 ( Capability *, HsWord8 w ); +HaskellObj rts_mkWord16 ( Capability *, HsWord16 w ); +HaskellObj rts_mkWord32 ( Capability *, HsWord32 w ); +HaskellObj rts_mkWord64 ( Capability *, HsWord64 w ); +HaskellObj rts_mkPtr ( Capability *, HsPtr a ); +HaskellObj rts_mkFunPtr ( Capability *, HsFunPtr a ); +HaskellObj rts_mkFloat ( Capability *, HsFloat f ); +HaskellObj rts_mkDouble ( Capability *, HsDouble f ); +HaskellObj rts_mkStablePtr ( Capability *, HsStablePtr s ); +HaskellObj rts_mkBool ( Capability *, HsBool b ); +HaskellObj rts_mkString ( Capability *, char *s ); + +HaskellObj rts_apply ( Capability *, HaskellObj, HaskellObj ); + +/* ---------------------------------------------------------------------------- + Deconstructing Haskell objects + ------------------------------------------------------------------------- */ +HsChar rts_getChar ( HaskellObj ); +HsInt rts_getInt ( HaskellObj ); +HsInt8 rts_getInt8 ( HaskellObj ); +HsInt16 rts_getInt16 ( HaskellObj ); +HsInt32 rts_getInt32 ( HaskellObj ); +HsInt64 rts_getInt64 ( HaskellObj ); +HsWord rts_getWord ( HaskellObj ); +HsWord8 rts_getWord8 ( HaskellObj ); +HsWord16 rts_getWord16 ( HaskellObj ); +HsWord32 rts_getWord32 ( HaskellObj ); +HsWord64 rts_getWord64 ( HaskellObj ); +HsPtr rts_getPtr ( HaskellObj ); +HsFunPtr rts_getFunPtr ( HaskellObj ); +HsFloat rts_getFloat ( HaskellObj ); 
+HsDouble rts_getDouble ( HaskellObj ); +HsStablePtr rts_getStablePtr ( HaskellObj ); +HsBool rts_getBool ( HaskellObj ); + +/* ---------------------------------------------------------------------------- + Evaluating Haskell expressions + + The versions ending in '_' allow you to specify an initial stack size. + Note that these calls may cause Garbage Collection, so all HaskellObj + references are rendered invalid by these calls. + + All of these functions take a (Capability **) - there is a + Capability pointer both input and output. We use an inout + parameter because this is less error-prone for the client than a + return value - the client could easily forget to use the return + value, whereas incorrectly using an inout parameter will usually + result in a type error. + ------------------------------------------------------------------------- */ + +void rts_eval (/* inout */ Capability **, + /* in */ HaskellObj p, + /* out */ HaskellObj *ret); + +void rts_eval_ (/* inout */ Capability **, + /* in */ HaskellObj p, + /* in */ unsigned int stack_size, + /* out */ HaskellObj *ret); + +void rts_evalIO (/* inout */ Capability **, + /* in */ HaskellObj p, + /* out */ HaskellObj *ret); + +void rts_evalStableIOMain (/* inout */ Capability **, + /* in */ HsStablePtr s, + /* out */ HsStablePtr *ret); + +void rts_evalStableIO (/* inout */ Capability **, + /* in */ HsStablePtr s, + /* out */ HsStablePtr *ret); + +void rts_evalLazyIO (/* inout */ Capability **, + /* in */ HaskellObj p, + /* out */ HaskellObj *ret); + +void rts_evalLazyIO_ (/* inout */ Capability **, + /* in */ HaskellObj p, + /* in */ unsigned int stack_size, + /* out */ HaskellObj *ret); + +void rts_inCall (/* inout */ Capability **, + /* in */ HaskellObj p, + /* out */ HaskellObj *ret); + +void rts_checkSchedStatus (char* site, Capability *); + +SchedulerStatus rts_getSchedStatus (Capability *cap); + +// Halt execution of all Haskell threads. +// See Note [Locking and Pausing the RTS] for correct usage. 
+PauseToken *rts_pause (void); + +// Counterpart of rts_pause: Continue from a pause. +// See Note [Locking and Pausing the RTS] for correct usage. +// [in] pauseToken: the token returned by rts_pause. +void rts_resume (PauseToken *pauseToken); + +// Returns true if the rts is paused. See rts_pause() and rts_resume(). +bool rts_isPaused(void); + +// List all live threads. The RTS must be paused and this must be called on the +// same thread that called rts_pause(). +typedef void (*ListThreadsCb)(void *user, StgTSO *); +void rts_listThreads(ListThreadsCb cb, void *user); + +// List all non-thread GC roots. The RTS must be paused and this must be called +// on the same thread that called rts_pause(). +typedef void (*ListRootsCb)(void *user, StgClosure *); +void rts_listMiscRoots(ListRootsCb cb, void *user); + +/* + * The RTS allocates some thread-local data when you make a call into + * Haskell using one of the rts_eval() functions. This data is not + * normally freed until hs_exit(). If you want to free it earlier + * than this, perhaps because the thread is about to exit, then call + * rts_done() from the thread. + * + * It is safe to make more rts_eval() calls after calling rts_done(), + * but the next one will cause allocation of the thread-local memory + * again. + */ +void rts_done (void); + +/* -------------------------------------------------------------------------- + Wrapper closures + + These are used by foreign export and foreign import "wrapper" stubs. + ----------------------------------------------------------------------- */ + +// When producing Windows DLLs we need to know which symbols are in the +// local package/DLL vs external ones. +// +// Note that RtsAPI.h is also included by foreign export stubs in +// the base package itself. 
+// +#if defined(COMPILING_WINDOWS_DLL) && !defined(COMPILING_BASE_PACKAGE) +__declspec(dllimport) extern StgWord base_GHCziTopHandler_runIO_closure[]; +__declspec(dllimport) extern StgWord base_GHCziTopHandler_runNonIO_closure[]; +#else +extern StgWord base_GHCziTopHandler_runIO_closure[]; +extern StgWord base_GHCziTopHandler_runNonIO_closure[]; +#endif + +#define runIO_closure base_GHCziTopHandler_runIO_closure +#define runNonIO_closure base_GHCziTopHandler_runNonIO_closure + +/* ------------------------------------------------------------------------ */ + +#if defined(__cplusplus) +} +#endif diff --git a/rts/include/Stg.h b/rts/include/Stg.h new file mode 100644 index 0000000000..46f71c0241 --- /dev/null +++ b/rts/include/Stg.h @@ -0,0 +1,600 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * Top-level include file for everything required when compiling .hc + * code. NOTE: in .hc files, Stg.h must be included *before* any + * other headers, because we define some register variables which must + * be done before any inline functions are defined (some system + * headers have been known to define the odd inline function). + * + * We generally try to keep as little visible as possible when + * compiling .hc files. So for example the definitions of the + * InfoTable structs, closure structs and other RTS types are not + * visible here. The compiler knows enough about the representations + * of these types to generate code which manipulates them directly + * with pointer arithmetic. + * + * In ordinary C code, do not #include this file directly: #include + * "Rts.h" instead. 
+ * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#if !(__STDC_VERSION__ >= 199901L) && !(__cplusplus >= 201103L) +# error __STDC_VERSION__ does not advertise C99, C++11 or later +#endif + +/* + * If we are compiling a .hc file, then we want all the register + * variables. This is what happens if you #include "Stg.h" first: + * we assume this is a .hc file, and set IN_STG_CODE==1, which later + * causes the register variables to be enabled in stg/Regs.h. + * + * If instead "Rts.h" is included first, then we are compiling a + * vanilla C file. Everything from Stg.h is provided, except that + * IN_STG_CODE is not defined, and the register variables will not be + * active. + */ +#if !defined(IN_STG_CODE) +# define IN_STG_CODE 1 + +// Turn on C99 for .hc code. This gives us the INFINITY and NAN +// constants from math.h, which we occasionally need to use in .hc (#1861) +# define _ISOC99_SOURCE + +// We need _BSD_SOURCE so that math.h defines things like gamma +// on Linux +# define _BSD_SOURCE + +// On AIX we need _BSD defined, otherwise <math.h> includes <stdlib.h> +# if defined(_AIX) +# define _BSD 1 +# endif + +// '_BSD_SOURCE' is deprecated since glibc-2.20 +// in favour of '_DEFAULT_SOURCE' +# define _DEFAULT_SOURCE +#endif + +#if IN_STG_CODE == 0 || defined(CC_LLVM_BACKEND) +// C compilers that use an LLVM back end (clang or llvm-gcc) do not +// correctly support global register variables so we make sure that +// we do not declare them for these compilers. +# define NO_GLOBAL_REG_DECLS /* don't define fixed registers */ +#endif + +/* Configuration */ +#include "ghcconfig.h" + +/* The code generator calls the math functions directly in .hc code. + NB. 
after configuration stuff above, because this sets #defines + that depend on config info, such as __USE_FILE_OFFSET64 */ +#include <math.h> + +// On Solaris, we don't get the INFINITY and NAN constants unless we +// #define _STDC_C99, and we can't do that unless we also use -std=c99, +// because _STDC_C99 causes the headers to use C99 syntax (e.g. restrict). +// We aren't ready for -std=c99 yet, so define INFINITY/NAN by hand using +// the gcc builtins. +#if !defined(INFINITY) +#if defined(__GNUC__) +#define INFINITY __builtin_inf() +#else +#error No definition for INFINITY +#endif +#endif + +#if !defined(NAN) +#if defined(__GNUC__) +#define NAN __builtin_nan("") +#else +#error No definition for NAN +#endif +#endif + +/* ----------------------------------------------------------------------------- + Useful definitions + -------------------------------------------------------------------------- */ + +/* + * The C backend likes to refer to labels by just mentioning their + * names. However, when a symbol is declared as a variable in C, the + * C compiler will implicitly dereference it when it occurs in source. + * So we must subvert this behaviour for .hc files by declaring + * variables as arrays, which eliminates the implicit dereference. 
+ */ +#if IN_STG_CODE +#define RTS_VAR(x) (x)[] +#define RTS_DEREF(x) (*(x)) +#else +#define RTS_VAR(x) x +#define RTS_DEREF(x) x +#endif + +/* bit macros + */ +#define BITS_PER_BYTE 8 +#define BITS_IN(x) (BITS_PER_BYTE * sizeof(x)) + +/* Compute offsets of struct fields + */ +#define STG_FIELD_OFFSET(s_type, field) ((StgWord)&(((s_type*)0)->field)) + +/* + * 'Portable' inlining: + * INLINE_HEADER is for inline functions in header files (macros) + * STATIC_INLINE is for inline functions in source files + * EXTERN_INLINE is for functions that we want to inline sometimes + * (we also compile a static version of the function; see Inlines.c) + */ + +// We generally assume C99 semantics albeit these two definitions work fine even +// when gnu90 semantics are active (i.e. when __GNUC_GNU_INLINE__ is defined or +// when a GCC older than 4.2 is used) +// +// The problem, however, is with 'extern inline' whose semantics significantly +// differs between gnu90 and C99 +#define INLINE_HEADER static inline +#define STATIC_INLINE static inline + +// Figure out whether `__attributes__((gnu_inline))` is needed +// to force gnu90-style 'external inline' semantics. +#if defined(FORCE_GNU_INLINE) +// disable auto-detection since HAVE_GNU_INLINE has been defined externally +#elif defined(__GNUC_GNU_INLINE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 +// GCC 4.2.x didn't properly support C99 inline semantics (GCC 4.3 was the first +// release to properly support C99 inline semantics), and therefore warned when +// using 'extern inline' while in C99 mode unless `__attributes__((gnu_inline))` +// was explicitly set. 
+# define FORCE_GNU_INLINE 1 +#endif + +#if defined(FORCE_GNU_INLINE) +// Force compiler into gnu90 semantics +# if defined(KEEP_INLINES) +# define EXTERN_INLINE inline __attribute__((gnu_inline)) +# else +# define EXTERN_INLINE extern inline __attribute__((gnu_inline)) +# endif +#elif defined(__GNUC_GNU_INLINE__) +// we're currently in gnu90 inline mode by default and +// __attribute__((gnu_inline)) may not be supported, so better leave it off +# if defined(KEEP_INLINES) +# define EXTERN_INLINE inline +# else +# define EXTERN_INLINE extern inline +# endif +#else +// Assume C99 semantics (yes, this curiously results in swapped definitions!) +// This is the preferred branch, and at some point we may drop support for +// compilers not supporting C99 semantics altogether. +# if defined(KEEP_INLINES) +# define EXTERN_INLINE extern inline +# else +# define EXTERN_INLINE inline +# endif +#endif + + +/* + * GCC attributes + */ +#if defined(__GNUC__) +#define GNU_ATTRIBUTE(at) __attribute__((at)) +#else +#define GNU_ATTRIBUTE(at) +#endif + +#if __GNUC__ >= 3 +#define GNUC3_ATTRIBUTE(at) __attribute__((at)) +#else +#define GNUC3_ATTRIBUTE(at) +#endif + +/* Used to mark a switch case that falls-through */ +#if (defined(__GNUC__) && __GNUC__ >= 7) +// N.B. Don't enable fallthrough annotations when compiling with Clang. +// Apparently clang doesn't enable implicitly fallthrough warnings by default +// http://llvm.org/viewvc/llvm-project?revision=167655&view=revision +// when compiling C and the attribute cause warnings of their own (#16019). +#define FALLTHROUGH GNU_ATTRIBUTE(fallthrough) +#else +#define FALLTHROUGH ((void)0) +#endif /* __GNUC__ >= 7 */ + +#if !defined(DEBUG) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) +#define GNUC_ATTR_HOT __attribute__((hot)) +#else +#define GNUC_ATTR_HOT /* nothing */ +#endif + +#define STG_UNUSED GNUC3_ATTRIBUTE(__unused__) + +/* Prevent functions from being optimized. 
 See Note [Windows Stack allocations] */ +#if defined(__clang__) +#define STG_NO_OPTIMIZE __attribute__((optnone)) +#elif defined(__GNUC__) || defined(__GNUG__) +#define STG_NO_OPTIMIZE __attribute__((optimize("O0"))) +#else +#define STG_NO_OPTIMIZE /* nothing */ +#endif + +/* ----------------------------------------------------------------------------- + Global type definitions + -------------------------------------------------------------------------- */ + +#include "MachDeps.h" +#include "stg/Types.h" + +/* ----------------------------------------------------------------------------- + Shorthand forms + -------------------------------------------------------------------------- */ + +typedef StgChar C_; +typedef StgWord W_; +typedef StgWord* P_; +typedef StgInt I_; +typedef StgWord StgWordArray[]; +typedef StgFunPtr F_; + +/* byte arrays (and strings): */ +#define EB_(X) extern const char X[] +#define IB_(X) static const char X[] +/* static (non-heap) closures (requires alignment for pointer tagging): */ +#define EC_(X) extern StgWordArray (X) GNU_ATTRIBUTE(aligned (8)) +#define IC_(X) static StgWordArray (X) GNU_ATTRIBUTE(aligned (8)) +/* writable data (does not require alignment): */ +#define ERW_(X) extern StgWordArray (X) +#define IRW_(X) static StgWordArray (X) +/* read-only data (does not require alignment): */ +#define ERO_(X) extern const StgWordArray (X) +#define IRO_(X) static const StgWordArray (X) +/* stg-native functions: */ +#define IF_(f) static StgFunPtr GNUC3_ATTRIBUTE(used) f(void) +#define FN_(f) StgFunPtr f(void) +#define EF_(f) StgFunPtr f(void) /* External Cmm functions */ +/* foreign functions: */ +#define EFF_(f) void f() /* See Note [External function prototypes] */ + +/* Note [External function prototypes] See #8965, #11395 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In generated C code we need to distinguish between two types +of external symbols: +1. Cmm functions declared by 'EF_' macro (External Functions) +2. 
C functions declared by 'EFF_' macro (External Foreign Functions) + +Cmm functions are simple as they are internal to GHC. + +C functions are trickier: + +The external-function macro EFF_(F) used to be defined as + extern StgFunPtr f(void) +i.e a function of zero arguments. On most platforms this doesn't +matter very much: calls to these functions put the parameters in the +usual places anyway, and (with the exception of varargs) things just +work. + +However, the ELFv2 ABI on ppc64 optimises stack allocation +(http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01149.html): a call to a +function that has a prototype, is not varargs, and receives all parameters +in registers rather than on the stack does not require the caller to +allocate an argument save area. The incorrect prototypes cause GCC to +believe that all functions declared this way can be called without an +argument save area, but if the callee has sufficiently many arguments then +it will expect that area to be present, and will thus corrupt the caller's +stack. This happens in particular with calls to runInteractiveProcess in +libraries/process/cbits/runProcess.c, and led to #8965. + +The simplest fix appears to be to declare these external functions with an +unspecified argument list rather than a void argument list. This is no +worse for platforms that don't care either way, and allows a successful +bootstrap of GHC 7.8 on little-endian Linux ppc64 (which uses the ELFv2 +ABI). + +Another case is m68k ABI where 'void*' return type is returned by 'a0' +register while 'long' return type is returned by 'd0'. Thus we trick +external prototype return neither of these types to workaround #11395. 
+*/ + + +/* ----------------------------------------------------------------------------- + Tail calls + -------------------------------------------------------------------------- */ + +#define JMP_(cont) return((StgFunPtr)(cont)) + +/* ----------------------------------------------------------------------------- + Other Stg stuff... + -------------------------------------------------------------------------- */ + +#include "stg/DLL.h" +#include "stg/MachRegsForHost.h" +#include "stg/Regs.h" +#include "stg/Ticky.h" + +#if IN_STG_CODE +/* + * This is included later for RTS sources, after definitions of + * StgInfoTable, StgClosure and so on. + */ +#include "stg/MiscClosures.h" +#endif + +#include "stg/Prim.h" /* ghc-prim fallbacks */ +#include "stg/SMP.h" // write_barrier() inline is required + +/* ----------------------------------------------------------------------------- + Moving Floats and Doubles + + ASSIGN_FLT is for assigning a float to memory (usually the + stack/heap). The memory address is guaranteed to be + StgWord aligned (currently == sizeof(void *)). + + PK_FLT is for pulling a float out of memory. The memory is + guaranteed to be StgWord aligned. 
+ -------------------------------------------------------------------------- */ + +INLINE_HEADER void ASSIGN_FLT (W_ [], StgFloat); +INLINE_HEADER StgFloat PK_FLT (W_ []); + +#if ALIGNMENT_FLOAT <= ALIGNMENT_VOID_P + +INLINE_HEADER void ASSIGN_FLT(W_ p_dest[], StgFloat src) { *(StgFloat *)p_dest = src; } +INLINE_HEADER StgFloat PK_FLT (W_ p_src[]) { return *(StgFloat *)p_src; } + +#else /* ALIGNMENT_FLOAT > ALIGNMENT_UNSIGNED_INT */ + +INLINE_HEADER void ASSIGN_FLT(W_ p_dest[], StgFloat src) +{ + float_thing y; + y.f = src; + *p_dest = y.fu; +} + +INLINE_HEADER StgFloat PK_FLT(W_ p_src[]) +{ + float_thing y; + y.fu = *p_src; + return(y.f); +} + +#endif /* ALIGNMENT_FLOAT > ALIGNMENT_VOID_P */ + +#if ALIGNMENT_DOUBLE <= ALIGNMENT_VOID_P + +INLINE_HEADER void ASSIGN_DBL (W_ [], StgDouble); +INLINE_HEADER StgDouble PK_DBL (W_ []); + +INLINE_HEADER void ASSIGN_DBL(W_ p_dest[], StgDouble src) { *(StgDouble *)p_dest = src; } +INLINE_HEADER StgDouble PK_DBL (W_ p_src[]) { return *(StgDouble *)p_src; } + +#else /* ALIGNMENT_DOUBLE > ALIGNMENT_VOID_P */ + +/* Sparc uses two floating point registers to hold a double. We can + * write ASSIGN_DBL and PK_DBL by directly accessing the registers + * independently - unfortunately this code isn't writable in C, we + * have to use inline assembler. + */ +#if defined(sparc_HOST_ARCH) + +#define ASSIGN_DBL(dst0,src) \ + { StgPtr dst = (StgPtr)(dst0); \ + __asm__("st %2,%0\n\tst %R2,%1" : "=m" (((P_)(dst))[0]), \ + "=m" (((P_)(dst))[1]) : "f" (src)); \ + } + +#define PK_DBL(src0) \ + ( { StgPtr src = (StgPtr)(src0); \ + register double d; \ + __asm__("ld %1,%0\n\tld %2,%R0" : "=f" (d) : \ + "m" (((P_)(src))[0]), "m" (((P_)(src))[1])); d; \ + } ) + +#else /* ! 
 sparc_HOST_ARCH */ + +INLINE_HEADER void ASSIGN_DBL (W_ [], StgDouble); +INLINE_HEADER StgDouble PK_DBL (W_ []); + +typedef struct + { StgWord dhi; + StgWord dlo; + } unpacked_double; + +typedef union + { StgDouble d; + unpacked_double du; + } double_thing; + +INLINE_HEADER void ASSIGN_DBL(W_ p_dest[], StgDouble src) +{ + double_thing y; + y.d = src; + p_dest[0] = y.du.dhi; + p_dest[1] = y.du.dlo; +} + +/* GCC also works with this version, but it generates + the same code as the previous one, and is not ANSI + +#define ASSIGN_DBL( p_dest, src ) \ + *p_dest = ((double_thing) src).du.dhi; \ + *(p_dest+1) = ((double_thing) src).du.dlo \ +*/ + +INLINE_HEADER StgDouble PK_DBL(W_ p_src[]) +{ + double_thing y; + y.du.dhi = p_src[0]; + y.du.dlo = p_src[1]; + return(y.d); +} + +#endif /* ! sparc_HOST_ARCH */ + +#endif /* ALIGNMENT_DOUBLE > ALIGNMENT_UNSIGNED_INT */ + + +/* ----------------------------------------------------------------------------- + Moving 64-bit quantities around + + ASSIGN_Word64 assign an StgWord64/StgInt64 to a memory location + PK_Word64 load an StgWord64/StgInt64 from a memory location + + In both cases the memory location might not be 64-bit aligned. 
+ -------------------------------------------------------------------------- */ + +#if SIZEOF_HSWORD == 4 + +typedef struct + { StgWord dhi; + StgWord dlo; + } unpacked_double_word; + +typedef union + { StgInt64 i; + unpacked_double_word iu; + } int64_thing; + +typedef union + { StgWord64 w; + unpacked_double_word wu; + } word64_thing; + +INLINE_HEADER void ASSIGN_Word64(W_ p_dest[], StgWord64 src) +{ + word64_thing y; + y.w = src; + p_dest[0] = y.wu.dhi; + p_dest[1] = y.wu.dlo; +} + +INLINE_HEADER StgWord64 PK_Word64(W_ p_src[]) +{ + word64_thing y; + y.wu.dhi = p_src[0]; + y.wu.dlo = p_src[1]; + return(y.w); +} + +INLINE_HEADER void ASSIGN_Int64(W_ p_dest[], StgInt64 src) +{ + int64_thing y; + y.i = src; + p_dest[0] = y.iu.dhi; + p_dest[1] = y.iu.dlo; +} + +INLINE_HEADER StgInt64 PK_Int64(W_ p_src[]) +{ + int64_thing y; + y.iu.dhi = p_src[0]; + y.iu.dlo = p_src[1]; + return(y.i); +} + +#elif SIZEOF_VOID_P == 8 + +INLINE_HEADER void ASSIGN_Word64(W_ p_dest[], StgWord64 src) +{ + p_dest[0] = src; +} + +INLINE_HEADER StgWord64 PK_Word64(W_ p_src[]) +{ + return p_src[0]; +} + +INLINE_HEADER void ASSIGN_Int64(W_ p_dest[], StgInt64 src) +{ + p_dest[0] = src; +} + +INLINE_HEADER StgInt64 PK_Int64(W_ p_src[]) +{ + return p_src[0]; +} + +#endif /* SIZEOF_HSWORD == 4 */ + +/* ----------------------------------------------------------------------------- + Integer multiply with overflow + -------------------------------------------------------------------------- */ + +/* Multiply with overflow checking. + * + * This is tricky - the usual sign rules for add/subtract don't apply. + * + * On 32-bit machines we use gcc's 'long long' types, finding + * overflow with some careful bit-twiddling. + * + * On 64-bit machines where gcc's 'long long' type is also 64-bits, + * we use a crude approximation, testing whether either operand is + * larger than 32-bits; if neither is, then we go ahead with the + * multiplication. 
+ * + * Return non-zero if there is any possibility that the signed multiply + * of a and b might overflow. Return zero only if you are absolutely sure + * that it won't overflow. If in doubt, return non-zero. + */ + +#if SIZEOF_VOID_P == 4 + +#if defined(WORDS_BIGENDIAN) +#define RTS_CARRY_IDX__ 0 +#define RTS_REM_IDX__ 1 +#else +#define RTS_CARRY_IDX__ 1 +#define RTS_REM_IDX__ 0 +#endif + +typedef union { + StgInt64 l; + StgInt32 i[2]; +} long_long_u ; + +#define mulIntMayOflo(a,b) \ +({ \ + StgInt32 r, c; \ + long_long_u z; \ + z.l = (StgInt64)a * (StgInt64)b; \ + r = z.i[RTS_REM_IDX__]; \ + c = z.i[RTS_CARRY_IDX__]; \ + if (c == 0 || c == -1) { \ + c = ((StgWord)((a^b) ^ r)) \ + >> (BITS_IN (I_) - 1); \ + } \ + c; \ +}) + +/* Careful: the carry calculation above is extremely delicate. Make sure + * you test it thoroughly after changing it. + */ + +#else + +/* Approximate version when we don't have long arithmetic (on 64-bit archs) */ + +/* If we have n-bit words then we have n-1 bits after accounting for the + * sign bit, so we can fit the result of multiplying 2 (n-1)/2-bit numbers */ +#define HALF_POS_INT (((I_)1) << ((BITS_IN (I_) - 1) / 2)) +#define HALF_NEG_INT (-HALF_POS_INT) + +#define mulIntMayOflo(a,b) \ +({ \ + I_ c; \ + if ((I_)a <= HALF_NEG_INT || a >= HALF_POS_INT \ + || (I_)b <= HALF_NEG_INT || b >= HALF_POS_INT) {\ + c = 1; \ + } else { \ + c = 0; \ + } \ + c; \ +}) +#endif + diff --git a/rts/include/ghc.mk b/rts/include/ghc.mk new file mode 100644 index 0000000000..88cb12caff --- /dev/null +++ b/rts/include/ghc.mk @@ -0,0 +1,311 @@ +# ----------------------------------------------------------------------------- +# +# (c) 2009 The University of Glasgow +# +# This file is part of the GHC build system. 
+# +# To understand how the build system works and how to modify it, see +# https://gitlab.haskell.org/ghc/ghc/wikis/building/architecture +# https://gitlab.haskell.org/ghc/ghc/wikis/building/modifying +# +# ----------------------------------------------------------------------------- + +# +# Header files built from the configure script's findings +# +includes_0_H_CONFIG = rts/include/dist/build/ghcautoconf.h +includes_1_H_CONFIG = rts/include/dist-install/build/ghcautoconf.h +includes_2_H_CONFIG = $(includes_1_H_CONFIG) + +includes_0_H_PLATFORM = rts/include/dist/build/ghcplatform.h +includes_1_H_PLATFORM = rts/include/dist-install/build/ghcplatform.h +includes_2_H_PLATFORM = $(includes_1_H_PLATFORM) + +includes_0_H_VERSION = rts/include/dist/build/ghcversion.h +includes_1_H_VERSION = rts/include/dist-install/build/ghcversion.h +includes_2_H_VERSION = $(includes_1_H_VERSION) + +BUILD_0_INCLUDE_DIR = rts/include/dist/build +BUILD_1_INCLUDE_DIR = rts/include/dist-install/build +BUILD_2_INCLUDE_DIR = $(BUILD_1_INCLUDE_DIR) + +# +# All header files are in rts/include/{one of these subdirectories} +# +includes_H_SUBDIRS += . 
+includes_H_SUBDIRS += rts +includes_H_SUBDIRS += rts/prof +includes_H_SUBDIRS += rts/storage +includes_H_SUBDIRS += stg + +includes_H_FILES := $(wildcard $(patsubst %,rts/include/%/*.h,$(includes_H_SUBDIRS))) +# This isn't necessary, but it makes the paths look a little prettier +includes_H_FILES := $(subst /./,/,$(includes_H_FILES)) + +# +# Options +# + +includes_CC_OPTS += $(SRC_CC_OPTS) +includes_CC_OPTS += $(SRC_CC_WARNING_OPTS) +includes_CC_OPTS += $(CONF_CC_OPTS_STAGE1) + +ifeq "$(GhcUnregisterised)" "YES" +includes_CC_OPTS += -DUSE_MINIINTERPRETER +endif + +includes_CC_OPTS += $(addprefix -I,$(GHC_INCLUDE_DIRS)) +includes_CC_OPTS += -I$(BUILD_1_INCLUDE_DIR) +includes_CC_OPTS += -Irts + +ifneq "$(GhcWithSMP)" "YES" +includes_CC_OPTS += -DNOSMP +endif + +define includesHeaderVersion +# $1 = stage +$$(includes_$1_H_VERSION) : mk/project.mk | $$$$(dir $$$$@)/. + $$(call removeFiles,$$@) + @echo "Creating $$@..." + @echo "#if !defined(__GHCVERSION_H__)" > $$@ + @echo "#define __GHCVERSION_H__" >> $$@ + @echo >> $$@ + @echo "#define __GLASGOW_HASKELL__ $$(ProjectVersionInt)" >> $$@ + @echo "#define __GLASGOW_HASKELL_FULL_VERSION__ \"$$(ProjectVersion)\"" >> $$@ + @echo >> $$@ + @if [ -n "$$(ProjectPatchLevel1)" ]; then \ + echo "#define __GLASGOW_HASKELL_PATCHLEVEL1__ $$(ProjectPatchLevel1)" >> $$@; \ + fi + @if [ -n "$$(ProjectPatchLevel2)" ]; then \ + echo "#define __GLASGOW_HASKELL_PATCHLEVEL2__ $$(ProjectPatchLevel2)" >> $$@; \ + fi + @echo >> $$@ + @echo '#define MIN_VERSION_GLASGOW_HASKELL(ma,mi,pl1,pl2) (\' >> $$@ + @echo ' ((ma)*100+(mi)) < __GLASGOW_HASKELL__ || \' >> $$@ + @echo ' ((ma)*100+(mi)) == __GLASGOW_HASKELL__ \' >> $$@ + @echo ' && (pl1) < __GLASGOW_HASKELL_PATCHLEVEL1__ || \' >> $$@ + @echo ' ((ma)*100+(mi)) == __GLASGOW_HASKELL__ \' >> $$@ + @echo ' && (pl1) == __GLASGOW_HASKELL_PATCHLEVEL1__ \' >> $$@ + @echo ' && (pl2) <= __GLASGOW_HASKELL_PATCHLEVEL2__ )' >> $$@ + @echo >> $$@ + @echo "#endif /* __GHCVERSION_H__ */" >> $$@ + @echo "Done." 
+ +endef + +$(eval $(call includesHeaderVersion,0)) +$(eval $(call includesHeaderVersion,1)) + +ifneq "$(BINDIST)" "YES" + +define includesHeaderConfig +# $1 = stage +$$(includes_$1_H_CONFIG) : mk/config.h mk/config.mk rts/include/ghc.mk | $$$$(dir $$$$@)/. + $$(call removeFiles,$$@) + @echo "Creating $$@..." + @echo "#if !defined(__GHCAUTOCONF_H__)" > $$@ + @echo "#define __GHCAUTOCONF_H__" >> $$@ +# +# Copy the contents of mk/config.h, turning '#define PACKAGE_FOO +# "blah"' into '/* #undef PACKAGE_FOO */' to avoid clashes. +# + @sed 's,^\([ ]*\)#[ ]*define[ ][ ]*\(PACKAGE_[A-Z]*\)[ ][ ]*".*".*$$$$,\1/* #undef \2 */,' mk/config.h >> $$@ +# + @echo "#endif /* __GHCAUTOCONF_H__ */" >> $$@ + @echo "Done." + +endef + +$(eval $(call includesHeaderConfig,0)) +$(eval $(call includesHeaderConfig,1)) + +BuildPlatform_0_CPP = $(BuildPlatform_CPP) +BuildPlatform_1_CPP = $(HostPlatform_CPP) +BuildPlatform_2_CPP = $(TargetPlatform_CPP) + +HostPlatform_0_CPP = $(HostPlatform_CPP) +HostPlatform_1_CPP = $(TargetPlatform_CPP) +HostPlatform_2_CPP = $(TargetPlatform_CPP) + +BuildArch_0_CPP = $(BuildArch_CPP) +BuildArch_1_CPP = $(HostArch_CPP) +BuildArch_2_CPP = $(TargetArch_CPP) + +HostArch_0_CPP = $(HostArch_CPP) +HostArch_1_CPP = $(TargetArch_CPP) +HostArch_2_CPP = $(TargetArch_CPP) + +BuildOS_0_CPP = $(BuildOS_CPP) +BuildOS_1_CPP = $(HostOS_CPP) +BuildOS_2_CPP = $(TargetOS_CPP) + +HostOS_0_CPP = $(HostOS_CPP) +HostOS_1_CPP = $(TargetOS_CPP) +HostOS_2_CPP = $(TargetOS_CPP) + +BuildVendor_0_CPP = $(BuildVendor_CPP) +BuildVendor_1_CPP = $(HostVendor_CPP) +BuildVendor_2_CPP = $(TargetVendor_CPP) + +HostVendor_0_CPP = $(HostVendor_CPP) +HostVendor_1_CPP = $(TargetVendor_CPP) +HostVendor_2_CPP = $(TargetVendor_CPP) + +define includesHeaderPlatform +# $1 = stage +$$(includes_$1_H_PLATFORM) : rts/include/ghc.mk rts/include/Makefile | $$$$(dir $$$$@)/. + $$(call removeFiles,$$@) + @echo "Creating $$@..." 
+ @echo "#if !defined(__GHCPLATFORM_H__)" > $$@ + @echo "#define __GHCPLATFORM_H__" >> $$@ + @echo >> $$@ + @echo "#define GHC_STAGE ($1 + 1)" >> $$@ + @echo >> $$@ + @echo "#define BuildPlatform_TYPE $(BuildPlatform_$1_CPP)" >> $$@ + @echo "#define HostPlatform_TYPE $(HostPlatform_$1_CPP)" >> $$@ + @echo >> $$@ + @echo "#define $(BuildPlatform_$1_CPP)_BUILD 1" >> $$@ + @echo "#define $(HostPlatform_$1_CPP)_HOST 1" >> $$@ + @echo >> $$@ + @echo "#define $(BuildArch_$1_CPP)_BUILD_ARCH 1" >> $$@ + @echo "#define $(HostArch_$1_CPP)_HOST_ARCH 1" >> $$@ + @echo "#define BUILD_ARCH \"$(BuildArch_$1_CPP)\"" >> $$@ + @echo "#define HOST_ARCH \"$(HostArch_$1_CPP)\"" >> $$@ + @echo >> $$@ + @echo "#define $(BuildOS_$1_CPP)_BUILD_OS 1" >> $$@ + @echo "#define $(HostOS_$1_CPP)_HOST_OS 1" >> $$@ + @echo "#define BUILD_OS \"$(BuildOS_$1_CPP)\"" >> $$@ + @echo "#define HOST_OS \"$(HostOS_$1_CPP)\"" >> $$@ + @echo >> $$@ + @echo "#define $(BuildVendor_$1_CPP)_BUILD_VENDOR 1" >> $$@ + @echo "#define $(HostVendor_$1_CPP)_HOST_VENDOR 1" >> $$@ + @echo "#define BUILD_VENDOR \"$(BuildVendor_$1_CPP)\"" >> $$@ + @echo "#define HOST_VENDOR \"$(HostVendor_$1_CPP)\"" >> $$@ + @echo >> $$@ +ifeq "$$(SettingsUseDistroMINGW)" "YES" + @echo "#define USE_INPLACE_MINGW_TOOLCHAIN 1" >> $$@ +endif +ifeq "$$(GhcUnregisterised)" "YES" + @echo "#define UnregisterisedCompiler 1" >> $$@ +endif + @echo >> $$@ + @echo "#endif /* __GHCPLATFORM_H__ */" >> $$@ + @echo "Done." +endef + +endif + +$(eval $(call includesHeaderPlatform,0)) +$(eval $(call includesHeaderPlatform,1)) + +# ----------------------------------------------------------------------------- +# Settings + +# These settings are read by GHC at runtime, so as to not cause spurious +# rebuilds. +# See Note [tooldir: How GHC finds mingw on Windows] + +includes_SETTINGS = rts/include/dist/build/settings + +$(includes_SETTINGS) : rts/include/Makefile | $$(dir $$@)/. 
+ $(call removeFiles,$@) + @echo '[("GCC extra via C opts", "$(GccExtraViaCOpts)")' >> $@ + @echo ',("C compiler command", "$(SettingsCCompilerCommand)")' >> $@ + @echo ',("C compiler flags", "$(SettingsCCompilerFlags)")' >> $@ + @echo ',("C++ compiler flags", "$(SettingsCxxCompilerFlags)")' >> $@ + @echo ',("C compiler link flags", "$(SettingsCCompilerLinkFlags)")' >> $@ + @echo ',("C compiler supports -no-pie", "$(SettingsCCompilerSupportsNoPie)")' >> $@ + @echo ',("Haskell CPP command", "$(SettingsHaskellCPPCommand)")' >> $@ + @echo ',("Haskell CPP flags", "$(SettingsHaskellCPPFlags)")' >> $@ + @echo ',("ld command", "$(SettingsLdCommand)")' >> $@ + @echo ',("ld flags", "$(SettingsLdFlags)")' >> $@ + @echo ',("ld supports compact unwind", "$(LdHasNoCompactUnwind)")' >> $@ + @echo ',("ld supports build-id", "$(LdHasBuildId)")' >> $@ + @echo ',("ld supports filelist", "$(LdHasFilelist)")' >> $@ + @echo ',("ld is GNU ld", "$(LdIsGNULd)")' >> $@ + @echo ',("Merge objects command", "$(SettingsMergeObjectsCommand)")' >> $@ + @echo ',("Merge objects flags", "$(SettingsMergeObjectsFlags)")' >> $@ + @echo ',("ar command", "$(SettingsArCommand)")' >> $@ + @echo ',("ar flags", "$(ArArgs)")' >> $@ + @echo ',("ar supports at file", "$(ArSupportsAtFile)")' >> $@ + @echo ',("ranlib command", "$(SettingsRanlibCommand)")' >> $@ + @echo ',("otool command", "$(SettingsOtoolCommand)")' >> $@ + @echo ',("install_name_tool command", "$(SettingsInstallNameToolCommand)")' >> $@ + @echo ',("touch command", "$(SettingsTouchCommand)")' >> $@ + @echo ',("dllwrap command", "$(SettingsDllWrapCommand)")' >> $@ + @echo ',("windres command", "$(SettingsWindresCommand)")' >> $@ + @echo ',("libtool command", "$(SettingsLibtoolCommand)")' >> $@ + @echo ',("unlit command", "$$topdir/bin/$(utils/unlit_dist_PROG)")' >> $@ + @echo ',("cross compiling", "$(CrossCompiling)")' >> $@ + @echo ',("target platform string", "$(TARGETPLATFORM)")' >> $@ + @echo ',("target os", "$(HaskellTargetOs)")' >> $@ + 
@echo ',("target arch", "$(HaskellTargetArch)")' >> $@ + @echo ',("target word size", "$(TargetWordSize)")' >> $@ + @echo ',("target word big endian", "$(TargetWordBigEndian)")' >> $@ + @echo ',("target has GNU nonexec stack", "$(TargetHasGnuNonexecStack)")' >> $@ + @echo ',("target has .ident directive", "$(TargetHasIdentDirective)")' >> $@ + @echo ',("target has subsections via symbols", "$(TargetHasSubsectionsViaSymbols)")' >> $@ + @echo ',("target has RTS linker", "$(TargetHasRTSLinker)")' >> $@ + @echo ',("Unregisterised", "$(GhcUnregisterised)")' >> $@ + @echo ',("LLVM target", "$(LLVMTarget_CPP)")' >> $@ + @echo ',("LLVM llc command", "$(SettingsLlcCommand)")' >> $@ + @echo ',("LLVM opt command", "$(SettingsOptCommand)")' >> $@ + @echo ',("LLVM clang command", "$(SettingsClangCommand)")' >> $@ + @echo + @echo ',("bignum backend", "$(BIGNUM_BACKEND)")' >> $@ + @echo ',("Use interpreter", "$(GhcWithInterpreter)")' >> $@ + @echo ',("Support SMP", "$(GhcWithSMP)")' >> $@ + @echo ',("RTS ways", "$(GhcRTSWays)")' >> $@ + @echo ',("Tables next to code", "$(TablesNextToCode)")' >> $@ + @echo ',("Leading underscore", "$(LeadingUnderscore)")' >> $@ + @echo ',("Use LibFFI", "$(UseLibffiForAdjustors)")' >> $@ + @echo ",(\"RTS expects libdw\", \"$(GhcRtsWithLibdw)\")" >> $@ + @echo "]" >> $@ + + +# --------------------------------------------------------------------------- +# Make DerivedConstants.h for the compiler + +includes_DERIVEDCONSTANTS = rts/include/dist-derivedconstants/header/DerivedConstants.h + +DERIVE_CONSTANTS_FLAGS_FOR_HEADER += --gcc-program "$(CC)" +DERIVE_CONSTANTS_FLAGS_FOR_HEADER += $(addprefix --gcc-flag$(space),$(includes_CC_OPTS) -fcommon) +DERIVE_CONSTANTS_FLAGS_FOR_HEADER += --nm-program "$(NM)" +ifneq "$(OBJDUMP)" "" +DERIVE_CONSTANTS_FLAGS_FOR_HEADER += --objdump-program "$(OBJDUMP)" +endif +DERIVE_CONSTANTS_FLAGS_FOR_HEADER += --target-os "$(TargetOS_CPP)" + +ifneq "$(BINDIST)" "YES" +$(includes_DERIVEDCONSTANTS): $$(includes_H_FILES) 
$$(rts_H_FILES) + +$(includes_DERIVEDCONSTANTS): $(deriveConstants_INPLACE) $(includes_1_H_CONFIG) $(includes_1_H_PLATFORM) | $$(dir $$@)/. + $< --gen-header -o $@ --tmpdir $(dir $@) $(DERIVE_CONSTANTS_FLAGS_FOR_HEADER) +endif + +# --------------------------------------------------------------------------- +# Install all header files + +$(eval $(call clean-target,includes,,\ + $(includes_0_H_CONFIG) $(includes_0_H_PLATFORM) $(includes_0_H_VERSION) \ + $(includes_1_H_CONFIG) $(includes_1_H_PLATFORM) $(includes_1_H_VERSION))) + +$(eval $(call all-target,includes,\ + $(includes_0_H_CONFIG) $(includes_0_H_PLATFORM) $(includes_0_H_VERSION) \ + $(includes_1_H_CONFIG) $(includes_1_H_PLATFORM) $(includes_1_H_VERSION) \ + $(includes_DERIVEDCONSTANTS))) + +install: install_includes + +.PHONY: install_includes +install_includes : $(includes_1_H_CONFIG) $(includes_1_H_PLATFORM) $(includes_1_H_VERSION) + $(INSTALL_DIR) "$(DESTDIR)$(ghcheaderdir)" + $(foreach d,$(includes_H_SUBDIRS), \ + $(INSTALL_DIR) "$(DESTDIR)$(ghcheaderdir)/$d" && \ + $(INSTALL_HEADER) $(INSTALL_OPTS) rts/include/$d/*.h "$(DESTDIR)$(ghcheaderdir)/$d/" && \ + ) true + $(INSTALL_HEADER) $(INSTALL_OPTS) \ + $(includes_1_H_CONFIG) $(includes_1_H_PLATFORM) $(includes_1_H_VERSION) \ + $(includes_DERIVEDCONSTANTS) \ + "$(DESTDIR)$(ghcheaderdir)/" + diff --git a/rts/include/ghcconfig.h b/rts/include/ghcconfig.h new file mode 100644 index 0000000000..7b99835a23 --- /dev/null +++ b/rts/include/ghcconfig.h @@ -0,0 +1,4 @@ +#pragma once + +#include "ghcautoconf.h" +#include "ghcplatform.h" diff --git a/rts/include/rts/Adjustor.h b/rts/include/rts/Adjustor.h new file mode 100644 index 0000000000..8965c7c8bb --- /dev/null +++ b/rts/include/rts/Adjustor.h @@ -0,0 +1,22 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * Adjustor API + * + * Do not #include this file directly: #include "Rts.h" instead. 
+ * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * -------------------------------------------------------------------------- */ + +#pragma once + +/* Creating and destroying an adjustor thunk */ +void* createAdjustor (int cconv, + StgStablePtr hptr, + StgFunPtr wptr, + char *typeString); + +void freeHaskellFunctionPtr (void* ptr); diff --git a/rts/include/rts/BlockSignals.h b/rts/include/rts/BlockSignals.h new file mode 100644 index 0000000000..46b0b0f562 --- /dev/null +++ b/rts/include/rts/BlockSignals.h @@ -0,0 +1,34 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * RTS signal handling + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +/* Used by runProcess() in the process package + */ + +/* + * Function: blockUserSignals() + * + * Temporarily block the delivery of further console events. Needed to + * avoid race conditions when GCing the queue of outstanding handlers or + * when emptying the queue by running the handlers. + * + */ +void blockUserSignals(void); + +/* + * Function: unblockUserSignals() + * + * The inverse of blockUserSignals(); re-enable the deliver of console events. + */ +void unblockUserSignals(void); diff --git a/rts/include/rts/Bytecodes.h b/rts/include/rts/Bytecodes.h new file mode 100644 index 0000000000..b97d4d4f60 --- /dev/null +++ b/rts/include/rts/Bytecodes.h @@ -0,0 +1,109 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * Bytecode definitions. 
+ * + * ---------------------------------------------------------------------------*/ + +/* -------------------------------------------------------------------------- + * Instructions + * + * Notes: + * o CASEFAIL is generated by the compiler whenever it tests an "irrefutable" + * pattern which fails. If we don't see too many of these, we could + * optimise out the redundant test. + * ------------------------------------------------------------------------*/ + +/* NOTE: + + THIS FILE IS INCLUDED IN HASKELL SOURCES (ghc/compiler/GHC/ByteCode/Asm.hs). + DO NOT PUT C-SPECIFIC STUFF IN HERE! + + I hope that's clear :-) +*/ + +#define bci_STKCHECK 1 +#define bci_PUSH_L 2 +#define bci_PUSH_LL 3 +#define bci_PUSH_LLL 4 +#define bci_PUSH8 5 +#define bci_PUSH16 6 +#define bci_PUSH32 7 +#define bci_PUSH8_W 8 +#define bci_PUSH16_W 9 +#define bci_PUSH32_W 10 +#define bci_PUSH_G 11 +#define bci_PUSH_ALTS 12 +#define bci_PUSH_ALTS_P 13 +#define bci_PUSH_ALTS_N 14 +#define bci_PUSH_ALTS_F 15 +#define bci_PUSH_ALTS_D 16 +#define bci_PUSH_ALTS_L 17 +#define bci_PUSH_ALTS_V 18 +#define bci_PUSH_PAD8 19 +#define bci_PUSH_PAD16 20 +#define bci_PUSH_PAD32 21 +#define bci_PUSH_UBX8 22 +#define bci_PUSH_UBX16 23 +#define bci_PUSH_UBX32 24 +#define bci_PUSH_UBX 25 +#define bci_PUSH_APPLY_N 26 +#define bci_PUSH_APPLY_F 27 +#define bci_PUSH_APPLY_D 28 +#define bci_PUSH_APPLY_L 29 +#define bci_PUSH_APPLY_V 30 +#define bci_PUSH_APPLY_P 31 +#define bci_PUSH_APPLY_PP 32 +#define bci_PUSH_APPLY_PPP 33 +#define bci_PUSH_APPLY_PPPP 34 +#define bci_PUSH_APPLY_PPPPP 35 +#define bci_PUSH_APPLY_PPPPPP 36 +/* #define bci_PUSH_APPLY_PPPPPPP 37 */ +#define bci_SLIDE 38 +#define bci_ALLOC_AP 39 +#define bci_ALLOC_AP_NOUPD 40 +#define bci_ALLOC_PAP 41 +#define bci_MKAP 42 +#define bci_MKPAP 43 +#define bci_UNPACK 44 +#define bci_PACK 45 +#define bci_TESTLT_I 46 +#define bci_TESTEQ_I 47 +#define bci_TESTLT_F 48 +#define bci_TESTEQ_F 49 +#define bci_TESTLT_D 50 +#define bci_TESTEQ_D 51 +#define bci_TESTLT_P 
52 +#define bci_TESTEQ_P 53 +#define bci_CASEFAIL 54 +#define bci_JMP 55 +#define bci_CCALL 56 +#define bci_SWIZZLE 57 +#define bci_ENTER 58 +#define bci_RETURN 59 +#define bci_RETURN_P 60 +#define bci_RETURN_N 61 +#define bci_RETURN_F 62 +#define bci_RETURN_D 63 +#define bci_RETURN_L 64 +#define bci_RETURN_V 65 +#define bci_BRK_FUN 66 +#define bci_TESTLT_W 67 +#define bci_TESTEQ_W 68 + +#define bci_RETURN_T 69 +#define bci_PUSH_ALTS_T 70 +/* If you need to go past 255 then you will run into the flags */ + +/* If you need to go below 0x0100 then you will run into the instructions */ +#define bci_FLAG_LARGE_ARGS 0x8000 + +/* If a BCO definitely requires less than this many words of stack, + don't include an explicit STKCHECK insn in it. The interpreter + will check for this many words of stack before running each BCO, + rendering an explicit check unnecessary in the majority of + cases. */ +#define INTERP_STACK_CHECK_THRESH 50 + +/*-------------------------------------------------------------------------*/ diff --git a/rts/include/rts/Config.h b/rts/include/rts/Config.h new file mode 100644 index 0000000000..289950af45 --- /dev/null +++ b/rts/include/rts/Config.h @@ -0,0 +1,52 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * Rts settings. + * + * NOTE: assumes #include "ghcconfig.h" + * + * NB: THIS FILE IS INCLUDED IN NON-C CODE AND DATA! #defines only please. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#if defined(TICKY_TICKY) && defined(THREADED_RTS) +#error TICKY_TICKY is incompatible with THREADED_RTS +#endif + +/* + * Whether the runtime system will use libbfd for debugging purposes. 
+ */ +#if defined(DEBUG) && defined(HAVE_BFD_H) && defined(HAVE_LIBBFD) && !defined(_WIN32) +#define USING_LIBBFD 1 +#endif + +/* DEBUG and PROFILING both imply TRACING */ +#if defined(DEBUG) || defined(PROFILING) +#if !defined(TRACING) +#define TRACING +#endif +#endif + +/* DEBUG implies TICKY_TICKY */ +#if defined(DEBUG) +#if !defined(TICKY_TICKY) +#define TICKY_TICKY +#endif +#endif + + +/* ----------------------------------------------------------------------------- + Signals - supported on non-PAR versions of the runtime. See RtsSignals.h. + -------------------------------------------------------------------------- */ + +#define RTS_USER_SIGNALS 1 + +/* Profile spin locks */ + +#define PROF_SPIN diff --git a/rts/include/rts/Constants.h b/rts/include/rts/Constants.h new file mode 100644 index 0000000000..9cbe47752e --- /dev/null +++ b/rts/include/rts/Constants.h @@ -0,0 +1,340 @@ +/* ---------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * Constants + * + * NOTE: this information is used by both the compiler and the RTS. + * Some of it is tweakable, and some of it must be kept up to date + * with various other parts of the system. + * + * Constants which are derived automatically from other definitions in + * the system (eg. structure sizes) are generated into the file + * DerivedConstants.h by a C program (utils/deriveConstants). + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * -------------------------------------------------------------------------- */ + +#pragma once + +/* ----------------------------------------------------------------------------- + Minimum closure sizes + + This is the minimum number of words in the payload of a heap-allocated + closure, so that the closure has two bits in the bitmap for mark-compact + collection. 
+ + See Note [Mark bits in mark-compact collector] in rts/sm/Compact.h + -------------------------------------------------------------------------- */ + +#define MIN_PAYLOAD_SIZE 1 + +/* ----------------------------------------------------------------------------- + Constants to do with specialised closure types. + -------------------------------------------------------------------------- */ + +/* We have some pre-compiled selector thunks defined in rts/StgStdThunks.hc. + * This constant defines the highest selectee index that we can replace with a + * reference to the pre-compiled code. + */ + +#define MAX_SPEC_SELECTEE_SIZE 15 + +/* Vector-apply thunks. These thunks just push their free variables + * on the stack and enter the first one. They're a bit like PAPs, but + * don't have a dynamic size. We've pre-compiled a few to save + * space. + */ + +#define MAX_SPEC_AP_SIZE 7 + +/* Specialised FUN/THUNK/CONSTR closure types */ + +#define MAX_SPEC_THUNK_SIZE 2 +#define MAX_SPEC_FUN_SIZE 2 +#define MAX_SPEC_CONSTR_SIZE 2 + +/* Range of built-in table of static small int-like and char-like closures. + * + * NB. This corresponds with the number of actual INTLIKE/CHARLIKE + * closures defined in rts/StgMiscClosures.cmm. + */ +#define MAX_INTLIKE 255 +#define MIN_INTLIKE (-16) + +#define MAX_CHARLIKE 255 +#define MIN_CHARLIKE 0 + +/* Each byte in the card table for an StgMutaArrPtrs covers + * (1<<MUT_ARR_PTRS_CARD_BITS) elements in the array. To find a good + * value for this, I used the benchmarks nofib/gc/hash, + * nofib/gc/graph, and nofib/gc/gc_bench. + */ +#define MUT_ARR_PTRS_CARD_BITS 7 + +/* ----------------------------------------------------------------------------- + STG Registers. + + Note that in MachRegs.h we define how many of these registers are + *real* machine registers, and not just offsets in the Register Table. 
+ -------------------------------------------------------------------------- */ + +#define MAX_VANILLA_REG 10 +#define MAX_FLOAT_REG 6 +#define MAX_DOUBLE_REG 6 +#define MAX_LONG_REG 1 +#define MAX_XMM_REG 6 + +/* ----------------------------------------------------------------------------- + Semi-Tagging constants + + Old Comments about this stuff: + + Tags for indirection nodes and ``other'' (probably unevaluated) nodes; + normal-form values of algebraic data types will have tags 0, 1, ... + + @INFO_IND_TAG@ is different from @INFO_OTHER_TAG@ just so we can count + how often we bang into indirection nodes; that's all. (WDP 95/11) + + ToDo: find out if we need any of this. + -------------------------------------------------------------------------- */ + +#define INFO_OTHER_TAG (-1) +#define INFO_IND_TAG (-2) +#define INFO_FIRST_TAG 0 + +/* ----------------------------------------------------------------------------- + How much C stack to reserve for local temporaries when in the STG + world. Used in StgCRun.c. + -------------------------------------------------------------------------- */ + +#define RESERVED_C_STACK_BYTES (2048 * SIZEOF_LONG) + +/* ----------------------------------------------------------------------------- + How large is the stack frame saved by StgRun? + world. Used in StgCRun.c. + + The size has to be enough to save the registers (see StgCRun) + plus padding if the result is not 16 byte aligned. + See the Note [Stack Alignment on X86] in StgCRun.c for details. + + -------------------------------------------------------------------------- */ +#if defined(x86_64_HOST_ARCH) +# if defined(mingw32_HOST_OS) +# define STG_RUN_STACK_FRAME_SIZE 144 +# else +# define STG_RUN_STACK_FRAME_SIZE 48 +# endif +#endif + +/* ----------------------------------------------------------------------------- + StgRun related labels shared between StgCRun.c and StgStartup.cmm. 
+ -------------------------------------------------------------------------- */ + +#if defined(LEADING_UNDERSCORE) +#define STG_RUN "_StgRun" +#define STG_RUN_JMP _StgRunJmp +#define STG_RETURN "_StgReturn" +#else +#define STG_RUN "StgRun" +#define STG_RUN_JMP StgRunJmp +#define STG_RETURN "StgReturn" +#endif + +/* ----------------------------------------------------------------------------- + How much Haskell stack space to reserve for the saving of registers + etc. in the case of a stack/heap overflow. + + This must be large enough to accommodate the largest stack frame + pushed in one of the heap check fragments in HeapStackCheck.hc + (ie. currently the generic heap checks - 3 words for StgRetDyn, + 18 words for the saved registers, see StgMacros.h). + -------------------------------------------------------------------------- */ + +#define RESERVED_STACK_WORDS 21 + +/* ----------------------------------------------------------------------------- + The limit on the size of the stack check performed when we enter an + AP_STACK, in words. See raiseAsync() and bug #1466. + -------------------------------------------------------------------------- */ + +#define AP_STACK_SPLIM 1024 + +/* ----------------------------------------------------------------------------- + Storage manager constants + -------------------------------------------------------------------------- */ + +/* The size of a block (2^BLOCK_SHIFT bytes) */ +#define BLOCK_SHIFT 12 + +/* The size of a megablock (2^MBLOCK_SHIFT bytes) */ +#define MBLOCK_SHIFT 20 + +/* ----------------------------------------------------------------------------- + Bitmap/size fields (used in info tables) + -------------------------------------------------------------------------- */ + +/* In a 32-bit bitmap field, we use 5 bits for the size, and 27 bits + * for the bitmap. If the bitmap requires more than 27 bits, then we + * store it in a separate array, and leave a pointer in the bitmap + * field. 
On a 64-bit machine, the sizes are extended accordingly. + */ +#if SIZEOF_VOID_P == 4 +#define BITMAP_SIZE_MASK 0x1f +#define BITMAP_BITS_SHIFT 5 +#elif SIZEOF_VOID_P == 8 +#define BITMAP_SIZE_MASK 0x3f +#define BITMAP_BITS_SHIFT 6 +#else +#error unknown SIZEOF_VOID_P +#endif + +/* ----------------------------------------------------------------------------- + Lag/Drag/Void constants + -------------------------------------------------------------------------- */ + +/* + An LDV word is divided into 3 parts: state bits (LDV_STATE_MASK), creation + time bits (LDV_CREATE_MASK), and last use time bits (LDV_LAST_MASK). + */ +#if SIZEOF_VOID_P == 8 +#define LDV_SHIFT 30 +#define LDV_STATE_MASK 0x1000000000000000 +#define LDV_CREATE_MASK 0x0FFFFFFFC0000000 +#define LDV_LAST_MASK 0x000000003FFFFFFF +#define LDV_STATE_CREATE 0x0000000000000000 +#define LDV_STATE_USE 0x1000000000000000 +#else +#define LDV_SHIFT 15 +#define LDV_STATE_MASK 0x40000000 +#define LDV_CREATE_MASK 0x3FFF8000 +#define LDV_LAST_MASK 0x00007FFF +#define LDV_STATE_CREATE 0x00000000 +#define LDV_STATE_USE 0x40000000 +#endif /* SIZEOF_VOID_P */ + +/* ----------------------------------------------------------------------------- + TSO related constants + -------------------------------------------------------------------------- */ + +/* + * Constants for the what_next field of a TSO, which indicates how it + * is to be run. + */ +#define ThreadRunGHC 1 /* return to address on top of stack */ +#define ThreadInterpret 2 /* interpret this thread */ +#define ThreadKilled 3 /* thread has died, don't run it */ +#define ThreadComplete 4 /* thread has finished */ + +/* + * Constants for the why_blocked field of a TSO + * NB. 
keep these in sync with GHC/Conc/Sync.hs: threadStatus + */ +#define NotBlocked 0 +#define BlockedOnMVar 1 +#define BlockedOnMVarRead 14 /* TODO: renumber me, see #9003 */ +#define BlockedOnBlackHole 2 +#define BlockedOnRead 3 +#define BlockedOnWrite 4 +#define BlockedOnDelay 5 +#define BlockedOnSTM 6 + +/* Win32 only: */ +#define BlockedOnDoProc 7 + +/* Only relevant for THREADED_RTS: */ +#define BlockedOnCCall 10 +#define BlockedOnCCall_Interruptible 11 + /* same as above but permit killing the worker thread */ + +/* Involved in a message sent to tso->msg_cap */ +#define BlockedOnMsgThrowTo 12 + +/* The thread is not on any run queues, but can be woken up + by tryWakeupThread() */ +#define ThreadMigrating 13 + +/* Lightweight non-deadlock checked version of MVar. Used for the why_blocked + field of a TSO. Threads blocked for this reason are not forcibly release by + the GC, as we expect them to be unblocked in the future based on outstanding + IO events. */ +#define BlockedOnIOCompletion 15 + +/* Next number is 16. */ + +/* + * These constants are returned to the scheduler by a thread that has + * stopped for one reason or another. See typedef StgThreadReturnCode + * in TSO.h. + */ +#define HeapOverflow 1 /* might also be StackOverflow */ +#define StackOverflow 2 +#define ThreadYielding 3 +#define ThreadBlocked 4 +#define ThreadFinished 5 + +/* + * Flags for the tso->flags field. + */ + +/* + * TSO_LOCKED is set when a TSO is locked to a particular Capability. + */ +#define TSO_LOCKED 2 + +/* + * TSO_BLOCKEX: the TSO is blocking exceptions + * + * TSO_INTERRUPTIBLE: the TSO can be interrupted if it blocks + * interruptibly (eg. with BlockedOnMVar). + * + * TSO_STOPPED_ON_BREAKPOINT: the thread is currently stopped in a breakpoint + */ +#define TSO_BLOCKEX 4 +#define TSO_INTERRUPTIBLE 8 +#define TSO_STOPPED_ON_BREAKPOINT 16 + +/* + * Used by the sanity checker to check whether TSOs are on the correct + * mutable list. 
+ */ +#define TSO_MARKED 64 + +/* + * Used to communicate between stackSqueeze() and + * threadStackOverflow() that a thread's stack was squeezed and the + * stack may not need to be expanded. + */ +#define TSO_SQUEEZED 128 + +/* + * Enables the AllocationLimitExceeded exception when the thread's + * allocation limit goes negative. + */ +#define TSO_ALLOC_LIMIT 256 + +/* + * The number of times we spin in a spin lock before yielding (see + * #3758). To tune this value, use the benchmark in #3758: run the + * server with -N2 and the client both on a dual-core. Also make sure + * that the chosen value doesn't slow down any of the parallel + * benchmarks in nofib/parallel. + */ +#define SPIN_COUNT 1000 + +/* ----------------------------------------------------------------------------- + Spare workers per Capability in the threaded RTS + + No more than MAX_SPARE_WORKERS will be kept in the thread pool + associated with each Capability. + -------------------------------------------------------------------------- */ + +#define MAX_SPARE_WORKERS 6 + +/* + * The maximum number of NUMA nodes we support. This is a fixed limit so that + * we can have static arrays of this size in the RTS for speed. + */ +#define MAX_NUMA_NODES 16 diff --git a/rts/include/rts/EventLogFormat.h b/rts/include/rts/EventLogFormat.h new file mode 100644 index 0000000000..4a2b339a9e --- /dev/null +++ b/rts/include/rts/EventLogFormat.h @@ -0,0 +1,246 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2008-2009 + * + * Event log format + * + * The log format is designed to be extensible: old tools should be + * able to parse (but not necessarily understand all of) new versions + * of the format, and new tools will be able to understand old log + * files. + * + * The canonical documentation for the event log format and record layouts is + * the "Eventlog encodings" section of the GHC User's Guide. 
+ * + * To add a new event + * ------------------ + * + * - In this file: + * - give it a new number, add a new #define EVENT_XXX + * below. Do not reuse event ids from deprecated event types. + * + * - In EventLog.c + * - add it to the EventDesc array + * - emit the event type in initEventLogging() + * - emit the new event in postEvent_() + * - generate the event itself by calling postEvent() somewhere + * + * - Describe the meaning and encoding of the event in the users guide + * (docs/user_guide/eventlog-formats.rst) + * + * - In the Haskell code to parse the event log file: + * - add types and code to read the new event + * + * -------------------------------------------------------------------------- */ + +#pragma once + +/* + * Markers for begin/end of the Header. + */ +#define EVENT_HEADER_BEGIN 0x68647262 /* 'h' 'd' 'r' 'b' */ +#define EVENT_HEADER_END 0x68647265 /* 'h' 'd' 'r' 'e' */ + +#define EVENT_DATA_BEGIN 0x64617462 /* 'd' 'a' 't' 'b' */ +#define EVENT_DATA_END 0xffff + +/* + * Markers for begin/end of the list of Event Types in the Header. 
+ * Header, Event Type, Begin = hetb + * Header, Event Type, End = hete + */ +#define EVENT_HET_BEGIN 0x68657462 /* 'h' 'e' 't' 'b' */ +#define EVENT_HET_END 0x68657465 /* 'h' 'e' 't' 'e' */ + +#define EVENT_ET_BEGIN 0x65746200 /* 'e' 't' 'b' 0 */ +#define EVENT_ET_END 0x65746500 /* 'e' 't' 'e' 0 */ + +/* + * Types of event + */ +#define EVENT_CREATE_THREAD 0 /* (thread) */ +#define EVENT_RUN_THREAD 1 /* (thread) */ +#define EVENT_STOP_THREAD 2 /* (thread, status, blockinfo) */ +#define EVENT_THREAD_RUNNABLE 3 /* (thread) */ +#define EVENT_MIGRATE_THREAD 4 /* (thread, new_cap) */ +/* 5, 6, 7 deprecated */ +#define EVENT_THREAD_WAKEUP 8 /* (thread, other_cap) */ +#define EVENT_GC_START 9 /* () */ +#define EVENT_GC_END 10 /* () */ +#define EVENT_REQUEST_SEQ_GC 11 /* () */ +#define EVENT_REQUEST_PAR_GC 12 /* () */ +/* 13, 14 deprecated */ +#define EVENT_CREATE_SPARK_THREAD 15 /* (spark_thread) */ +#define EVENT_LOG_MSG 16 /* (message ...) */ +/* 17 deprecated */ +#define EVENT_BLOCK_MARKER 18 /* (size, end_time, capability) */ +#define EVENT_USER_MSG 19 /* (message ...) 
*/ +#define EVENT_GC_IDLE 20 /* () */ +#define EVENT_GC_WORK 21 /* () */ +#define EVENT_GC_DONE 22 /* () */ +/* 23, 24 used by eden */ +#define EVENT_CAPSET_CREATE 25 /* (capset, capset_type) */ +#define EVENT_CAPSET_DELETE 26 /* (capset) */ +#define EVENT_CAPSET_ASSIGN_CAP 27 /* (capset, cap) */ +#define EVENT_CAPSET_REMOVE_CAP 28 /* (capset, cap) */ +/* the RTS identifier is in the form of "GHC-version rts_way" */ +#define EVENT_RTS_IDENTIFIER 29 /* (capset, name_version_string) */ +/* the vectors in these events are null separated strings */ +#define EVENT_PROGRAM_ARGS 30 /* (capset, commandline_vector) */ +#define EVENT_PROGRAM_ENV 31 /* (capset, environment_vector) */ +#define EVENT_OSPROCESS_PID 32 /* (capset, pid) */ +#define EVENT_OSPROCESS_PPID 33 /* (capset, parent_pid) */ +#define EVENT_SPARK_COUNTERS 34 /* (crt,dud,ovf,cnv,gcd,fiz,rem) */ +#define EVENT_SPARK_CREATE 35 /* () */ +#define EVENT_SPARK_DUD 36 /* () */ +#define EVENT_SPARK_OVERFLOW 37 /* () */ +#define EVENT_SPARK_RUN 38 /* () */ +#define EVENT_SPARK_STEAL 39 /* (victim_cap) */ +#define EVENT_SPARK_FIZZLE 40 /* () */ +#define EVENT_SPARK_GC 41 /* () */ +#define EVENT_INTERN_STRING 42 /* (string, id) {not used by ghc} */ +#define EVENT_WALL_CLOCK_TIME 43 /* (capset, unix_epoch_seconds, nanoseconds) */ +#define EVENT_THREAD_LABEL 44 /* (thread, name_string) */ +#define EVENT_CAP_CREATE 45 /* (cap) */ +#define EVENT_CAP_DELETE 46 /* (cap) */ +#define EVENT_CAP_DISABLE 47 /* (cap) */ +#define EVENT_CAP_ENABLE 48 /* (cap) */ +#define EVENT_HEAP_ALLOCATED 49 /* (heap_capset, alloc_bytes) */ +#define EVENT_HEAP_SIZE 50 /* (heap_capset, size_bytes) */ +#define EVENT_HEAP_LIVE 51 /* (heap_capset, live_bytes) */ +#define EVENT_HEAP_INFO_GHC 52 /* (heap_capset, n_generations, + max_heap_size, alloc_area_size, + mblock_size, block_size) */ +#define EVENT_GC_STATS_GHC 53 /* (heap_capset, generation, + copied_bytes, slop_bytes, frag_bytes, + par_n_threads, + par_max_copied, + par_tot_copied, 
par_balanced_copied) */ +#define EVENT_GC_GLOBAL_SYNC 54 /* () */ +#define EVENT_TASK_CREATE 55 /* (taskID, cap, tid) */ +#define EVENT_TASK_MIGRATE 56 /* (taskID, cap, new_cap) */ +#define EVENT_TASK_DELETE 57 /* (taskID) */ +#define EVENT_USER_MARKER 58 /* (marker_name) */ +#define EVENT_HACK_BUG_T9003 59 /* Hack: see trac #9003 */ + +/* Range 60 - 80 is used by eden for parallel tracing + * see http://www.mathematik.uni-marburg.de/~eden/ + */ + +#define EVENT_MEM_RETURN 90 /* (cap, current_mblocks, needed_mblocks, returned_mblocks) */ +#define EVENT_BLOCKS_SIZE 91 /* (heapcapset, size_bytes) */ + +/* Range 100 - 139 is reserved for Mercury. */ + +/* Range 140 - 159 is reserved for Perf events. */ + +/* Range 160 - 180 is reserved for cost-centre heap profiling events. */ + +#define EVENT_HEAP_PROF_BEGIN 160 +#define EVENT_HEAP_PROF_COST_CENTRE 161 +#define EVENT_HEAP_PROF_SAMPLE_BEGIN 162 +#define EVENT_HEAP_PROF_SAMPLE_COST_CENTRE 163 +#define EVENT_HEAP_PROF_SAMPLE_STRING 164 +#define EVENT_HEAP_PROF_SAMPLE_END 165 +#define EVENT_HEAP_BIO_PROF_SAMPLE_BEGIN 166 +#define EVENT_PROF_SAMPLE_COST_CENTRE 167 +#define EVENT_PROF_BEGIN 168 +#define EVENT_IPE 169 + +#define EVENT_USER_BINARY_MSG 181 + +#define EVENT_CONC_MARK_BEGIN 200 +#define EVENT_CONC_MARK_END 201 +#define EVENT_CONC_SYNC_BEGIN 202 +#define EVENT_CONC_SYNC_END 203 +#define EVENT_CONC_SWEEP_BEGIN 204 +#define EVENT_CONC_SWEEP_END 205 +#define EVENT_CONC_UPD_REM_SET_FLUSH 206 +#define EVENT_NONMOVING_HEAP_CENSUS 207 + +#define EVENT_TICKY_COUNTER_DEF 210 +#define EVENT_TICKY_COUNTER_SAMPLE 211 +#define EVENT_TICKY_COUNTER_BEGIN_SAMPLE 212 + +/* + * The highest event code +1 that ghc itself emits. Note that some event + * ranges higher than this are reserved but not currently emitted by ghc. 
+ * This must match the size of the EventDesc[] array in EventLog.c + */ +#define NUM_GHC_EVENT_TAGS 213 + +#if 0 /* DEPRECATED EVENTS: */ +/* we don't actually need to record the thread, it's implicit */ +#define EVENT_RUN_SPARK 5 /* (thread) */ +#define EVENT_STEAL_SPARK 6 /* (thread, victim_cap) */ +/* shutdown replaced by EVENT_CAP_DELETE */ +#define EVENT_SHUTDOWN 7 /* () */ +/* ghc changed how it handles sparks so these are no longer applicable */ +#define EVENT_CREATE_SPARK 13 /* (cap, thread) */ +#define EVENT_SPARK_TO_THREAD 14 /* (cap, thread, spark_thread) */ +#define EVENT_STARTUP 17 /* (num_capabilities) */ +/* these are used by eden but are replaced by new alternatives for ghc */ +#define EVENT_VERSION 23 /* (version_string) */ +#define EVENT_PROGRAM_INVOCATION 24 /* (commandline_string) */ +#endif + +/* + * Status values for EVENT_STOP_THREAD + * + * 1-5 are the StgRun return values (from rts/include/Constants.h): + * + * #define HeapOverflow 1 + * #define StackOverflow 2 + * #define ThreadYielding 3 + * #define ThreadBlocked 4 + * #define ThreadFinished 5 + * #define ForeignCall 6 + * #define BlockedOnMVar 7 + * #define BlockedOnBlackHole 8 + * #define BlockedOnRead 9 + * #define BlockedOnWrite 10 + * #define BlockedOnDelay 11 + * #define BlockedOnSTM 12 + * #define BlockedOnDoProc 13 + * #define BlockedOnCCall -- not used (see ForeignCall) + * #define BlockedOnCCall_NoUnblockExc -- not used (see ForeignCall) + * #define BlockedOnMsgThrowTo 16 + */ +#define THREAD_SUSPENDED_FOREIGN_CALL 6 + +/* + * Capset type values for EVENT_CAPSET_CREATE + */ +#define CAPSET_TYPE_CUSTOM 1 /* reserved for end-user applications */ +#define CAPSET_TYPE_OSPROCESS 2 /* caps belong to the same OS process */ +#define CAPSET_TYPE_CLOCKDOMAIN 3 /* caps share a local clock/time */ + +/* + * Heap profile breakdown types. See EVENT_HEAP_PROF_BEGIN. 
+ */ +typedef enum { + HEAP_PROF_BREAKDOWN_COST_CENTRE = 0x1, + HEAP_PROF_BREAKDOWN_MODULE, + HEAP_PROF_BREAKDOWN_CLOSURE_DESCR, + HEAP_PROF_BREAKDOWN_TYPE_DESCR, + HEAP_PROF_BREAKDOWN_RETAINER, + HEAP_PROF_BREAKDOWN_BIOGRAPHY, + HEAP_PROF_BREAKDOWN_CLOSURE_TYPE, + HEAP_PROF_BREAKDOWN_INFO_TABLE +} HeapProfBreakdown; + +#if !defined(EVENTLOG_CONSTANTS_ONLY) + +typedef StgWord16 EventTypeNum; +typedef StgWord64 EventTimestamp; /* in nanoseconds */ +typedef StgWord32 EventThreadID; +typedef StgWord16 EventCapNo; +typedef StgWord16 EventPayloadSize; /* variable-size events */ +typedef StgWord16 EventThreadStatus; /* status for EVENT_STOP_THREAD */ +typedef StgWord32 EventCapsetID; +typedef StgWord16 EventCapsetType; /* types for EVENT_CAPSET_CREATE */ +typedef StgWord64 EventTaskId; /* for EVENT_TASK_* */ +typedef StgWord64 EventKernelThreadId; /* for EVENT_TASK_CREATE */ + +#define EVENT_PAYLOAD_SIZE_MAX STG_WORD16_MAX +#endif diff --git a/rts/include/rts/EventLogWriter.h b/rts/include/rts/EventLogWriter.h new file mode 100644 index 0000000000..73a2aec64c --- /dev/null +++ b/rts/include/rts/EventLogWriter.h @@ -0,0 +1,75 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2008-2017 + * + * Support for fast binary event logging. + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#include <stddef.h> +#include <stdbool.h> + +/* + * Abstraction for writing eventlog data. + */ +typedef struct { + // Initialize an EventLogWriter (may be NULL) + void (* initEventLogWriter) (void); + + // Write a series of events returning true on success. + // Note that this may be called by multiple threads simultaneously. 
+ // The writer is responsible for concurrency control. + bool (* writeEventLog) (void *eventlog, size_t eventlog_size); + + // Flush possibly existing buffers (may be NULL) + // Note that this may be called by multiple threads simultaneously. + // The writer is responsible for concurrency control. + void (* flushEventLog) (void); + + // Close an initialized EventLogOutput (may be NULL) + void (* stopEventLogWriter) (void); +} EventLogWriter; + +/* + * An EventLogWriter which writes eventlogs to + * a file `program.eventlog`. + */ +extern const EventLogWriter FileEventLogWriter; + +enum EventLogStatus { + /* The runtime system wasn't compiled with eventlog support. */ + EVENTLOG_NOT_SUPPORTED, + /* An EventLogWriter has not yet been configured */ + EVENTLOG_NOT_CONFIGURED, + /* An EventLogWriter has been configured and is running. */ + EVENTLOG_RUNNING, +}; + +/* + * Query whether the current runtime system supports eventlogging. + */ +enum EventLogStatus eventLogStatus(void); + +/* + * Initialize event logging using the given EventLogWriter. + * Returns true on success or false if an EventLogWriter is already configured + * or eventlogging isn't supported by the runtime. + */ +bool startEventLogging(const EventLogWriter *writer); + +/* + * Stop event logging and destroy the current EventLogWriter. + */ +void endEventLogging(void); + +/* + * Flush the eventlog. cap can be NULL if one is not held. + */ +void flushEventLog(Capability **cap); diff --git a/rts/include/rts/ExecPage.h b/rts/include/rts/ExecPage.h new file mode 100644 index 0000000000..4261b71259 --- /dev/null +++ b/rts/include/rts/ExecPage.h @@ -0,0 +1,18 @@ +/* + * Utilities for managing dynamically-allocated executable pages. + */ + +#pragma once + +typedef struct { + char contents; +} ExecPage; + +/* Allocate a writable page. */ +ExecPage *allocateExecPage(void); + +/* Make a page previously allocated by allocateExecPage. 
*/ +void freezeExecPage(ExecPage *page); + +/* Free a page previously allocated by allocateExecPage. */ +void freeExecPage(ExecPage *page); diff --git a/rts/include/rts/FileLock.h b/rts/include/rts/FileLock.h new file mode 100644 index 0000000000..3d8056d7a0 --- /dev/null +++ b/rts/include/rts/FileLock.h @@ -0,0 +1,37 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2007-2009 + * + * File locking support as required by Haskell + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +/* Note [RTS File locking] + * ~~~~~~~~~~~~~~~~~~~~~~~ + * + * The Haskell report dictates certain file locking behaviour. + * This is specified in the Haskell98 report under: 21.2.3 File locking + * + * GHC does not rely on the platform it's on to implement this. + * Instead we keep track of locked files in a data structure in + * the RTS. This file provides the interface to this data structure. + * + * In the base libraries we then use this interface to "lock" files. + * This means it's very much still possible for users outside of the + * rts/base library to open the files in question even if they are + * locked. + * */ + +#pragma once + +#include "Stg.h" + +/* No valid FD would be negative, so use a word instead of int so the value + is compatible with a Windows handle. 
*/ +int lockFile(StgWord64 id, StgWord64 dev, StgWord64 ino, int for_writing); +int unlockFile(StgWord64 id); diff --git a/rts/include/rts/Flags.h b/rts/include/rts/Flags.h new file mode 100644 index 0000000000..11e7bfdaa7 --- /dev/null +++ b/rts/include/rts/Flags.h @@ -0,0 +1,330 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * Datatypes that holds the command-line flag settings. + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include "stg/Types.h" +#include "Time.h" + +/* For defaults, see the @initRtsFlagsDefaults@ routine. */ + +/* Note [Synchronization of flags and base APIs] + * + * We provide accessors to RTS flags in base. (GHC.RTS module) + * The API should be updated whenever RTS flags are modified. 
+ */ + +/* See Note [Synchronization of flags and base APIs] */ +typedef struct _GC_FLAGS { + FILE *statsFile; + uint32_t giveStats; +#define NO_GC_STATS 0 +#define COLLECT_GC_STATS 1 +#define ONELINE_GC_STATS 2 +#define SUMMARY_GC_STATS 3 +#define VERBOSE_GC_STATS 4 + + uint32_t maxStkSize; /* in *words* */ + uint32_t initialStkSize; /* in *words* */ + uint32_t stkChunkSize; /* in *words* */ + uint32_t stkChunkBufferSize; /* in *words* */ + + uint32_t maxHeapSize; /* in *blocks* */ + uint32_t minAllocAreaSize; /* in *blocks* */ + uint32_t largeAllocLim; /* in *blocks* */ + uint32_t nurseryChunkSize; /* in *blocks* */ + uint32_t minOldGenSize; /* in *blocks* */ + uint32_t heapSizeSuggestion; /* in *blocks* */ + bool heapSizeSuggestionAuto; + double oldGenFactor; + double returnDecayFactor; + double pcFreeHeap; + + bool useNonmoving; // default = false + bool nonmovingSelectorOpt; // Do selector optimization in the + // non-moving heap, default = false + uint32_t generations; + bool squeezeUpdFrames; + + bool compact; /* True <=> "compact all the time" */ + double compactThreshold; + + bool sweep; /* use "mostly mark-sweep" instead of copying + * for the oldest generation */ + bool ringBell; + + Time idleGCDelayTime; /* units: TIME_RESOLUTION */ + Time interIdleGCWait; /* units: TIME_RESOLUTION */ + bool doIdleGC; + + Time longGCSync; /* units: TIME_RESOLUTION */ + + StgWord heapBase; /* address to ask the OS for memory */ + + StgWord allocLimitGrace; /* units: *blocks* + * After an AllocationLimitExceeded + * exception has been raised, how much + * extra space is given to the thread + * to handle the exception before we + * raise it again. + */ + StgWord heapLimitGrace; /* units: *blocks* + * After a HeapOverflow exception has + * been raised, how much extra space is + * given to the thread to handle the + * exception before we raise it again. 
+ */ + + bool numa; /* Use NUMA */ + StgWord numaMask; +} GC_FLAGS; + +/* See Note [Synchronization of flags and base APIs] */ +typedef struct _DEBUG_FLAGS { + /* flags to control debugging output & extra checking in various subsystems */ + bool scheduler; /* 's' */ + bool interpreter; /* 'i' */ + bool weak; /* 'w' */ + bool gccafs; /* 'G' */ + bool gc; /* 'g' */ + bool nonmoving_gc; /* 'n' */ + bool block_alloc; /* 'b' */ + bool sanity; /* 'S' warning: might be expensive! */ + bool zero_on_gc; /* 'Z' */ + bool stable; /* 't' */ + bool prof; /* 'p' */ + bool linker; /* 'l' the object linker */ + bool apply; /* 'a' */ + bool stm; /* 'm' */ + bool squeeze; /* 'z' stack squeezing & lazy blackholing */ + bool hpc; /* 'c' coverage */ + bool sparks; /* 'r' */ + bool numa; /* '--debug-numa' */ + bool compact; /* 'C' */ +} DEBUG_FLAGS; + +/* See Note [Synchronization of flags and base APIs] */ +typedef struct _COST_CENTRE_FLAGS { + uint32_t doCostCentres; +# define COST_CENTRES_NONE 0 +# define COST_CENTRES_SUMMARY 1 +# define COST_CENTRES_VERBOSE 2 /* incl. serial time profile */ +# define COST_CENTRES_ALL 3 +# define COST_CENTRES_JSON 4 + + int profilerTicks; /* derived */ + int msecsPerTick; /* derived */ + char const *outputFileNameStem; +} COST_CENTRE_FLAGS; + +/* See Note [Synchronization of flags and base APIs] */ +typedef struct _PROFILING_FLAGS { + uint32_t doHeapProfile; +# define NO_HEAP_PROFILING 0 /* N.B. 
Used as indexes into arrays */ +# define HEAP_BY_CCS 1 +# define HEAP_BY_MOD 2 +# define HEAP_BY_DESCR 4 +# define HEAP_BY_TYPE 5 +# define HEAP_BY_RETAINER 6 +# define HEAP_BY_LDV 7 + +# define HEAP_BY_CLOSURE_TYPE 8 +# define HEAP_BY_INFO_TABLE 9 + + Time heapProfileInterval; /* time between samples */ + uint32_t heapProfileIntervalTicks; /* ticks between samples (derived) */ + bool startHeapProfileAtStartup; /* true if we start profiling from program startup */ + + + bool showCCSOnException; + + uint32_t maxRetainerSetSize; + + uint32_t ccsLength; + + const char* modSelector; + const char* descrSelector; + const char* typeSelector; + const char* ccSelector; + const char* ccsSelector; + const char* retainerSelector; + const char* bioSelector; + +} PROFILING_FLAGS; + +#define TRACE_NONE 0 +#define TRACE_EVENTLOG 1 +#define TRACE_STDERR 2 + +/* See Note [Synchronization of flags and base APIs] */ +typedef struct _TRACE_FLAGS { + int tracing; + bool timestamp; /* show timestamp in stderr output */ + bool scheduler; /* trace scheduler events */ + bool gc; /* trace GC events */ + bool nonmoving_gc; /* trace nonmoving GC events */ + bool sparks_sampled; /* trace spark events by a sampled method */ + bool sparks_full; /* trace spark events 100% accurately */ + bool ticky; /* trace ticky-ticky samples */ + bool user; /* trace user events (emitted from Haskell code) */ + Time eventlogFlushTime; /* Time between force eventlog flushes (or 0 if disabled) */ + int eventlogFlushTicks; + char *trace_output; /* output filename for eventlog */ +} TRACE_FLAGS; + +/* See Note [Synchronization of flags and base APIs] */ +typedef struct _CONCURRENT_FLAGS { + Time ctxtSwitchTime; /* units: TIME_RESOLUTION */ + int ctxtSwitchTicks; /* derived */ +} CONCURRENT_FLAGS; + +/* + * The tickInterval is the time interval between "ticks", ie. + * timer signals (see Timer.{c,h}). It is the frequency at + * which we sample CCCS for profiling. + * + * It is changed by the +RTS -V<secs> flag. 
+ */ +#define DEFAULT_TICK_INTERVAL USToTime(10000) + +/* + * When linkerAlwaysPic is true, the runtime linker assume that all object + * files were compiled with -fPIC -fexternal-dynamic-refs and load them + * anywhere in the address space. + * Note that there is no 32bit darwin system we can realistically expect to + * run on or compile for. + */ +#if defined(darwin_HOST_OS) || defined(aarch64_HOST_ARCH) || defined(arm_HOST_ARCH) +#define DEFAULT_LINKER_ALWAYS_PIC true +#else +#define DEFAULT_LINKER_ALWAYS_PIC false +#endif + +/* Which I/O Manager to use in the target program. */ +typedef enum _IO_MANAGER { IO_MNGR_NATIVE, IO_MNGR_POSIX } IO_MANAGER; + +/* See Note [Synchronization of flags and base APIs] */ +typedef struct _MISC_FLAGS { + Time tickInterval; /* units: TIME_RESOLUTION */ + bool install_signal_handlers; + bool install_seh_handlers; + bool generate_dump_file; + bool generate_stack_trace; + bool machineReadable; + bool disableDelayedOsMemoryReturn; /* See Note [MADV_FREE and MADV_DONTNEED]. + It's in `MiscFlags` instead of + `GcFlags` because if GHC used madvise() + memory management for non-GC related + tasks in the future, we'd respect it + there as well. */ + bool internalCounters; /* See Note [Internal Counter Stats] */ + bool linkerAlwaysPic; /* Assume the object code is always PIC */ + StgWord linkerMemBase; /* address to ask the OS for memory + * for the linker, NULL ==> off */ + IO_MANAGER ioManager; /* The I/O manager to use. */ + uint32_t numIoWorkerThreads; /* Number of I/O worker threads to use. 
*/ +} MISC_FLAGS; + +/* See Note [Synchronization of flags and base APIs] */ +typedef struct _PAR_FLAGS { + uint32_t nCapabilities; /* number of threads to run simultaneously */ + bool migrate; /* migrate threads between capabilities */ + uint32_t maxLocalSparks; + bool parGcEnabled; /* enable parallel GC */ + uint32_t parGcGen; /* do parallel GC in this generation + * and higher only */ + bool parGcLoadBalancingEnabled; + /* enable load-balancing in the + * parallel GC */ + uint32_t parGcLoadBalancingGen; + /* do load-balancing in this + * generation and higher only */ + + uint32_t parGcNoSyncWithIdle; + /* if a Capability has been idle for + * this many GCs, do not try to wake + * it up when doing a + * non-load-balancing parallel GC. + * (zero disables) */ + + uint32_t parGcThreads; + /* Use this many threads for parallel + * GC (default: use all nNodes). */ + + bool setAffinity; /* force thread affinity with CPUs */ +} PAR_FLAGS; + +/* See Note [Synchronization of flags and base APIs] */ +typedef struct _TICKY_FLAGS { + bool showTickyStats; + FILE *tickyFile; +} TICKY_FLAGS; + +/* Put them together: */ + +/* See Note [Synchronization of flags and base APIs] */ +typedef struct _RTS_FLAGS { + /* The first portion of RTS_FLAGS is invariant. */ + GC_FLAGS GcFlags; + CONCURRENT_FLAGS ConcFlags; + MISC_FLAGS MiscFlags; + DEBUG_FLAGS DebugFlags; + COST_CENTRE_FLAGS CcFlags; + PROFILING_FLAGS ProfFlags; + TRACE_FLAGS TraceFlags; + TICKY_FLAGS TickyFlags; + PAR_FLAGS ParFlags; +} RTS_FLAGS; + +#if defined(COMPILING_RTS_MAIN) +extern DLLIMPORT RTS_FLAGS RtsFlags; +#elif IN_STG_CODE +/* Note [RtsFlags is a pointer in STG code] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * When compiling with IN_STG_CODE the RtsFlags symbol is defined as a pointer. + * This is necessary because the C code generator can't generate '&label'. 
+ */ +extern RTS_FLAGS RtsFlags[]; +#else +extern RTS_FLAGS RtsFlags; +#endif + +/* + * The printf formats are here, so we are less likely to make + * overly-long filenames (with disastrous results). No more than 128 + * chars, please! + */ + +#define STATS_FILENAME_MAXLEN 128 + +#define GR_FILENAME_FMT "%0.124s.gr" +#define HP_FILENAME_FMT "%0.124s.hp" +#define LIFE_FILENAME_FMT "%0.122s.life" +#define PROF_FILENAME_FMT "%0.122s.prof" +#define PROF_FILENAME_FMT_GUM "%0.118s.%03d.prof" +#define QP_FILENAME_FMT "%0.124s.qp" +#define STAT_FILENAME_FMT "%0.122s.stat" +#define TICKY_FILENAME_FMT "%0.121s.ticky" +#define TIME_FILENAME_FMT "%0.122s.time" +#define TIME_FILENAME_FMT_GUM "%0.118s.%03d.time" + +/* an "int" so as to match normal "argc" */ +/* Now defined in Stg.h (lib/std/cbits need these too.) +extern int prog_argc; +extern char **prog_argv; +*/ +extern int rts_argc; /* ditto */ +extern char **rts_argv; diff --git a/rts/include/rts/ForeignExports.h b/rts/include/rts/ForeignExports.h new file mode 100644 index 0000000000..aeb524aebf --- /dev/null +++ b/rts/include/rts/ForeignExports.h @@ -0,0 +1,38 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1995-2009 + * + * Interface to the RTS's foreign export tracking code. + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +struct _ObjectCode; + +/* N.B. See Note [Tracking foreign exports] in + * rts/ForeignExports.c. */ +struct ForeignExportsList { + /* a link field for linking these together into lists. + */ + struct ForeignExportsList *next; + /* the length of ->exports */ + int n_entries; + /* if the RTS linker loaded the module, + * to which ObjectCode these exports belong. 
*/ + struct _ObjectCode *oc; + /* if the RTS linker loaded the module, + * this points to an array of length ->n_entries + * recording the StablePtr for each export. */ + StgStablePtr **stable_ptrs; + /* the exported closures. of length ->exports. */ + StgPtr exports[]; +}; + +void registerForeignExports(struct ForeignExportsList *exports); + diff --git a/rts/include/rts/GetTime.h b/rts/include/rts/GetTime.h new file mode 100644 index 0000000000..53207ce307 --- /dev/null +++ b/rts/include/rts/GetTime.h @@ -0,0 +1,16 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1995-2009 + * + * Interface to the RTS time + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +StgWord64 getMonotonicNSec (void); diff --git a/rts/include/rts/Globals.h b/rts/include/rts/Globals.h new file mode 100644 index 0000000000..bd3aa637db --- /dev/null +++ b/rts/include/rts/Globals.h @@ -0,0 +1,36 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2006-2009 + * + * The RTS stores some "global" values on behalf of libraries, so that + * some libraries can ensure that certain top-level things are shared + * even when multiple versions of the library are loaded. e.g. see + * Data.Typeable and GHC.Conc. + * + * Do not #include this file directly: #include "Rts.h" instead. 
+ * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#define mkStoreAccessorPrototype(name) \ + StgStablePtr \ + getOrSet##name(StgStablePtr ptr); + +mkStoreAccessorPrototype(GHCConcSignalSignalHandlerStore) +mkStoreAccessorPrototype(GHCConcWindowsPendingDelaysStore) +mkStoreAccessorPrototype(GHCConcWindowsIOManagerThreadStore) +mkStoreAccessorPrototype(GHCConcWindowsProddingStore) +mkStoreAccessorPrototype(SystemEventThreadEventManagerStore) +mkStoreAccessorPrototype(SystemEventThreadIOManagerThreadStore) +mkStoreAccessorPrototype(SystemTimerThreadEventManagerStore) +mkStoreAccessorPrototype(SystemTimerThreadIOManagerThreadStore) +mkStoreAccessorPrototype(LibHSghcFastStringTable) +mkStoreAccessorPrototype(LibHSghcGlobalHasPprDebug) +mkStoreAccessorPrototype(LibHSghcGlobalHasNoDebugOutput) +mkStoreAccessorPrototype(LibHSghcGlobalHasNoStateHack) +extern HsInt ghc_unique_counter; +extern HsInt ghc_unique_inc; diff --git a/rts/include/rts/Hpc.h b/rts/include/rts/Hpc.h new file mode 100644 index 0000000000..85d00ca485 --- /dev/null +++ b/rts/include/rts/Hpc.h @@ -0,0 +1,34 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2008-2009 + * + * Haskell Program Coverage + * + * Do not #include this file directly: #include "Rts.h" instead. 
+ * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * -------------------------------------------------------------------------- */ + +#pragma once + +// Simple linked list of modules +typedef struct _HpcModuleInfo { + char *modName; // name of module + StgWord32 tickCount; // number of ticks + StgWord32 hashNo; // Hash number for this module's mix info + StgWord64 *tixArr; // tix Array; local for this module + bool from_file; // data was read from the .tix file + struct _HpcModuleInfo *next; +} HpcModuleInfo; + +void hs_hpc_module (char *modName, + StgWord32 modCount, + StgWord32 modHashNo, + StgWord64 *tixArr); + +HpcModuleInfo * hs_hpc_rootModule (void); + +void startupHpc(void); +void exitHpc(void); diff --git a/rts/include/rts/IOInterface.h b/rts/include/rts/IOInterface.h new file mode 100644 index 0000000000..9a646cc5cf --- /dev/null +++ b/rts/include/rts/IOInterface.h @@ -0,0 +1,38 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * IO Manager functionality in the RTS + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * -------------------------------------------------------------------------- */ + +#pragma once + +#if defined(mingw32_HOST_OS) + +#define IO_MANAGER_WAKEUP 0xffffffff +#define IO_MANAGER_DIE 0xfffffffe +/* spurious wakeups are returned as zero. */ +/* console events are ((event<<1) | 1). 
*/ + +int rts_InstallConsoleEvent ( int action, StgStablePtr *handler ); +void rts_ConsoleHandlerDone ( int ev ); +extern StgInt console_handler; + +void * getIOManagerEvent (void); +HsWord32 readIOManagerEvent (void); +void sendIOManagerEvent (HsWord32 event); + +#else + +void setIOManagerControlFd (uint32_t cap_no, int fd); +void setTimerManagerControlFd(int fd); +void setIOManagerWakeupFd (int fd); + +#endif + diff --git a/rts/include/rts/IPE.h b/rts/include/rts/IPE.h new file mode 100644 index 0000000000..81a6d553d0 --- /dev/null +++ b/rts/include/rts/IPE.h @@ -0,0 +1,35 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2017-2018 + * + * IPE API + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * -------------------------------------------------------------------------- */ + +#pragma once + + +typedef struct InfoProv_{ + char * table_name; + char * closure_desc; + char * ty_desc; + char * label; + char * module; + char * srcloc; +} InfoProv; + +typedef struct InfoProvEnt_ { + StgInfoTable * info; + InfoProv prov; + struct InfoProvEnt_ *link; +} InfoProvEnt; + +extern InfoProvEnt * RTS_VAR(IPE_LIST); // registered IP list + +void registerInfoProvList(InfoProvEnt **cc_list); +InfoProvEnt * lookupIPE(StgInfoTable *info); diff --git a/rts/include/rts/Libdw.h b/rts/include/rts/Libdw.h new file mode 100644 index 0000000000..d7bd55d06e --- /dev/null +++ b/rts/include/rts/Libdw.h @@ -0,0 +1,97 @@ +/* --------------------------------------------------------------------------- + * + * (c) The GHC Team, 2014-2015 + * + * Producing DWARF-based stacktraces with libdw. 
+ * + * --------------------------------------------------------------------------*/ + +#pragma once + +// for FILE +#include <stdio.h> + +// Chunk capacity +// This is rather arbitrary +#define BACKTRACE_CHUNK_SZ 256 + +/* + * Note [Chunked stack representation] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * Consider the stack, + * main calls (bottom of stack) + * func1 which in turn calls + * func2 which calls + * func3 which calls + * func4 which calls + * func5 which calls + * func6 which calls + * func7 which requests a backtrace (top of stack) + * + * This would produce the Backtrace (using a smaller chunk size of three for + * illustrative purposes), + * + * Backtrace /----> Chunk /----> Chunk /----> Chunk + * last --------/ next --------/ next --------/ next + * n_frames=8 n_frames=2 n_frames=3 n_frames=3 + * ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~ + * func1 func4 func7 + * main func3 func6 + * func2 func5 + * + */ + +/* A chunk of code addresses from an execution stack + * + * The first address in this list corresponds to the stack frame + * nearest to the "top" of the stack. + */ +typedef struct BacktraceChunk_ { + StgWord n_frames; // number of frames in this chunk + struct BacktraceChunk_ *next; // the chunk following this one + StgPtr frames[BACKTRACE_CHUNK_SZ]; // the code addresses from the + // frames +} __attribute__((packed)) BacktraceChunk; + +/* A chunked list of code addresses from an execution stack + * + * This structure is optimized for append operations since we append O(stack + * depth) times yet typically only traverse the stack trace once. Consequently, + * the "top" stack frame (that is, the one where we started unwinding) can be + * found in the last chunk. Yes, this is a bit inconsistent with the ordering + * within a chunk. See Note [Chunked stack representation] for a depiction. 
+ */ +typedef struct Backtrace_ { + StgWord n_frames; // Total number of frames in the backtrace + BacktraceChunk *last; // The first chunk of frames (corresponding to the + // bottom of the stack) +} Backtrace; + +/* Various information describing the location of an address */ +typedef struct Location_ { + const char *object_file; + const char *function; + + // lineno and colno are only valid if source_file /= NULL + const char *source_file; + StgWord32 lineno; + StgWord32 colno; +} __attribute__((packed)) Location; + +struct LibdwSession_; +typedef struct LibdwSession_ LibdwSession; + +/* Free a backtrace */ +void backtraceFree(Backtrace *bt); + +/* Request a backtrace of the current stack state. + * May return NULL if a backtrace can't be acquired. */ +Backtrace *libdwGetBacktrace(LibdwSession *session); + +/* Lookup Location information for the given address. + * Returns 0 if successful, 1 if address could not be found. */ +int libdwLookupLocation(LibdwSession *session, Location *loc, StgPtr pc); + +/* Pretty-print a backtrace to the given FILE */ +void libdwPrintBacktrace(LibdwSession *session, FILE *file, Backtrace *bt); diff --git a/rts/include/rts/LibdwPool.h b/rts/include/rts/LibdwPool.h new file mode 100644 index 0000000000..76ff41c8c7 --- /dev/null +++ b/rts/include/rts/LibdwPool.h @@ -0,0 +1,19 @@ +/* --------------------------------------------------------------------------- + * + * (c) The GHC Team, 2015-2016 + * + * A pool of libdw sessions + * + * --------------------------------------------------------------------------*/ + +#pragma once + +/* Claim a session from the pool */ +LibdwSession *libdwPoolTake(void); + +/* Return a session to the pool */ +void libdwPoolRelease(LibdwSession *sess); + +/* Free any sessions in the pool forcing a reload of any loaded debug + * information */ +void libdwPoolClear(void); diff --git a/rts/include/rts/Linker.h b/rts/include/rts/Linker.h new file mode 100644 index 0000000000..1f3719c0c7 --- /dev/null +++ 
b/rts/include/rts/Linker.h @@ -0,0 +1,114 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2009 + * + * RTS Object Linker + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#if defined(mingw32_HOST_OS) +typedef wchar_t pathchar; +#define PATH_FMT "ls" +#else +typedef char pathchar; +#define PATH_FMT "s" +#endif + +/* Initialize the object linker. Equivalent to initLinker_(1). */ +void initLinker (void); + +/* Initialize the object linker. + * The retain_cafs argument is: + * + * non-zero => Retain CAFs unconditionally in linked Haskell code. + * Note that this prevents any code from being unloaded. + * It should not be necessary unless you are GHCi or + * hs-plugins, which needs to be able call any function + * in the compiled code. + * + * zero => Do not retain CAFs. Everything reachable from foreign + * exports will be retained, due to the StablePtrs + * created by the module initialisation code. unloadObj + * frees these StablePtrs, which will allow the CAFs to + * be GC'd and the code to be removed. 
+ */ +void initLinker_ (int retain_cafs); + +/* insert a symbol in the hash table */ +HsInt insertSymbol(pathchar* obj_name, char* key, void* data); + +/* lookup a symbol in the hash table */ +void *lookupSymbol( char *lbl ); + +/* See Linker.c Note [runtime-linker-phases] */ +typedef enum { + OBJECT_LOADED, + OBJECT_NEEDED, + OBJECT_RESOLVED, + OBJECT_UNLOADED, + OBJECT_DONT_RESOLVE, + OBJECT_NOT_LOADED /* The object was either never loaded or has been + fully unloaded */ +} OStatus; + +/* check object load status */ +OStatus getObjectLoadStatus( pathchar *path ); + +/* delete an object from the pool */ +HsInt unloadObj( pathchar *path ); + +/* purge an object's symbols from the symbol table, but don't unload it */ +HsInt purgeObj( pathchar *path ); + +/* add an obj (populate the global symbol table, but don't resolve yet) */ +HsInt loadObj( pathchar *path ); + +/* add an arch (populate the global symbol table, but don't resolve yet) */ +HsInt loadArchive( pathchar *path ); + +/* resolve all the currently unlinked objects in memory */ +HsInt resolveObjs( void ); + +/* Load an .so using the system linker. + Returns a handle that can be passed to dlsym() or NULL on error. + + In the case of error, stores the error message in errmsg. The caller + is responsible for freeing it. */ +void *loadNativeObj( pathchar *path, char **errmsg ); + +/* Mark the .so loaded with the system linker for unloading. + The RTS will unload it when all the references to the .so disappear from + the heap. + Takes the handle returned from loadNativeObj() as an argument. 
 */
+HsInt unloadNativeObj( void *handle );
+
+/* load a dynamic library */
+const char *addDLL( pathchar* dll_name );
+
+/* add a path to the library search path */
+HsPtr addLibrarySearchPath(pathchar* dll_path);
+
+/* removes a directory from the search path,
+   path must have been added using addLibrarySearchPath */
+HsBool removeLibrarySearchPath(HsPtr dll_path_index);
+
+/* give a warning about missing Windows patches that would make
+   the linker work better */
+void warnMissingKBLibraryPaths( void );
+
+/* -----------------------------------------------------------------------------
+* Searches the system directories to determine if there is a system DLL that
+* satisfies the given name. This prevents GHCi from linking against a static
+* library if a DLL is available.
+*/
+pathchar* findSystemLibrary(pathchar* dll_name);
+
+/* called by the initialization code for a module, not a user API */
+StgStablePtr foreignExportStablePtr (StgPtr p);
diff --git a/rts/include/rts/Main.h b/rts/include/rts/Main.h
new file mode 100644
index 0000000000..05924ad92b
--- /dev/null
+++ b/rts/include/rts/Main.h
@@ -0,0 +1,18 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2009
+ *
+ * Entry point for standalone Haskell programs.
+ * + * ---------------------------------------------------------------------------*/ + +#pragma once + +/* ----------------------------------------------------------------------------- + * The entry point for Haskell programs that use a Haskell main function + * -------------------------------------------------------------------------- */ + +int hs_main (int argc, char *argv[], // program args + StgClosure *main_closure, // closure for Main.main + RtsConfig rts_config) // RTS configuration + GNUC3_ATTRIBUTE(__noreturn__); diff --git a/rts/include/rts/Messages.h b/rts/include/rts/Messages.h new file mode 100644 index 0000000000..dbaf37bbc7 --- /dev/null +++ b/rts/include/rts/Messages.h @@ -0,0 +1,104 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * Message API for use inside the RTS. All messages generated by the + * RTS should go through one of the functions declared here, and we + * also provide hooks so that messages from the RTS can be redirected + * as appropriate. + * + * Do not #include this file directly: #include "Rts.h" instead. 
+ * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#include <stdarg.h> + +#if defined(mingw32_HOST_OS) && !defined(__clang__) +/* On Win64, if we say "printf" then gcc thinks we are going to use + MS format specifiers like %I64d rather than %llu */ +#define PRINTF gnu_printf +#else +/* However, on OS X, "gnu_printf" isn't recognised */ +#define PRINTF printf +#endif + +/* ----------------------------------------------------------------------------- + * Message generation + * -------------------------------------------------------------------------- */ + +/* + * A fatal internal error: this is for errors that probably indicate + * bugs in the RTS or compiler. We normally output bug reporting + * instructions along with the error message. + * + * barf() invokes (*fatalInternalErrorFn)(). This function is not + * expected to return. + */ +void barf(const char *s, ...) + GNUC3_ATTRIBUTE(__noreturn__) + GNUC3_ATTRIBUTE(format(PRINTF, 1, 2)); + +void vbarf(const char *s, va_list ap) + GNUC3_ATTRIBUTE(__noreturn__); + +// declared in Rts.h: +// extern void _assertFail(const char *filename, unsigned int linenum) +// GNUC3_ATTRIBUTE(__noreturn__); + +/* + * An error condition which is caused by and/or can be corrected by + * the user. + * + * errorBelch() invokes (*errorMsgFn)(). + */ +void errorBelch(const char *s, ...) + GNUC3_ATTRIBUTE(format (PRINTF, 1, 2)); + +void verrorBelch(const char *s, va_list ap); + +/* + * An error condition which is caused by and/or can be corrected by + * the user, and which has an associated error condition reported + * by the system (in errno on Unix, and GetLastError() on Windows). + * The system error message is appended to the message generated + * from the supplied format string. + * + * sysErrorBelch() invokes (*sysErrorMsgFn)(). 
+ */ +void sysErrorBelch(const char *s, ...) + GNUC3_ATTRIBUTE(format (PRINTF, 1, 2)); + +void vsysErrorBelch(const char *s, va_list ap); + +/* + * A debugging message. Debugging messages are generated either as a + * virtue of having DEBUG turned on, or by being explicitly selected + * via RTS options (eg. +RTS -Ds). + * + * debugBelch() invokes (*debugMsgFn)(). + */ +void debugBelch(const char *s, ...) + GNUC3_ATTRIBUTE(format (PRINTF, 1, 2)); + +void vdebugBelch(const char *s, va_list ap); + + +/* Hooks for redirecting message generation: */ + +typedef void RtsMsgFunction(const char *, va_list); + +extern RtsMsgFunction *fatalInternalErrorFn; +extern RtsMsgFunction *debugMsgFn; +extern RtsMsgFunction *errorMsgFn; + +/* Default stdio implementation of the message hooks: */ + +extern RtsMsgFunction rtsFatalInternalErrorFn; +extern RtsMsgFunction rtsDebugMsgFn; +extern RtsMsgFunction rtsErrorMsgFn; +extern RtsMsgFunction rtsSysErrorMsgFn; diff --git a/rts/include/rts/NonMoving.h b/rts/include/rts/NonMoving.h new file mode 100644 index 0000000000..35a04fe940 --- /dev/null +++ b/rts/include/rts/NonMoving.h @@ -0,0 +1,43 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2018-2019 + * + * Non-moving garbage collector + * + * Do not #include this file directly: #include "Rts.h" instead. 
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+// Forward declaration for Stg.h
+struct StgClosure_;
+struct StgThunk_;
+struct Capability_;
+
+/* This is called by the code generator */
+extern DLL_IMPORT_RTS
+void updateRemembSetPushClosure_(StgRegTable *reg, struct StgClosure_ *p);
+
+extern DLL_IMPORT_RTS
+void updateRemembSetPushThunk_(StgRegTable *reg, struct StgThunk_ *p);
+
+// Forward declaration for unregisterised backend.
+EF_(stg_copyArray_barrier);
+
+// Note that RTS code should not condition on this directly but rather
+// use the IF_NONMOVING_WRITE_BARRIER_ENABLED macro to ensure that
+// the barrier is eliminated in the non-threaded RTS.
+extern StgWord DLL_IMPORT_DATA_VAR(nonmoving_write_barrier_enabled);
+
+// A similar macro is defined in rts/include/Cmm.h for C-- code.
+#if defined(THREADED_RTS)
+#define IF_NONMOVING_WRITE_BARRIER_ENABLED \
+    if (RTS_UNLIKELY(nonmoving_write_barrier_enabled))
+#else
+#define IF_NONMOVING_WRITE_BARRIER_ENABLED \
+    if (0)
+#endif
diff --git a/rts/include/rts/OSThreads.h b/rts/include/rts/OSThreads.h
new file mode 100644
index 0000000000..d24a1313a6
--- /dev/null
+++ b/rts/include/rts/OSThreads.h
@@ -0,0 +1,267 @@
+/* ---------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2001-2009
+ *
+ * Accessing OS threads functionality in a (mostly) OS-independent
+ * manner.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * --------------------------------------------------------------------------*/ + +#pragma once + +#if defined(HAVE_PTHREAD_H) && !defined(mingw32_HOST_OS) + +#if defined(CMINUSMINUS) + +#define OS_ACQUIRE_LOCK(mutex) foreign "C" pthread_mutex_lock(mutex) +#define OS_RELEASE_LOCK(mutex) foreign "C" pthread_mutex_unlock(mutex) +#define OS_ASSERT_LOCK_HELD(mutex) /* nothing */ + +#else + +#include <pthread.h> +#include <errno.h> + +typedef struct { + pthread_cond_t cond; + + // Which clock are pthread_cond_timedwait calls referenced against? + // N.B. Some older Darwin releases don't support clock_gettime. However, we + // do want to reference to CLOCK_MONOTONIC whenever possible as it is more + // robust against system time changes and is likely cheaper to query. +#if defined(HAVE_CLOCK_GETTIME) && defined(HAVE_PTHREAD_CONDATTR_SETCLOCK) + clockid_t timeout_clk; +#endif +} Condition; +typedef pthread_mutex_t Mutex; +typedef pthread_t OSThreadId; +typedef pthread_key_t ThreadLocalKey; + +#define OSThreadProcAttr /* nothing */ + +#define INIT_COND_VAR PTHREAD_COND_INITIALIZER + +#if defined(LOCK_DEBUG) +#define LOCK_DEBUG_BELCH(what, mutex) \ + debugBelch("%s(0x%p) %s %d\n", what, mutex, __FILE__, __LINE__) +#else +#define LOCK_DEBUG_BELCH(what, mutex) /* nothing */ +#endif + +/* Always check the result of lock and unlock. */ +#define OS_ACQUIRE_LOCK(mutex) { \ + LOCK_DEBUG_BELCH("ACQUIRE_LOCK", mutex); \ + int __r = pthread_mutex_lock(mutex); \ + if (__r != 0) { \ + barf("ACQUIRE_LOCK failed (%s:%d): %d", __FILE__, __LINE__, __r); \ + } } + +// Returns zero if the lock was acquired. 
+EXTERN_INLINE int OS_TRY_ACQUIRE_LOCK(pthread_mutex_t *mutex);
+EXTERN_INLINE int OS_TRY_ACQUIRE_LOCK(pthread_mutex_t *mutex)
+{
+    LOCK_DEBUG_BELCH("TRY_ACQUIRE_LOCK", mutex);
+    return pthread_mutex_trylock(mutex);
+}
+
+#define OS_RELEASE_LOCK(mutex) \
+    LOCK_DEBUG_BELCH("RELEASE_LOCK", mutex); \
+    if (pthread_mutex_unlock(mutex) != 0) { \
+        barf("RELEASE_LOCK: I do not own this lock: %s %d", __FILE__,__LINE__); \
+    }
+
+// Note: this assertion calls pthread_mutex_lock() on a mutex that
+// is already held by the calling thread. The mutex should therefore
+// have been created with PTHREAD_MUTEX_ERRORCHECK, otherwise this
+// assertion will hang. We always initialise mutexes with
+// PTHREAD_MUTEX_ERRORCHECK when DEBUG is on (see rts/posix/OSThreads.h).
+#define OS_ASSERT_LOCK_HELD(mutex) ASSERT(pthread_mutex_lock(mutex) == EDEADLK)
+
+#endif // CMINUSMINUS
+
+# elif defined(HAVE_WINDOWS_H)
+
+#if defined(CMINUSMINUS)
+
+/* We jump through a hoop here to get a CCall AcquireSRWLockExclusive
+   and ReleaseSRWLockExclusive, as that's what C-- wants. */
+
+#define OS_ACQUIRE_LOCK(mutex) foreign "stdcall" AcquireSRWLockExclusive(mutex)
+#define OS_RELEASE_LOCK(mutex) foreign "stdcall" ReleaseSRWLockExclusive(mutex)
+#define OS_ASSERT_LOCK_HELD(mutex) /* nothing */
+
+#else // CMINUSMINUS
+
+#include <windows.h>
+#include <synchapi.h>
+
+/* Use native conditional variables coupled with SRW locks, these are more
+   efficient and incur a smaller overhead than emulating them with events.
+   See Note [SRW locks]. */
+typedef CONDITION_VARIABLE Condition;
+typedef DWORD OSThreadId;
+// don't be tempted to use HANDLE as the OSThreadId: there can be
+// many HANDLES to a given thread, so comparison would not work.
+typedef DWORD ThreadLocalKey;
+
+#define OSThreadProcAttr __stdcall
+
+#define INIT_COND_VAR 0
+
+/* Note [SRW locks]
+   We have a choice for implementing Mutexes on Windows.
Standard + Mutexes are kernel objects that require kernel calls to + acquire/release, whereas CriticalSections are spin-locks that block + in the kernel after spinning for a configurable number of times. + CriticalSections are *much* faster than Mutexes, however not as fast as + slim reader/writer locks. CriticalSections also require a 48 byte structure + to provide lock re-entrancy. We don't need that because the other primitives + used for other platforms don't have this, as such locks are used defensively + in the RTS in a way that we don't need re-entrancy. This means that SRW's + 8 byte size is much more appropriate. With an 8 byte payload there's a + higher chance of it being in your cache line. They're also a lot faster than + CriticalSections when multiple threads are involved. CS requires setup and + teardown via kernel calls while SRWL is zero-initialized via + SRWLOCK_INIT assignment. */ + +typedef SRWLOCK Mutex; + +#if defined(LOCK_DEBUG) + +#define OS_ACQUIRE_LOCK(mutex) \ + debugBelch("ACQUIRE_LOCK(0x%p) %s %d\n", mutex,__FILE__,__LINE__); \ + AcquireSRWLockExclusive(mutex) +#define OS_RELEASE_LOCK(mutex) \ + debugBelch("RELEASE_LOCK(0x%p) %s %d\n", mutex,__FILE__,__LINE__); \ + ReleaseSRWLockExclusive(mutex) +#define OS_ASSERT_LOCK_HELD(mutex) /* nothing */ + +#else + +#define OS_ACQUIRE_LOCK(mutex) AcquireSRWLockExclusive(mutex) +#define OS_TRY_ACQUIRE_LOCK(mutex) (TryAcquireSRWLockExclusive(mutex) == 0) +#define OS_RELEASE_LOCK(mutex) ReleaseSRWLockExclusive(mutex) +#define OS_INIT_LOCK(mutex) InitializeSRWLock(mutex) +#define OS_CLOSE_LOCK(mutex) + +// I don't know how to do this. TryEnterCriticalSection() doesn't do +// the right thing. 
+#define OS_ASSERT_LOCK_HELD(mutex) /* nothing */ + +#endif // LOCK_DEBUG + +#endif // CMINUSMINUS + +# elif defined(THREADED_RTS) +# error "Threads not supported" +# endif + + +#if !defined(CMINUSMINUS) +// +// General thread operations +// +extern OSThreadId osThreadId ( void ); +extern void shutdownThread ( void ) GNUC3_ATTRIBUTE(__noreturn__); +extern void yieldThread ( void ); + +typedef void* OSThreadProcAttr OSThreadProc(void *); + +extern int createOSThread ( OSThreadId* tid, char *name, + OSThreadProc *startProc, void *param); +extern bool osThreadIsAlive ( OSThreadId id ); +extern void interruptOSThread ( OSThreadId id ); +extern void joinOSThread ( OSThreadId id ); + +// +// Condition Variables +// +extern void initCondition ( Condition* pCond ); +extern void closeCondition ( Condition* pCond ); +extern void broadcastCondition ( Condition* pCond ); +extern void signalCondition ( Condition* pCond ); +extern void waitCondition ( Condition* pCond, Mutex* pMut ); +// Returns false on timeout, true otherwise. 
+extern bool timedWaitCondition ( Condition* pCond, Mutex* pMut, Time timeout); + +// +// Mutexes +// +extern void initMutex ( Mutex* pMut ); +extern void closeMutex ( Mutex* pMut ); + +// +// Thread-local storage +// +void newThreadLocalKey (ThreadLocalKey *key); +void *getThreadLocalVar (ThreadLocalKey *key); +void setThreadLocalVar (ThreadLocalKey *key, void *value); +void freeThreadLocalKey (ThreadLocalKey *key); + +// Processors and affinity +void setThreadAffinity (uint32_t n, uint32_t m); +void setThreadNode (uint32_t node); +void releaseThreadNode (void); +#endif // !CMINUSMINUS + +#if defined(THREADED_RTS) + +#define ACQUIRE_LOCK(l) OS_ACQUIRE_LOCK(l) +#define TRY_ACQUIRE_LOCK(l) OS_TRY_ACQUIRE_LOCK(l) +#define RELEASE_LOCK(l) OS_RELEASE_LOCK(l) +#define ASSERT_LOCK_HELD(l) OS_ASSERT_LOCK_HELD(l) + +#else + +#define ACQUIRE_LOCK(l) +#define TRY_ACQUIRE_LOCK(l) 0 +#define RELEASE_LOCK(l) +#define ASSERT_LOCK_HELD(l) + +#endif /* defined(THREADED_RTS) */ + +#if !defined(CMINUSMINUS) +// +// Support for forkOS (defined regardless of THREADED_RTS, but does +// nothing when !THREADED_RTS). +// +int forkOS_createThread ( HsStablePtr entry ); + +// +// Free any global resources created in OSThreads. +// +void freeThreadingResources(void); + +// +// Returns the number of processor cores in the machine +// +uint32_t getNumberOfProcessors (void); + +// +// Support for getting at the kernel thread Id for tracing/profiling. +// +// This stuff is optional and only used for tracing/profiling purposes, to +// match up thread ids recorded by other tools. For example, on Linux and OSX +// the pthread_t type is not the same as the kernel thread id, and system +// profiling tools like Linux perf, and OSX's DTrace use the kernel thread Id. +// So if we want to match up RTS tasks with kernel threads recorded by these +// tools then we need to know the kernel thread Id, and this must be a separate +// type from the OSThreadId. 
+// +// If the feature cannot be supported on an OS, it is OK to always return 0. +// In particular it would almost certainly be meaningless on systems not using +// a 1:1 threading model. + +// We use a common serialisable representation on all OSs +// This is ok for Windows, OSX and Linux. +typedef StgWord64 KernelThreadId; + +// Get the current kernel thread id +KernelThreadId kernelThreadId (void); + +#endif /* CMINUSMINUS */ diff --git a/rts/include/rts/Parallel.h b/rts/include/rts/Parallel.h new file mode 100644 index 0000000000..7577a3967c --- /dev/null +++ b/rts/include/rts/Parallel.h @@ -0,0 +1,16 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * Parallelism-related functionality + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * -------------------------------------------------------------------------- */ + +#pragma once + +StgInt newSpark (StgRegTable *reg, StgClosure *p); diff --git a/rts/include/rts/PosixSource.h b/rts/include/rts/PosixSource.h new file mode 100644 index 0000000000..13fd7b0ff5 --- /dev/null +++ b/rts/include/rts/PosixSource.h @@ -0,0 +1,38 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2005 + * + * Include this file into sources which should not need any non-Posix services. + * That includes most RTS C sources. + * ---------------------------------------------------------------------------*/ + +#pragma once + +#include <ghcplatform.h> + +/* We aim for C99 so we need to define following two defines in a consistent way + with what POSIX/XOPEN provide for C99. Some OSes are particularly picky about + the right versions defined here, e.g. 
Solaris + We also settle on lowest version of POSIX/XOPEN needed for proper C99 support + here which is POSIX.1-2001 compilation and Open Group Technical Standard, + Issue 6 (XPG6). XPG6 itself is a result of the merge of X/Open and POSIX + specification. It is also referred as IEEE Std. 1003.1-2001 or ISO/IEC + 9945:2002 or UNIX 03 and SUSv3. + Please also see trac ticket #11757 for more information about switch + to C99/C11. + + However, the use of `strnlen`, which is strictly speaking only available in + IEEE Std 1003.1-2008 (XPG7), requires lifting the bounds, to be able to + compile ghc on systems that are strict about enforcing the standard, e.g. + Apples mobile platforms. + + Oracle's Solaris 11 supports only up to XPG6, hence the ifdef. + */ + +#if defined(solaris2_HOST_OS) +#define _POSIX_C_SOURCE 200112L +#define _XOPEN_SOURCE 600 +#else +#define _POSIX_C_SOURCE 200809L +#define _XOPEN_SOURCE 700 +#endif diff --git a/rts/include/rts/PrimFloat.h b/rts/include/rts/PrimFloat.h new file mode 100644 index 0000000000..26b6f7ceec --- /dev/null +++ b/rts/include/rts/PrimFloat.h @@ -0,0 +1,17 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * Primitive floating-point operations + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +StgDouble __int_encodeDouble (I_ j, I_ e); +StgFloat __int_encodeFloat (I_ j, I_ e); +StgDouble __word_encodeDouble (W_ j, I_ e); +StgFloat __word_encodeFloat (W_ j, I_ e); diff --git a/rts/include/rts/Profiling.h b/rts/include/rts/Profiling.h new file mode 100644 index 0000000000..b329a493db --- /dev/null +++ b/rts/include/rts/Profiling.h @@ -0,0 +1,17 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 
2017-2018 + * + * Cost-centre profiling API + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * -------------------------------------------------------------------------- */ + +#pragma once + +void registerCcList(CostCentre **cc_list); +void registerCcsList(CostCentreStack **cc_list); diff --git a/rts/include/rts/Signals.h b/rts/include/rts/Signals.h new file mode 100644 index 0000000000..96f0756538 --- /dev/null +++ b/rts/include/rts/Signals.h @@ -0,0 +1,23 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * RTS signal handling + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +/* NB. #included in Haskell code, no prototypes in here. */ + +/* arguments to stg_sig_install() */ +#define STG_SIG_DFL (-1) +#define STG_SIG_IGN (-2) +#define STG_SIG_ERR (-3) +#define STG_SIG_HAN (-4) +#define STG_SIG_RST (-5) diff --git a/rts/include/rts/SpinLock.h b/rts/include/rts/SpinLock.h new file mode 100644 index 0000000000..c1fe6c866c --- /dev/null +++ b/rts/include/rts/SpinLock.h @@ -0,0 +1,75 @@ +/* ---------------------------------------------------------------------------- + * + * (c) The GHC Team, 2006-2009 + * + * Spin locks + * + * These are simple spin-only locks as opposed to Mutexes which + * probably spin for a while before blocking in the kernel. We use + * these when we are sure that all our threads are actively running on + * a CPU, eg. in the GC. 
+ * + * TODO: measure whether we really need these, or whether Mutexes + * would do (and be a bit safer if a CPU becomes loaded). + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * -------------------------------------------------------------------------- */ + +#pragma once + +#if defined(THREADED_RTS) + +typedef struct SpinLock_ +{ + StgWord lock; +#if defined(PROF_SPIN) + StgWord64 spin; // incremented every time we spin in ACQUIRE_SPIN_LOCK + StgWord64 yield; // incremented every time we yield in ACQUIRE_SPIN_LOCK +#endif +} SpinLock; + +// PROF_SPIN enables counting the number of times we spin on a lock +#if defined(PROF_SPIN) +#define IF_PROF_SPIN(x) x +#else +#define IF_PROF_SPIN(x) +#endif + +void acquire_spin_lock_slow_path(SpinLock * p); + +// acquire spin lock +INLINE_HEADER void ACQUIRE_SPIN_LOCK(SpinLock * p) +{ + StgWord32 r = cas((StgVolatilePtr)&(p->lock), 1, 0); + if (RTS_UNLIKELY(r == 0)) + acquire_spin_lock_slow_path(p); +} + +// release spin lock +INLINE_HEADER void RELEASE_SPIN_LOCK(SpinLock * p) +{ + RELEASE_STORE(&p->lock, 1); +} + +// initialise spin lock +INLINE_HEADER void initSpinLock(SpinLock * p) +{ + IF_PROF_SPIN(p->spin = 0); + IF_PROF_SPIN(p->yield = 0); + RELEASE_STORE(&p->lock, 1); +} + +#else /* !THREADED_RTS */ + +// Using macros here means we don't have to ensure the argument is in scope +#define ACQUIRE_SPIN_LOCK(p) /* nothing */ +#define RELEASE_SPIN_LOCK(p) /* nothing */ + +INLINE_HEADER void initSpinLock(void * p STG_UNUSED) +{ /* nothing */ } + +#endif /* THREADED_RTS */ diff --git a/rts/include/rts/StableName.h b/rts/include/rts/StableName.h new file mode 100644 index 0000000000..4e4f976dae --- /dev/null +++ b/rts/include/rts/StableName.h @@ -0,0 +1,32 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The 
GHC Team, 1998-2009 + * + * Stable Names + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +/* ----------------------------------------------------------------------------- + PRIVATE from here. + -------------------------------------------------------------------------- */ + +typedef struct { + StgPtr addr; // Haskell object when entry is in use, next free + // entry (NULL when this is the last free entry) + // otherwise. May be NULL temporarily during GC (when + // pointee dies). + + StgPtr old; // Old Haskell object, used during GC + + StgClosure *sn_obj; // The StableName object, or NULL when the entry is + // free +} snEntry; + +extern DLL_IMPORT_RTS snEntry *stable_name_table; diff --git a/rts/include/rts/StablePtr.h b/rts/include/rts/StablePtr.h new file mode 100644 index 0000000000..73cd5bed4d --- /dev/null +++ b/rts/include/rts/StablePtr.h @@ -0,0 +1,39 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * Stable Pointers + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +ATTR_ALWAYS_INLINE EXTERN_INLINE StgPtr deRefStablePtr (StgStablePtr stable_ptr); +StgStablePtr getStablePtr (StgPtr p); + +/* ----------------------------------------------------------------------------- + PRIVATE from here. 
+ -------------------------------------------------------------------------- */ + +typedef struct { + StgPtr addr; // Haskell object when entry is in use, next free + // entry (NULL when this is the last free entry) + // otherwise. +} spEntry; + +extern DLL_IMPORT_RTS spEntry *stable_ptr_table; + +ATTR_ALWAYS_INLINE EXTERN_INLINE +StgPtr deRefStablePtr(StgStablePtr sp) +{ + // acquire load to ensure that we see the new SPT if it has been recently + // enlarged. + const spEntry *spt = ACQUIRE_LOAD(&stable_ptr_table); + // acquire load to ensure that the referenced object is visible. + return ACQUIRE_LOAD(&spt[(StgWord)sp].addr); +} diff --git a/rts/include/rts/StaticPtrTable.h b/rts/include/rts/StaticPtrTable.h new file mode 100644 index 0000000000..5753e957bd --- /dev/null +++ b/rts/include/rts/StaticPtrTable.h @@ -0,0 +1,44 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2008-2009 + * + * Initialization of the Static Pointer Table + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * -------------------------------------------------------------------------- */ + +#pragma once + +/** Inserts an entry in the Static Pointer Table. + * + * The key is a fingerprint computed from the static pointer and the spe_closure + * is a pointer to the closure defining the table entry. + * + * A stable pointer to the closure is made to prevent it from being garbage + * collected while the entry exists on the table. + * + * This function is called from the code generated by + * compiler/deSugar/StaticPtrTable.sptInitCode + * + * */ +void hs_spt_insert (StgWord64 key[2],void* spe_closure); + +/** Inserts an entry for a StgTablePtr in the Static Pointer Table. 
+ * + * This function is called from the GHCi interpreter to insert + * SPT entries for bytecode objects. + * + * */ +void hs_spt_insert_stableptr(StgWord64 key[2], StgStablePtr *entry); + +/** Removes an entry from the Static Pointer Table. + * + * This function is called from the code generated by + * compiler/deSugar/StaticPtrTable.sptInitCode + * + * */ +void hs_spt_remove (StgWord64 key[2]); diff --git a/rts/include/rts/TSANUtils.h b/rts/include/rts/TSANUtils.h new file mode 100644 index 0000000000..72f4541a89 --- /dev/null +++ b/rts/include/rts/TSANUtils.h @@ -0,0 +1,67 @@ +/* ---------------------------------------------------------------------------- + * + * (c) The GHC Team, 2006-2019 + * + * Utilities for annotating "safe" data races for Thread Sanitizer + * -------------------------------------------------------------------------- */ + +/* + * Note [ThreadSanitizer] + * ~~~~~~~~~~~~~~~~~~~~~~~ + * ThreadSanitizer (abbreviated TSAN) is a library and set of compiler + * instrumentation (supported by both GCC and Clang) for checking C/C++ code + * for data races. + * + * In GHC we use it to check the runtime system implementation (but not yet + * generated code). TSAN requires that the checked program uses C++11-style + * atomics for all potentially-racing accesses. Note that we use the __atomic_* + * builtin operations but not the C11 _Atomic types to maintain compatibility + * with older compilers. + * + * In addition to the atomic operations themselves, TSAN provides a variety of + * annotation operations which can be used to annotate cases where the + * intended semantics are either ambiguous or intentionally racy (known as a + * *benign race*). + * + * Finally, there are a few benign races which we can't easily annotate. To + * silence these errors we have a suppressions file in rts/.tsan-suppressions. + * In general it's best to add suppressions only as a last resort, when the + * more precise annotation functions prove to be insufficient. 
+ *
+ * Users guide: https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual
+ */
+
+#if defined(__SANITIZE_THREAD__)
+#define TSAN_ENABLED
+#elif defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+#define TSAN_ENABLED
+#endif
+#endif
+
+#if defined(TSAN_ENABLED)
+#if !defined(HAVE_C11_ATOMICS)
+#error TSAN cannot be enabled without C11 atomics support.
+#endif
+
+#define TSAN_ANNOTATE_HAPPENS_BEFORE(addr) \
+    AnnotateHappensBefore(__FILE__, __LINE__, (void*)(addr))
+#define TSAN_ANNOTATE_HAPPENS_AFTER(addr) \
+    AnnotateHappensAfter(__FILE__, __LINE__, (void*)(addr))
+#define TSAN_ANNOTATE_BENIGN_RACE_SIZED(addr,size,desc) \
+    AnnotateBenignRaceSized(__FILE__, __LINE__, (void*)(addr), size, desc)
+void AnnotateHappensBefore(const char* f, int l, void* addr);
+void AnnotateHappensAfter(const char* f, int l, void* addr);
+void AnnotateBenignRaceSized(const char *file,
+                             int line,
+                             const volatile void *mem,
+                             long size,
+                             const char *description);
+#else
+#define TSAN_ANNOTATE_HAPPENS_BEFORE(addr)
+#define TSAN_ANNOTATE_HAPPENS_AFTER(addr)
+#define TSAN_ANNOTATE_BENIGN_RACE_SIZED(addr,size,desc)
+#endif
+
+#define TSAN_ANNOTATE_BENIGN_RACE(addr,desc) \
+    TSAN_ANNOTATE_BENIGN_RACE_SIZED((void*)(addr), sizeof(*addr), desc)
diff --git a/rts/include/rts/TTY.h b/rts/include/rts/TTY.h
new file mode 100644
index 0000000000..9892571a17
--- /dev/null
+++ b/rts/include/rts/TTY.h
@@ -0,0 +1,17 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2009
+ *
+ * POSIX TTY-related functionality
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * -------------------------------------------------------------------------- */ + +#pragma once + +void* __hscore_get_saved_termios(int fd); +void __hscore_set_saved_termios(int fd, void* ts); diff --git a/rts/include/rts/Threads.h b/rts/include/rts/Threads.h new file mode 100644 index 0000000000..51c11742ca --- /dev/null +++ b/rts/include/rts/Threads.h @@ -0,0 +1,79 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team 1998-2009 + * + * External API for the scheduler. For most uses, the functions in + * RtsAPI.h should be enough. + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#if defined(HAVE_SYS_TYPES_H) +#include <sys/types.h> +#endif + +// +// Creating threads +// +StgTSO *createThread (Capability *cap, W_ stack_size); + +// precondition: +// (*cap)->running_task != NULL +// (*cap)->running_task must be a bound task (e.g. newBoundTask() has been +// called on that thread). 
+void scheduleWaitThread (/* in */ StgTSO *tso, + /* out */ HaskellObj* ret, + /* inout */ Capability **cap); + +StgTSO *createGenThread (Capability *cap, W_ stack_size, + StgClosure *closure); +StgTSO *createIOThread (Capability *cap, W_ stack_size, + StgClosure *closure); +StgTSO *createStrictIOThread (Capability *cap, W_ stack_size, + StgClosure *closure); + +// Suspending/resuming threads around foreign calls +void * suspendThread (StgRegTable *, bool interruptible); +StgRegTable * resumeThread (void *); + +// +// Thread operations from Threads.c +// +bool eq_thread (StgPtr tso1, StgPtr tso2); +int cmp_thread (StgPtr tso1, StgPtr tso2); +long rts_getThreadId (StgPtr tso); +void rts_enableThreadAllocationLimit (StgPtr tso); +void rts_disableThreadAllocationLimit (StgPtr tso); + +#if !defined(mingw32_HOST_OS) +pid_t forkProcess (HsStablePtr *entry); +#else +pid_t forkProcess (HsStablePtr *entry) + GNU_ATTRIBUTE(__noreturn__); +#endif + +HsBool rtsSupportsBoundThreads (void); + +// The number of Capabilities. +// ToDo: I would like this to be private to the RTS and instead expose a +// function getNumCapabilities(), but it is used in compiler/cbits/genSym.c +extern unsigned int n_capabilities; + +// The number of Capabilities that are not disabled +extern uint32_t enabled_capabilities; + +#if !IN_STG_CODE +extern Capability MainCapability; +#endif + +// +// Change the number of capabilities (only supports increasing the +// current value at the moment). +// +extern void setNumCapabilities (uint32_t new_); diff --git a/rts/include/rts/Ticky.h b/rts/include/rts/Ticky.h new file mode 100644 index 0000000000..93043d8514 --- /dev/null +++ b/rts/include/rts/Ticky.h @@ -0,0 +1,32 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * TICKY_TICKY types + * + * Do not #include this file directly: #include "Rts.h" instead. 
+ * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +/* ----------------------------------------------------------------------------- + The StgEntCounter type - needed regardless of TICKY_TICKY + -------------------------------------------------------------------------- */ + +typedef struct _StgEntCounter { + /* Using StgWord for everything, because both the C and asm code + generators make trouble if you try to pack things tighter */ + StgWord registeredp; /* 0 == no, 1 == yes */ + StgInt arity; /* arity (static info) */ + StgInt allocd; /* # allocation of this closure */ + /* (rest of args are in registers) */ + char *str; /* name of the thing */ + char *arg_kinds; /* info about the args types */ + StgInt entry_count; /* Trips to fast entry code */ + StgInt allocs; /* number of allocations by this fun */ + struct _StgEntCounter *link;/* link to chain them all together */ +} StgEntCounter; diff --git a/rts/include/rts/Time.h b/rts/include/rts/Time.h new file mode 100644 index 0000000000..ab291cd6c5 --- /dev/null +++ b/rts/include/rts/Time.h @@ -0,0 +1,45 @@ +/* ---------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2004 + * + * Time values in the RTS + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * --------------------------------------------------------------------------*/ + +#pragma once + +// For most time values in the RTS we use a fixed resolution of nanoseconds, +// normalising the time we get from platform-dependent APIs to this +// resolution. 
+#define TIME_RESOLUTION 1000000000 +typedef int64_t Time; + +#define TIME_MAX HS_INT64_MAX + +#if TIME_RESOLUTION == 1000000000 +// I'm being lazy, but it's awkward to define fully general versions of these +#define TimeToMS(t) ((t) / 1000000) +#define TimeToUS(t) ((t) / 1000) +#define TimeToNS(t) (t) +#define MSToTime(t) ((Time)(t) * 1000000) +#define USToTime(t) ((Time)(t) * 1000) +#define NSToTime(t) ((Time)(t)) +#else +#error Fix TimeToNS(), TimeToUS() etc. +#endif + +#define SecondsToTime(t) ((Time)(t) * TIME_RESOLUTION) +#define TimeToSeconds(t) ((t) / TIME_RESOLUTION) +#define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION) + +// Use instead of SecondsToTime() when we have a floating-point +// seconds value, to avoid truncating it. +INLINE_HEADER Time fsecondsToTime (double t) +{ + return (Time)(t * TIME_RESOLUTION); +} + +Time getProcessElapsedTime (void); diff --git a/rts/include/rts/Timer.h b/rts/include/rts/Timer.h new file mode 100644 index 0000000000..c60cd37590 --- /dev/null +++ b/rts/include/rts/Timer.h @@ -0,0 +1,18 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1995-2009 + * + * Interface to the RTS timer signal (uses OS-dependent Ticker.h underneath) + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +void startTimer (void); +void stopTimer (void); +int rtsTimerSignal (void); diff --git a/rts/include/rts/Types.h b/rts/include/rts/Types.h new file mode 100644 index 0000000000..51e8f80e66 --- /dev/null +++ b/rts/include/rts/Types.h @@ -0,0 +1,31 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * RTS-specific types. 
+ * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#include <stddef.h> +#include <stdbool.h> + +// Deprecated, use uint32_t instead. +typedef unsigned int nat __attribute__((deprecated)); /* uint32_t */ + +/* ullong (64|128-bit) type: only include if needed (not ANSI) */ +#if defined(__GNUC__) +#define LL(x) (x##LL) +#else +#define LL(x) (x##L) +#endif + +typedef struct StgClosure_ StgClosure; +typedef struct StgInfoTable_ StgInfoTable; +typedef struct StgTSO_ StgTSO; diff --git a/rts/include/rts/Utils.h b/rts/include/rts/Utils.h new file mode 100644 index 0000000000..4aee9c3a67 --- /dev/null +++ b/rts/include/rts/Utils.h @@ -0,0 +1,16 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * RTS external APIs. This file declares everything that the GHC RTS + * exposes externally. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +/* Alternate to raise(3) for threaded rts, for BSD-based OSes */ +int genericRaise(int sig); diff --git a/rts/include/rts/prof/CCS.h b/rts/include/rts/prof/CCS.h new file mode 100644 index 0000000000..7685f03003 --- /dev/null +++ b/rts/include/rts/prof/CCS.h @@ -0,0 +1,226 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2009-2012 + * + * Macros for profiling operations in STG code + * + * Do not #include this file directly: #include "Rts.h" instead. 
+ * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +/* ----------------------------------------------------------------------------- + * Data Structures + * ---------------------------------------------------------------------------*/ +/* + * Note [struct alignment] + * NB. be careful to avoid unwanted padding between fields, by + * putting the 8-byte fields on an 8-byte boundary. Padding can + * vary between C compilers, and we don't take into account any + * possible padding when generating CCS and CC decls in the code + * generator (GHC.StgToCmm.Prof). + */ + +typedef struct CostCentre_ { + StgInt ccID; // Unique Id, allocated by the RTS + + char * label; + char * module; + char * srcloc; + + // used for accumulating costs at the end of the run... + StgWord64 mem_alloc; // align 8 (Note [struct alignment]) + StgWord time_ticks; + + StgBool is_caf; // true <=> CAF cost centre + + struct CostCentre_ *link; +} CostCentre; + +typedef struct CostCentreStack_ { + StgInt ccsID; // unique ID, allocated by the RTS + + CostCentre *cc; // Cost centre at the top of the stack + + struct CostCentreStack_ *prevStack; // parent + struct IndexTable_ *indexTable; // children + struct CostCentreStack_ *root; // root of stack + StgWord depth; // number of items in the stack + + StgWord64 scc_count; // Count of times this CCS is entered + // align 8 (Note [struct alignment]) + + StgWord selected; // is this CCS shown in the heap + // profile? (zero if excluded via -hc + // -hm etc.) 
+ + StgWord time_ticks; // number of time ticks accumulated by + // this CCS + + StgWord64 mem_alloc; // mem allocated by this CCS + // align 8 (Note [struct alignment]) + + StgWord64 inherited_alloc; // sum of mem_alloc over all children + // (calculated at the end) + // align 8 (Note [struct alignment]) + + StgWord inherited_ticks; // sum of time_ticks over all children + // (calculated at the end) +} CostCentreStack; + + +/* ----------------------------------------------------------------------------- + * Start and stop the profiling timer. These can be called from + * Haskell to restrict the profile to portion(s) of the execution. + * See the module GHC.Profiling. + * ---------------------------------------------------------------------------*/ + +void stopProfTimer ( void ); +void startProfTimer ( void ); + +/* ----------------------------------------------------------------------------- + * The rest is PROFILING only... + * ---------------------------------------------------------------------------*/ + +#if defined(PROFILING) + +/* ----------------------------------------------------------------------------- + * Constants + * ---------------------------------------------------------------------------*/ + +#define EMPTY_STACK NULL +#define EMPTY_TABLE NULL + +/* Constants used to set is_caf flag on CostCentres */ +#define CC_IS_CAF true +#define CC_NOT_CAF false +/* ----------------------------------------------------------------------------- + * Data Structures + * ---------------------------------------------------------------------------*/ + +// IndexTable is the list of children of a CCS. (Alternatively it is a +// cache of the results of pushing onto a CCS, so that the second and +// subsequent times we push a certain CC on a CCS we get the same +// result). + +typedef struct IndexTable_ { + // Just a linked list of (cc, ccs) pairs, where the `ccs` is the result of + // pushing `cc` to the owner of the index table (another CostCentreStack). 
+ CostCentre *cc; + CostCentreStack *ccs; + struct IndexTable_ *next; + // back_edge is true when `cc` is already in the stack, so pushing it + // truncates or drops (see RECURSION_DROPS and RECURSION_TRUNCATES in + // Profiling.c). + bool back_edge; +} IndexTable; + + +/* ----------------------------------------------------------------------------- + Pre-defined cost centres and cost centre stacks + -------------------------------------------------------------------------- */ + +#if IN_STG_CODE + +extern StgWord CC_MAIN[]; +extern StgWord CCS_MAIN[]; // Top CCS + +extern StgWord CC_SYSTEM[]; +extern StgWord CCS_SYSTEM[]; // RTS costs + +extern StgWord CC_GC[]; +extern StgWord CCS_GC[]; // Garbage collector costs + +extern StgWord CC_OVERHEAD[]; +extern StgWord CCS_OVERHEAD[]; // Profiling overhead + +extern StgWord CC_DONT_CARE[]; +extern StgWord CCS_DONT_CARE[]; // CCS attached to static constructors + +#else + +extern CostCentre CC_MAIN[]; +extern CostCentreStack CCS_MAIN[]; // Top CCS + +extern CostCentre CC_SYSTEM[]; +extern CostCentreStack CCS_SYSTEM[]; // RTS costs + +extern CostCentre CC_GC[]; +extern CostCentreStack CCS_GC[]; // Garbage collector costs + +extern CostCentre CC_OVERHEAD[]; +extern CostCentreStack CCS_OVERHEAD[]; // Profiling overhead + +extern CostCentre CC_DONT_CARE[]; +extern CostCentreStack CCS_DONT_CARE[]; // shouldn't ever get set + +extern CostCentre CC_PINNED[]; +extern CostCentreStack CCS_PINNED[]; // pinned memory + +extern CostCentre CC_IDLE[]; +extern CostCentreStack CCS_IDLE[]; // capability is idle + +#endif /* IN_STG_CODE */ + +extern unsigned int RTS_VAR(era); + +/* ----------------------------------------------------------------------------- + * Functions + * ---------------------------------------------------------------------------*/ + +CostCentreStack * pushCostCentre (CostCentreStack *, CostCentre *); +void enterFunCCS (StgRegTable *reg, CostCentreStack *); +CostCentre *mkCostCentre (char *label, char *module, char 
*srcloc); + +extern CostCentre * RTS_VAR(CC_LIST); // registered CC list + +/* ----------------------------------------------------------------------------- + * Declaring Cost Centres & Cost Centre Stacks. + * -------------------------------------------------------------------------- */ + +# define CC_DECLARE(cc_ident,name,mod,loc,caf,is_local) \ + is_local CostCentre cc_ident[1] \ + = {{ .ccID = 0, \ + .label = name, \ + .module = mod, \ + .srcloc = loc, \ + .time_ticks = 0, \ + .mem_alloc = 0, \ + .link = 0, \ + .is_caf = caf \ + }}; + +# define CCS_DECLARE(ccs_ident,cc_ident,is_local) \ + is_local CostCentreStack ccs_ident[1] \ + = {{ .ccsID = 0, \ + .cc = cc_ident, \ + .prevStack = NULL, \ + .indexTable = NULL, \ + .root = NULL, \ + .depth = 0, \ + .selected = 0, \ + .scc_count = 0, \ + .time_ticks = 0, \ + .mem_alloc = 0, \ + .inherited_ticks = 0, \ + .inherited_alloc = 0 \ + }}; + +/* ----------------------------------------------------------------------------- + * Time / Allocation Macros + * ---------------------------------------------------------------------------*/ + +/* eliminate profiling overhead from allocation costs */ +#define CCS_ALLOC(ccs, size) (ccs)->mem_alloc += ((size)-sizeofW(StgProfHeader)) +#define ENTER_CCS_THUNK(cap,p) cap->r.rCCCS = p->header.prof.ccs + +#else /* !PROFILING */ + +#define CCS_ALLOC(ccs, amount) doNothing() +#define ENTER_CCS_THUNK(cap,p) doNothing() + +#endif /* PROFILING */ diff --git a/rts/include/rts/prof/Heap.h b/rts/include/rts/prof/Heap.h new file mode 100644 index 0000000000..90700c809b --- /dev/null +++ b/rts/include/rts/prof/Heap.h @@ -0,0 +1,24 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The University of Glasgow, 2009 + * + * Heap Census Profiling + * + * Do not #include this file directly: #include "Rts.h" instead. 
+ * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +/* ----------------------------------------------------------------------------- + * Fine-grained control over heap census profiling which can be called from + * Haskell to restrict the profile to portion(s) of the execution. + * See the module GHC.Profiling. + * ---------------------------------------------------------------------------*/ + +void requestHeapCensus ( void ); +void startHeapProfTimer ( void ); +void stopHeapProfTimer ( void ); diff --git a/rts/include/rts/prof/LDV.h b/rts/include/rts/prof/LDV.h new file mode 100644 index 0000000000..73f7786537 --- /dev/null +++ b/rts/include/rts/prof/LDV.h @@ -0,0 +1,44 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The University of Glasgow, 2009 + * + * Lag/Drag/Void profiling. + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#if defined(PROFILING) + +/* retrieves the LDV word from closure c */ +#define LDVW(c) (((StgClosure *)(c))->header.prof.hp.ldvw) + +/* + * Stores the creation time for closure c. + * This macro is called at the very moment of closure creation. + * + * NOTE: this initializes LDVW(c) to zero, which ensures that there + * is no conflict between retainer profiling and LDV profiling, + * because retainer profiling also expects LDVW(c) to be initialised + * to zero. 
+ */ + +#if defined(CMINUSMINUS) + +#else + +#define LDV_RECORD_CREATE(c) \ + LDVW((c)) = ((StgWord)RTS_DEREF(era) << LDV_SHIFT) | LDV_STATE_CREATE + +#endif + +#else /* !PROFILING */ + +#define LDV_RECORD_CREATE(c) /* nothing */ + +#endif /* PROFILING */ diff --git a/rts/include/rts/storage/Block.h b/rts/include/rts/storage/Block.h new file mode 100644 index 0000000000..730947e375 --- /dev/null +++ b/rts/include/rts/storage/Block.h @@ -0,0 +1,368 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-1999 + * + * Block structure for the storage manager + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#include "ghcconfig.h" + +/* The actual block and megablock-size constants are defined in + * rts/include/Constants.h, all constants here are derived from these. + */ + +/* Block related constants (BLOCK_SHIFT is defined in Constants.h) */ + +#if SIZEOF_LONG == SIZEOF_VOID_P +#define UNIT 1UL +#elif SIZEOF_LONG_LONG == SIZEOF_VOID_P +#define UNIT 1ULL +#else +#error "Size of pointer is suspicious." 
+#endif + +#if defined(CMINUSMINUS) +#define BLOCK_SIZE (1<<BLOCK_SHIFT) +#else +#define BLOCK_SIZE (UNIT<<BLOCK_SHIFT) +// Note [integer overflow] +#endif + +#define BLOCK_SIZE_W (BLOCK_SIZE/sizeof(W_)) +#define BLOCK_MASK (BLOCK_SIZE-1) + +#define BLOCK_ROUND_UP(p) (((W_)(p)+BLOCK_SIZE-1) & ~BLOCK_MASK) +#define BLOCK_ROUND_DOWN(p) ((void *) ((W_)(p) & ~BLOCK_MASK)) + +/* Megablock related constants (MBLOCK_SHIFT is defined in Constants.h) */ + +#if defined(CMINUSMINUS) +#define MBLOCK_SIZE (1<<MBLOCK_SHIFT) +#else +#define MBLOCK_SIZE (UNIT<<MBLOCK_SHIFT) +// Note [integer overflow] +#endif + +#define MBLOCK_SIZE_W (MBLOCK_SIZE/sizeof(W_)) +#define MBLOCK_MASK (MBLOCK_SIZE-1) + +#define MBLOCK_ROUND_UP(p) ((void *)(((W_)(p)+MBLOCK_SIZE-1) & ~MBLOCK_MASK)) +#define MBLOCK_ROUND_DOWN(p) ((void *)((W_)(p) & ~MBLOCK_MASK )) + +/* The largest size an object can be before we give it a block of its + * own and treat it as an immovable object during GC, expressed as a + * fraction of BLOCK_SIZE. + */ +#define LARGE_OBJECT_THRESHOLD ((uint32_t)(BLOCK_SIZE * 8 / 10)) + +/* + * Note [integer overflow] + * + * The UL suffix in BLOCK_SIZE and MBLOCK_SIZE promotes the expression + * to an unsigned long, which means that expressions involving these + * will be promoted to unsigned long, which makes integer overflow + * less likely. Historically, integer overflow in expressions like + * (n * BLOCK_SIZE) + * where n is int or unsigned int, have caused obscure segfaults in + * programs that use large amounts of memory (e.g. #7762, #5086). + */ + +/* ----------------------------------------------------------------------------- + * Block descriptor. This structure *must* be the right length, so we + * can do pointer arithmetic on pointers to it. + */ + +/* The block descriptor is 64 bytes on a 64-bit machine, and 32-bytes + * on a 32-bit machine. + */ + +// Note: fields marked with [READ ONLY] must not be modified by the +// client of the block allocator API. 
All other fields can be +// freely modified. + +#if !defined(CMINUSMINUS) + + +struct NonmovingSegmentInfo { + StgWord8 log_block_size; + StgWord16 next_free_snap; +}; + +typedef struct bdescr_ { + + StgPtr start; // [READ ONLY] start addr of memory + + + union { + StgPtr free; // First free byte of memory. + // allocGroup() sets this to the value of start. + // NB. during use this value should lie + // between start and start + blocks * + // BLOCK_SIZE. Values outside this + // range are reserved for use by the + // block allocator. In particular, the + // value (StgPtr)(-1) is used to + // indicate that a block is unallocated. + // + // Unused by the non-moving allocator. + struct NonmovingSegmentInfo nonmoving_segment; + }; + + struct bdescr_ *link; // used for chaining blocks together + + union { + struct bdescr_ *back; // used (occasionally) for doubly-linked lists + StgWord *bitmap; // bitmap for marking GC + StgPtr scan; // scan pointer for copying GC + } u; + + struct generation_ *gen; // generation + + StgWord16 gen_no; // gen->no, cached + StgWord16 dest_no; // number of destination generation + StgWord16 node; // which memory node does this block live on? + + StgWord16 flags; // block flags, see below + + StgWord32 blocks; // [READ ONLY] no. of blocks in a group + // (if group head, 0 otherwise) + +#if SIZEOF_VOID_P == 8 + StgWord32 _padding[3]; +#else + StgWord32 _padding[0]; +#endif +} bdescr; +#endif + +#if SIZEOF_VOID_P == 8 +#define BDESCR_SIZE 0x40 +#define BDESCR_MASK 0x3f +#define BDESCR_SHIFT 6 +#else +#define BDESCR_SIZE 0x20 +#define BDESCR_MASK 0x1f +#define BDESCR_SHIFT 5 +#endif + +/* Block contains objects evacuated during this GC */ +#define BF_EVACUATED 1 +/* Block is a large object */ +#define BF_LARGE 2 +/* Block is pinned */ +#define BF_PINNED 4 +/* Block is to be marked, not copied. Also used for marked large objects in + * non-moving heap. 
*/ +#define BF_MARKED 8 +/* Block is executable */ +#define BF_EXEC 32 +/* Block contains only a small amount of live data */ +#define BF_FRAGMENTED 64 +/* we know about this block (for finding leaks) */ +#define BF_KNOWN 128 +/* Block was swept in the last generation */ +#define BF_SWEPT 256 +/* Block is part of a Compact */ +#define BF_COMPACT 512 +/* A non-moving allocator segment (see NonMoving.c) */ +#define BF_NONMOVING 1024 +/* A large object which has been moved to off of oldest_gen->large_objects and + * onto nonmoving_large_objects. The mark phase ignores objects which aren't + * so-flagged */ +#define BF_NONMOVING_SWEEPING 2048 +/* Maximum flag value (do not define anything higher than this!) */ +#define BF_FLAG_MAX (1 << 15) + +/* Finding the block descriptor for a given block -------------------------- */ + +#if defined(CMINUSMINUS) + +#define Bdescr(p) \ + ((((p) & MBLOCK_MASK & ~BLOCK_MASK) >> (BLOCK_SHIFT-BDESCR_SHIFT)) \ + | ((p) & ~MBLOCK_MASK)) + +#else + +EXTERN_INLINE bdescr *Bdescr(StgPtr p); +EXTERN_INLINE bdescr *Bdescr(StgPtr p) +{ + return (bdescr *) + ((((W_)p & MBLOCK_MASK & ~BLOCK_MASK) >> (BLOCK_SHIFT-BDESCR_SHIFT)) + | ((W_)p & ~MBLOCK_MASK) + ); +} + +#endif + +/* Useful Macros ------------------------------------------------------------ */ + +/* Offset of first real data block in a megablock */ + +#define FIRST_BLOCK_OFF \ + ((W_)BLOCK_ROUND_UP(BDESCR_SIZE * (MBLOCK_SIZE / BLOCK_SIZE))) + +/* First data block in a given megablock */ + +#define FIRST_BLOCK(m) ((void *)(FIRST_BLOCK_OFF + (W_)(m))) + +/* Last data block in a given megablock */ + +#define LAST_BLOCK(m) ((void *)(MBLOCK_SIZE-BLOCK_SIZE + (W_)(m))) + +/* First real block descriptor in a megablock */ + +#define FIRST_BDESCR(m) \ + ((bdescr *)((FIRST_BLOCK_OFF>>(BLOCK_SHIFT-BDESCR_SHIFT)) + (W_)(m))) + +/* Last real block descriptor in a megablock */ + +#define LAST_BDESCR(m) \ + ((bdescr *)(((MBLOCK_SIZE-BLOCK_SIZE)>>(BLOCK_SHIFT-BDESCR_SHIFT)) + (W_)(m))) + +/* Number of 
usable blocks in a megablock */ + +#if !defined(CMINUSMINUS) // already defined in DerivedConstants.h +#define BLOCKS_PER_MBLOCK ((MBLOCK_SIZE - FIRST_BLOCK_OFF) / BLOCK_SIZE) +#endif + +/* How many blocks in this megablock group */ + +#define MBLOCK_GROUP_BLOCKS(n) \ + (BLOCKS_PER_MBLOCK + (n-1) * (MBLOCK_SIZE / BLOCK_SIZE)) + +/* Compute the required size of a megablock group */ + +#define BLOCKS_TO_MBLOCKS(n) \ + (1 + (W_)MBLOCK_ROUND_UP((n-BLOCKS_PER_MBLOCK) * BLOCK_SIZE) / MBLOCK_SIZE) + + +#if !defined(CMINUSMINUS) +/* to the end... */ + +/* Double-linked block lists: --------------------------------------------- */ + +INLINE_HEADER void +dbl_link_onto(bdescr *bd, bdescr **list) +{ + bd->link = *list; + bd->u.back = NULL; + if (*list) { + (*list)->u.back = bd; /* double-link the list */ + } + *list = bd; +} + +INLINE_HEADER void +dbl_link_remove(bdescr *bd, bdescr **list) +{ + if (bd->u.back) { + bd->u.back->link = bd->link; + } else { + *list = bd->link; + } + if (bd->link) { + bd->link->u.back = bd->u.back; + } +} + +INLINE_HEADER void +dbl_link_insert_after(bdescr *bd, bdescr *after) +{ + bd->link = after->link; + bd->u.back = after; + if (after->link) { + after->link->u.back = bd; + } + after->link = bd; +} + +INLINE_HEADER void +dbl_link_replace(bdescr *new_, bdescr *old, bdescr **list) +{ + new_->link = old->link; + new_->u.back = old->u.back; + if (old->link) { + old->link->u.back = new_; + } + if (old->u.back) { + old->u.back->link = new_; + } else { + *list = new_; + } +} + +/* Initialisation ---------------------------------------------------------- */ + +extern void initBlockAllocator(void); + +/* Allocation -------------------------------------------------------------- */ + +bdescr *allocGroup(W_ n); + +EXTERN_INLINE bdescr* allocBlock(void); +EXTERN_INLINE bdescr* allocBlock(void) +{ + return allocGroup(1); +} + +bdescr *allocGroupOnNode(uint32_t node, W_ n); + +// Allocate n blocks, aligned at n-block boundary. 
The returned bdescr will +// have this invariant +// +// bdescr->start % BLOCK_SIZE*n == 0 +// +bdescr *allocAlignedGroupOnNode(uint32_t node, W_ n); + +EXTERN_INLINE bdescr* allocBlockOnNode(uint32_t node); +EXTERN_INLINE bdescr* allocBlockOnNode(uint32_t node) +{ + return allocGroupOnNode(node,1); +} + +// versions that take the storage manager lock for you: +bdescr *allocGroup_lock(W_ n); +bdescr *allocBlock_lock(void); + +bdescr *allocGroupOnNode_lock(uint32_t node, W_ n); +bdescr *allocBlockOnNode_lock(uint32_t node); + +/* De-Allocation ----------------------------------------------------------- */ + +void freeGroup(bdescr *p); +void freeChain(bdescr *p); + +// versions that take the storage manager lock for you: +void freeGroup_lock(bdescr *p); +void freeChain_lock(bdescr *p); + +bdescr * splitBlockGroup (bdescr *bd, uint32_t blocks); + +/* Round a value to megablocks --------------------------------------------- */ + +// We want to allocate an object around a given size, round it up or +// down to the nearest size that will fit in an mblock group. +INLINE_HEADER StgWord +round_to_mblocks(StgWord words) +{ + if (words > BLOCKS_PER_MBLOCK * BLOCK_SIZE_W) { + // first, ignore the gap at the beginning of the first mblock by + // adding it to the total words. Then we can pretend we're + // dealing in a uniform unit of megablocks. 
+ words += FIRST_BLOCK_OFF/sizeof(W_); + + if ((words % MBLOCK_SIZE_W) < (MBLOCK_SIZE_W / 2)) { + words = (words / MBLOCK_SIZE_W) * MBLOCK_SIZE_W; + } else { + words = ((words / MBLOCK_SIZE_W) + 1) * MBLOCK_SIZE_W; + } + + words -= FIRST_BLOCK_OFF/sizeof(W_); + } + return words; +} + +#endif /* !CMINUSMINUS */ diff --git a/rts/include/rts/storage/ClosureMacros.h b/rts/include/rts/storage/ClosureMacros.h new file mode 100644 index 0000000000..b841ef8be0 --- /dev/null +++ b/rts/include/rts/storage/ClosureMacros.h @@ -0,0 +1,645 @@ +/* ---------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2012 + * + * Macros for building and manipulating closures + * + * -------------------------------------------------------------------------- */ + +#pragma once + +/* ----------------------------------------------------------------------------- + Info tables are slammed up against the entry code, and the label + for the info table is at the *end* of the table itself. This + inline function adjusts an info pointer to point to the beginning + of the table, so we can use standard C structure indexing on it. + + Note: this works for SRT info tables as long as you don't want to + access the SRT, since they are laid out the same with the SRT + pointer as the first word in the table. + + NOTES ABOUT MANGLED C VS. MINI-INTERPRETER: + + A couple of definitions: + + "info pointer" The first word of the closure. Might point + to either the end or the beginning of the + info table, depending on whether we're using + the mini interpreter or not. GET_INFO(c) + retrieves the info pointer of a closure. + + "info table" The info table structure associated with a + closure. This is always a pointer to the + beginning of the structure, so we can + use standard C structure indexing to pull out + the fields. get_itbl(c) returns a pointer to + the info table for closure c. 
+ + An address of the form xxxx_info points to the end of the info + table or the beginning of the info table depending on whether we're + mangling or not respectively. So, + + c->header.info = xxx_info + + makes absolute sense, whether mangling or not. + + -------------------------------------------------------------------------- */ + +INLINE_HEADER void SET_INFO(StgClosure *c, const StgInfoTable *info) { + RELAXED_STORE(&c->header.info, info); +} +INLINE_HEADER void SET_INFO_RELEASE(StgClosure *c, const StgInfoTable *info) { + RELEASE_STORE(&c->header.info, info); +} +INLINE_HEADER const StgInfoTable *GET_INFO(StgClosure *c) { + return RELAXED_LOAD(&c->header.info); +} + +#if defined(TABLES_NEXT_TO_CODE) +EXTERN_INLINE StgInfoTable *INFO_PTR_TO_STRUCT(const StgInfoTable *info); +EXTERN_INLINE StgInfoTable *INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgInfoTable *)info - 1;} +EXTERN_INLINE StgRetInfoTable *RET_INFO_PTR_TO_STRUCT(const StgInfoTable *info); +EXTERN_INLINE StgRetInfoTable *RET_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgRetInfoTable *)info - 1;} +INLINE_HEADER StgFunInfoTable *FUN_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgFunInfoTable *)info - 1;} +INLINE_HEADER StgThunkInfoTable *THUNK_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgThunkInfoTable *)info - 1;} +INLINE_HEADER StgConInfoTable *CON_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgConInfoTable *)info - 1;} +INLINE_HEADER StgFunInfoTable *itbl_to_fun_itbl(const StgInfoTable *i) {return (StgFunInfoTable *)(i + 1) - 1;} +INLINE_HEADER StgRetInfoTable *itbl_to_ret_itbl(const StgInfoTable *i) {return (StgRetInfoTable *)(i + 1) - 1;} +INLINE_HEADER StgThunkInfoTable *itbl_to_thunk_itbl(const StgInfoTable *i) {return (StgThunkInfoTable *)(i + 1) - 1;} +INLINE_HEADER StgConInfoTable *itbl_to_con_itbl(const StgInfoTable *i) {return (StgConInfoTable *)(i + 1) - 1;} +#else +EXTERN_INLINE StgInfoTable *INFO_PTR_TO_STRUCT(const StgInfoTable 
*info); +EXTERN_INLINE StgInfoTable *INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgInfoTable *)info;} +EXTERN_INLINE StgRetInfoTable *RET_INFO_PTR_TO_STRUCT(const StgInfoTable *info); +EXTERN_INLINE StgRetInfoTable *RET_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgRetInfoTable *)info;} +INLINE_HEADER StgFunInfoTable *FUN_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgFunInfoTable *)info;} +INLINE_HEADER StgThunkInfoTable *THUNK_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgThunkInfoTable *)info;} +INLINE_HEADER StgConInfoTable *CON_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgConInfoTable *)info;} +INLINE_HEADER StgFunInfoTable *itbl_to_fun_itbl(const StgInfoTable *i) {return (StgFunInfoTable *)i;} +INLINE_HEADER StgRetInfoTable *itbl_to_ret_itbl(const StgInfoTable *i) {return (StgRetInfoTable *)i;} +INLINE_HEADER StgThunkInfoTable *itbl_to_thunk_itbl(const StgInfoTable *i) {return (StgThunkInfoTable *)i;} +INLINE_HEADER StgConInfoTable *itbl_to_con_itbl(const StgInfoTable *i) {return (StgConInfoTable *)i;} +#endif + +EXTERN_INLINE const StgInfoTable *get_itbl(const StgClosure *c); +EXTERN_INLINE const StgInfoTable *get_itbl(const StgClosure *c) +{ + return INFO_PTR_TO_STRUCT(RELAXED_LOAD(&c->header.info)); +} + +EXTERN_INLINE const StgRetInfoTable *get_ret_itbl(const StgClosure *c); +EXTERN_INLINE const StgRetInfoTable *get_ret_itbl(const StgClosure *c) +{ + return RET_INFO_PTR_TO_STRUCT(RELAXED_LOAD(&c->header.info)); +} + +INLINE_HEADER const StgFunInfoTable *get_fun_itbl(const StgClosure *c) +{ + return FUN_INFO_PTR_TO_STRUCT(RELAXED_LOAD(&c->header.info)); +} + +INLINE_HEADER const StgThunkInfoTable *get_thunk_itbl(const StgClosure *c) +{ + return THUNK_INFO_PTR_TO_STRUCT(RELAXED_LOAD(&c->header.info)); +} + +INLINE_HEADER const StgConInfoTable *get_con_itbl(const StgClosure *c) +{ + return CON_INFO_PTR_TO_STRUCT(RELAXED_LOAD(&c->header.info)); +} + +INLINE_HEADER StgHalfWord GET_TAG(const StgClosure 
*con) +{ + return get_itbl(con)->srt; +} + +/* ----------------------------------------------------------------------------- + Macros for building closures + -------------------------------------------------------------------------- */ + +#if defined(PROFILING) +/* + The following macro works for both retainer profiling and LDV profiling. For + retainer profiling, 'era' remains 0, so by setting the 'ldvw' field we also set + 'rs' to zero. + + Note that we don't have to bother handling the 'flip' bit properly[1] since the + retainer profiling code will just set 'rs' to NULL upon visiting a closure with + an invalid 'flip' bit anyways. + + See Note [Profiling heap traversal visited bit] for details. + + [1]: Technically we should set 'rs' to `NULL | flip`. + */ +#define SET_PROF_HDR(c,ccs_) \ + ((c)->header.prof.ccs = ccs_, \ + LDV_RECORD_CREATE((c))) +#else +#define SET_PROF_HDR(c,ccs) +#endif + +#define SET_HDR(c,_info,ccs) \ + { \ + SET_PROF_HDR((StgClosure *)(c),ccs); \ + RELAXED_STORE(&(c)->header.info, _info); \ + } + +#define SET_HDR_RELEASE(c,_info,ccs) \ + { \ + SET_PROF_HDR((StgClosure *)(c),ccs); \ + RELEASE_STORE(&(c)->header.info, _info); \ + } + +#define SET_ARR_HDR(c,info,costCentreStack,n_bytes) \ + (c)->bytes = n_bytes; \ + SET_HDR(c,info,costCentreStack); + +// Use when changing a closure from one kind to another +#define OVERWRITE_INFO(c, new_info) \ + OVERWRITING_CLOSURE((StgClosure *)(c)); \ + SET_INFO((StgClosure *)(c), (new_info)); \ + LDV_RECORD_CREATE(c); + +/* ----------------------------------------------------------------------------- + How to get hold of the static link field for a static closure. + -------------------------------------------------------------------------- */ + +/* These are hard-coded. 
*/ +#define THUNK_STATIC_LINK(p) (&(p)->payload[1]) +#define IND_STATIC_LINK(p) (&(p)->payload[1]) + +INLINE_HEADER StgClosure ** +STATIC_LINK(const StgInfoTable *info, StgClosure *p) +{ + switch (info->type) { + case THUNK_STATIC: + return THUNK_STATIC_LINK(p); + case IND_STATIC: + return IND_STATIC_LINK(p); + default: + return &p->payload[info->layout.payload.ptrs + + info->layout.payload.nptrs]; + } +} + +/* ----------------------------------------------------------------------------- + INTLIKE and CHARLIKE closures. + -------------------------------------------------------------------------- */ + +INLINE_HEADER P_ CHARLIKE_CLOSURE(int n) { + return (P_)&stg_CHARLIKE_closure[(n)-MIN_CHARLIKE]; +} +INLINE_HEADER P_ INTLIKE_CLOSURE(int n) { + return (P_)&stg_INTLIKE_closure[(n)-MIN_INTLIKE]; +} + +/* ---------------------------------------------------------------------------- + Macros for untagging and retagging closure pointers + For more information look at the comments in Cmm.h + ------------------------------------------------------------------------- */ + +static inline StgWord +GET_CLOSURE_TAG(const StgClosure * p) +{ + return (StgWord)p & TAG_MASK; +} + +static inline StgClosure * +UNTAG_CLOSURE(StgClosure * p) +{ + return (StgClosure*)((StgWord)p & ~TAG_MASK); +} + +static inline const StgClosure * +UNTAG_CONST_CLOSURE(const StgClosure * p) +{ + return (const StgClosure*)((StgWord)p & ~TAG_MASK); +} + +static inline StgClosure * +TAG_CLOSURE(StgWord tag,StgClosure * p) +{ + return (StgClosure*)((StgWord)p | tag); +} + +/* ----------------------------------------------------------------------------- + Forwarding pointers + -------------------------------------------------------------------------- */ + +#define IS_FORWARDING_PTR(p) ((((StgWord)p) & 1) != 0) +#define MK_FORWARDING_PTR(p) (((StgWord)p) | 1) +#define UN_FORWARDING_PTR(p) (((StgWord)p) - 1) + +/* ----------------------------------------------------------------------------- + DEBUGGING predicates 
for pointers + + LOOKS_LIKE_INFO_PTR(p) returns False if p is definitely not an info ptr + LOOKS_LIKE_CLOSURE_PTR(p) returns False if p is definitely not a closure ptr + + These macros are complete but not sound. That is, they might + return false positives. Do not rely on them to distinguish info + pointers from closure pointers, for example. + + We don't use address-space predicates these days, for portability + reasons, and the fact that code/data can be scattered about the + address space in a dynamically-linked environment. Our best option + is to look at the alleged info table and see whether it seems to + make sense... + -------------------------------------------------------------------------- */ + +INLINE_HEADER bool LOOKS_LIKE_INFO_PTR_NOT_NULL (StgWord p) +{ + StgInfoTable *info = INFO_PTR_TO_STRUCT((StgInfoTable *)p); + return info->type != INVALID_OBJECT && info->type < N_CLOSURE_TYPES; +} + +INLINE_HEADER bool LOOKS_LIKE_INFO_PTR (StgWord p) +{ + return p && (IS_FORWARDING_PTR(p) || LOOKS_LIKE_INFO_PTR_NOT_NULL(p)); +} + +INLINE_HEADER bool LOOKS_LIKE_CLOSURE_PTR (const void *p) +{ + const StgInfoTable *info = RELAXED_LOAD(&UNTAG_CONST_CLOSURE((const StgClosure *) (p))->header.info); + return LOOKS_LIKE_INFO_PTR((StgWord) info); +} + +/* ----------------------------------------------------------------------------- + Macros for calculating the size of a closure + -------------------------------------------------------------------------- */ + +EXTERN_INLINE StgOffset PAP_sizeW ( uint32_t n_args ); +EXTERN_INLINE StgOffset PAP_sizeW ( uint32_t n_args ) +{ return sizeofW(StgPAP) + n_args; } + +EXTERN_INLINE StgOffset AP_sizeW ( uint32_t n_args ); +EXTERN_INLINE StgOffset AP_sizeW ( uint32_t n_args ) +{ return sizeofW(StgAP) + n_args; } + +EXTERN_INLINE StgOffset AP_STACK_sizeW ( uint32_t size ); +EXTERN_INLINE StgOffset AP_STACK_sizeW ( uint32_t size ) +{ return sizeofW(StgAP_STACK) + size; } + +EXTERN_INLINE StgOffset CONSTR_sizeW( uint32_t p, uint32_t 
np ); +EXTERN_INLINE StgOffset CONSTR_sizeW( uint32_t p, uint32_t np ) +{ return sizeofW(StgHeader) + p + np; } + +EXTERN_INLINE StgOffset THUNK_SELECTOR_sizeW ( void ); +EXTERN_INLINE StgOffset THUNK_SELECTOR_sizeW ( void ) +{ return sizeofW(StgSelector); } + +EXTERN_INLINE StgOffset BLACKHOLE_sizeW ( void ); +EXTERN_INLINE StgOffset BLACKHOLE_sizeW ( void ) +{ return sizeofW(StgInd); } // a BLACKHOLE is a kind of indirection + +/* -------------------------------------------------------------------------- + Sizes of closures + ------------------------------------------------------------------------*/ + +EXTERN_INLINE StgOffset sizeW_fromITBL( const StgInfoTable* itbl ); +EXTERN_INLINE StgOffset sizeW_fromITBL( const StgInfoTable* itbl ) +{ return sizeofW(StgClosure) + + sizeofW(StgPtr) * itbl->layout.payload.ptrs + + sizeofW(StgWord) * itbl->layout.payload.nptrs; } + +EXTERN_INLINE StgOffset thunk_sizeW_fromITBL( const StgInfoTable* itbl ); +EXTERN_INLINE StgOffset thunk_sizeW_fromITBL( const StgInfoTable* itbl ) +{ return sizeofW(StgThunk) + + sizeofW(StgPtr) * itbl->layout.payload.ptrs + + sizeofW(StgWord) * itbl->layout.payload.nptrs; } + +EXTERN_INLINE StgOffset ap_stack_sizeW( StgAP_STACK* x ); +EXTERN_INLINE StgOffset ap_stack_sizeW( StgAP_STACK* x ) +{ return AP_STACK_sizeW(x->size); } + +EXTERN_INLINE StgOffset ap_sizeW( StgAP* x ); +EXTERN_INLINE StgOffset ap_sizeW( StgAP* x ) +{ return AP_sizeW(x->n_args); } + +EXTERN_INLINE StgOffset pap_sizeW( StgPAP* x ); +EXTERN_INLINE StgOffset pap_sizeW( StgPAP* x ) +{ return PAP_sizeW(x->n_args); } + +EXTERN_INLINE StgWord arr_words_words( StgArrBytes* x); +EXTERN_INLINE StgWord arr_words_words( StgArrBytes* x) +{ return ROUNDUP_BYTES_TO_WDS(x->bytes); } + +EXTERN_INLINE StgOffset arr_words_sizeW( StgArrBytes* x ); +EXTERN_INLINE StgOffset arr_words_sizeW( StgArrBytes* x ) +{ return sizeofW(StgArrBytes) + arr_words_words(x); } + +EXTERN_INLINE StgOffset mut_arr_ptrs_sizeW( StgMutArrPtrs* x ); +EXTERN_INLINE 
StgOffset mut_arr_ptrs_sizeW( StgMutArrPtrs* x ) +{ return sizeofW(StgMutArrPtrs) + x->size; } + +EXTERN_INLINE StgOffset small_mut_arr_ptrs_sizeW( StgSmallMutArrPtrs* x ); +EXTERN_INLINE StgOffset small_mut_arr_ptrs_sizeW( StgSmallMutArrPtrs* x ) +{ return sizeofW(StgSmallMutArrPtrs) + x->ptrs; } + +EXTERN_INLINE StgWord stack_sizeW ( StgStack *stack ); +EXTERN_INLINE StgWord stack_sizeW ( StgStack *stack ) +{ return sizeofW(StgStack) + stack->stack_size; } + +EXTERN_INLINE StgWord bco_sizeW ( StgBCO *bco ); +EXTERN_INLINE StgWord bco_sizeW ( StgBCO *bco ) +{ return bco->size; } + +EXTERN_INLINE StgWord compact_nfdata_full_sizeW ( StgCompactNFData *str ); +EXTERN_INLINE StgWord compact_nfdata_full_sizeW ( StgCompactNFData *str ) +{ return str->totalW; } + +/* + * TODO: Consider to switch return type from 'uint32_t' to 'StgWord' #8742 + * + * (Also for 'closure_sizeW' below) + */ +EXTERN_INLINE uint32_t +closure_sizeW_ (const StgClosure *p, const StgInfoTable *info); +EXTERN_INLINE uint32_t +closure_sizeW_ (const StgClosure *p, const StgInfoTable *info) +{ + switch (info->type) { + case THUNK_0_1: + case THUNK_1_0: + return sizeofW(StgThunk) + 1; + case FUN_0_1: + case CONSTR_0_1: + case FUN_1_0: + case CONSTR_1_0: + return sizeofW(StgHeader) + 1; + case THUNK_0_2: + case THUNK_1_1: + case THUNK_2_0: + return sizeofW(StgThunk) + 2; + case FUN_0_2: + case CONSTR_0_2: + case FUN_1_1: + case CONSTR_1_1: + case FUN_2_0: + case CONSTR_2_0: + return sizeofW(StgHeader) + 2; + case THUNK: + return thunk_sizeW_fromITBL(info); + case THUNK_SELECTOR: + return THUNK_SELECTOR_sizeW(); + case AP_STACK: + return ap_stack_sizeW((StgAP_STACK *)p); + case AP: + return ap_sizeW((StgAP *)p); + case PAP: + return pap_sizeW((StgPAP *)p); + case IND: + return sizeofW(StgInd); + case ARR_WORDS: + return arr_words_sizeW((StgArrBytes *)p); + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + case MUT_ARR_PTRS_FROZEN_CLEAN: + case MUT_ARR_PTRS_FROZEN_DIRTY: + return 
mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); + case SMALL_MUT_ARR_PTRS_CLEAN: + case SMALL_MUT_ARR_PTRS_DIRTY: + case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN: + case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY: + return small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs*)p); + case TSO: + return sizeofW(StgTSO); + case STACK: + return stack_sizeW((StgStack*)p); + case BCO: + return bco_sizeW((StgBCO *)p); + case TREC_CHUNK: + return sizeofW(StgTRecChunk); + default: + return sizeW_fromITBL(info); + } +} + +// The definitive way to find the size, in words, of a heap-allocated closure +EXTERN_INLINE uint32_t closure_sizeW (const StgClosure *p); +EXTERN_INLINE uint32_t closure_sizeW (const StgClosure *p) +{ + return closure_sizeW_(p, get_itbl(p)); +} + +/* ----------------------------------------------------------------------------- + Sizes of stack frames + -------------------------------------------------------------------------- */ + +EXTERN_INLINE StgWord stack_frame_sizeW( StgClosure *frame ); +EXTERN_INLINE StgWord stack_frame_sizeW( StgClosure *frame ) +{ + const StgRetInfoTable *info; + + info = get_ret_itbl(frame); + switch (info->i.type) { + + case RET_FUN: + return sizeofW(StgRetFun) + ((StgRetFun *)frame)->size; + + case RET_BIG: + return 1 + GET_LARGE_BITMAP(&info->i)->size; + + case RET_BCO: + return 2 + BCO_BITMAP_SIZE((StgBCO *)((P_)frame)[1]); + + default: + return 1 + BITMAP_SIZE(info->i.layout.bitmap); + } +} + +/* ----------------------------------------------------------------------------- + StgMutArrPtrs macros + + An StgMutArrPtrs has a card table to indicate which elements are + dirty for the generational GC. The card table is an array of + bytes, where each byte covers (1 << MUT_ARR_PTRS_CARD_BITS) + elements. The card table is directly after the array data itself. 
+ -------------------------------------------------------------------------- */ + +// The number of card bytes needed +INLINE_HEADER W_ mutArrPtrsCards (W_ elems) +{ + return (W_)((elems + (1 << MUT_ARR_PTRS_CARD_BITS) - 1) + >> MUT_ARR_PTRS_CARD_BITS); +} + +// The number of words in the card table +INLINE_HEADER W_ mutArrPtrsCardTableSize (W_ elems) +{ + return ROUNDUP_BYTES_TO_WDS(mutArrPtrsCards(elems)); +} + +// The address of the card for a particular card number +INLINE_HEADER StgWord8 *mutArrPtrsCard (StgMutArrPtrs *a, W_ n) +{ + return ((StgWord8 *)&(a->payload[a->ptrs]) + n); +} + +/* ----------------------------------------------------------------------------- + Replacing a closure with a different one. We must call + OVERWRITING_CLOSURE(p) on the old closure that is about to be + overwritten. + + Note [zeroing slop when overwriting closures] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + When we overwrite a closure in the heap with a smaller one, in some scenarios + we need to write zero words into "slop"; the memory that is left + unoccupied. See Note [slop on the heap] + + Zeroing slop is required for: + + - full-heap sanity checks (DEBUG, and +RTS -DS), + + - LDV profiling (PROFILING, and +RTS -hb) and + + However we can get into trouble if we're zeroing slop for ordinarily + immutable closures when using multiple threads, since there is nothing + preventing another thread from still being in the process of reading the + memory we're about to zero. + + Thus, with the THREADED RTS and +RTS -N2 or greater we must not zero + immutable closure's slop. + + Hence, an immutable closure's slop is zeroed when either: + + - PROFILING && era > 0 (LDV is on) or + - !THREADED && DEBUG + + Additionally: + + - LDV profiling and +RTS -N2 are incompatible, + + - full-heap sanity checks are disabled for the THREADED RTS, at least when + they don't run right after GC when there is no slop. + See Note [heap sanity checking with SMP]. 
+
+ -------------------------------------------------------------------------- */
+
+#if defined(PROFILING) || defined(DEBUG)
+#define OVERWRITING_CLOSURE(c) \
+ overwritingClosure(c)
+#define OVERWRITING_CLOSURE_SIZE(c, size) \
+ overwritingClosureSize(c, size)
+#define OVERWRITING_CLOSURE_MUTABLE(c, off) \
+ overwritingMutableClosureOfs(c, off)
+#else
+#define OVERWRITING_CLOSURE(c) \
+ do { (void) sizeof(c); } while(0)
+#define OVERWRITING_CLOSURE_SIZE(c, size) \
+ do { (void) sizeof(c); (void) sizeof(size); } while(0)
+#define OVERWRITING_CLOSURE_MUTABLE(c, off) \
+ do { (void) sizeof(c); (void) sizeof(off); } while(0)
+#endif
+
+#if defined(PROFILING)
+void LDV_recordDead (const StgClosure *c, uint32_t size);
+RTS_PRIVATE bool isInherentlyUsed ( StgHalfWord closure_type );
+#endif
+
+EXTERN_INLINE void
+zeroSlop (
+ StgClosure *p,
+ uint32_t offset, /*< offset to start zeroing at, in words */
+ uint32_t size, /*< total closure size, in words */
+ bool known_mutable /*< is this a closure whose slop we can always zero? */
+ );
+
+EXTERN_INLINE void
+zeroSlop (StgClosure *p, uint32_t offset, uint32_t size, bool known_mutable)
+{
+ // see Note [zeroing slop when overwriting closures], also #8402
+
+ const bool want_to_zero_immutable_slop = false
+ // Sanity checking (-DS) is enabled
+ || RTS_DEREF(RtsFlags).DebugFlags.sanity
+#if defined(PROFILING)
+ // LDV profiler is enabled
+ || era > 0
+#endif
+ ;
+
+ const bool can_zero_immutable_slop =
+ // Only if we're running single threaded.
+ RTS_DEREF(RtsFlags).ParFlags.nCapabilities <= 1;
+
+ const bool zero_slop_immutable =
+ want_to_zero_immutable_slop && can_zero_immutable_slop;
+
+ const bool zero_slop_mutable =
+#if defined(PROFILING)
+ // Always zero mutable closure slop when profiling. 
We do this to cover
+ // the case of shrinking mutable arrays in pinned blocks for the heap
+ // profiler, see Note [skipping slop in the heap profiler]
+ //
+ // TODO: We could make this check more specific and only zero if the
+ // object is in a BF_PINNED bdescr here. Update Note [slop on the heap]
+ // and [zeroing slop when overwriting closures] if you change this.
+ true
+#else
+ zero_slop_immutable
+#endif
+ ;
+
+ const bool zero_slop =
+ // If we're not sure this is a mutable closure treat it like an
+ // immutable one.
+ known_mutable ? zero_slop_mutable : zero_slop_immutable;
+
+ if(!zero_slop)
+ return;
+
+ for (uint32_t i = offset; i < size; i++) {
+ ((StgWord *)p)[i] = 0;
+ }
+}
+
+EXTERN_INLINE void overwritingClosure (StgClosure *p);
+EXTERN_INLINE void overwritingClosure (StgClosure *p)
+{
+ W_ size = closure_sizeW(p);
+#if defined(PROFILING)
+ if(era > 0 && !isInherentlyUsed(get_itbl(p)->type))
+ LDV_recordDead(p, size);
+#endif
+ zeroSlop(p, sizeofW(StgThunkHeader), size, /*known_mutable=*/false);
+}
+
+// Version of 'overwritingClosure' which overwrites only a suffix of a
+// closure. The offset is expressed in words relative to 'p' and shall
+// be less than or equal to closure_sizeW(p), and usually at least as
+// large as the respective thunk header.
+EXTERN_INLINE void
+overwritingMutableClosureOfs (StgClosure *p, uint32_t offset);
+
+EXTERN_INLINE void
+overwritingMutableClosureOfs (StgClosure *p, uint32_t offset)
+{
+ // Since overwritingMutableClosureOfs is only ever called by:
+ //
+ // - shrinkMutableByteArray# (ARR_WORDS) and
+ //
+ // - shrinkSmallMutableArray# (SMALL_MUT_ARR_PTRS)
+ //
+ // we can safely omit the LDV_recordDead call. Since these closures are
+ // considered inherently used we don't need to track their destruction. 
+#if defined(PROFILING) + ASSERT(isInherentlyUsed(get_itbl(p)->type) == true); +#endif + zeroSlop(p, offset, closure_sizeW(p), /*known_mutable=*/true); +} + +// Version of 'overwritingClosure' which takes closure size as argument. +EXTERN_INLINE void overwritingClosureSize (StgClosure *p, uint32_t size /* in words */); +EXTERN_INLINE void overwritingClosureSize (StgClosure *p, uint32_t size) +{ + // This function is only called from stg_AP_STACK so we can assume it's not + // inherently used. +#if defined(PROFILING) + ASSERT(isInherentlyUsed(get_itbl(p)->type) == false); + if(era > 0) + LDV_recordDead(p, size); +#endif + zeroSlop(p, sizeofW(StgThunkHeader), size, /*known_mutable=*/false); +} diff --git a/rts/include/rts/storage/ClosureTypes.h b/rts/include/rts/storage/ClosureTypes.h new file mode 100644 index 0000000000..85dc1a0ce4 --- /dev/null +++ b/rts/include/rts/storage/ClosureTypes.h @@ -0,0 +1,86 @@ +/* ---------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2005 + * + * Closure Type Constants: out here because the native code generator + * needs to get at them. 
+ * + * -------------------------------------------------------------------------- */ + +#pragma once + +/* + * WARNING WARNING WARNING + * + * If you add or delete any closure types, don't forget to update the following, + * - the closure flags table in rts/ClosureFlags.c + * - isRetainer in rts/RetainerProfile.c + * - the closure_type_names list in rts/Printer.c + */ + +/* Object tag 0 raises an internal error */ +#define INVALID_OBJECT 0 +#define CONSTR 1 +#define CONSTR_1_0 2 +#define CONSTR_0_1 3 +#define CONSTR_2_0 4 +#define CONSTR_1_1 5 +#define CONSTR_0_2 6 +#define CONSTR_NOCAF 7 +#define FUN 8 +#define FUN_1_0 9 +#define FUN_0_1 10 +#define FUN_2_0 11 +#define FUN_1_1 12 +#define FUN_0_2 13 +#define FUN_STATIC 14 +#define THUNK 15 +#define THUNK_1_0 16 +#define THUNK_0_1 17 +#define THUNK_2_0 18 +#define THUNK_1_1 19 +#define THUNK_0_2 20 +#define THUNK_STATIC 21 +#define THUNK_SELECTOR 22 +#define BCO 23 +#define AP 24 +#define PAP 25 +#define AP_STACK 26 +#define IND 27 +#define IND_STATIC 28 +#define RET_BCO 29 +#define RET_SMALL 30 +#define RET_BIG 31 +#define RET_FUN 32 +#define UPDATE_FRAME 33 +#define CATCH_FRAME 34 +#define UNDERFLOW_FRAME 35 +#define STOP_FRAME 36 +#define BLOCKING_QUEUE 37 +#define BLACKHOLE 38 +#define MVAR_CLEAN 39 +#define MVAR_DIRTY 40 +#define TVAR 41 +#define ARR_WORDS 42 +#define MUT_ARR_PTRS_CLEAN 43 +#define MUT_ARR_PTRS_DIRTY 44 +#define MUT_ARR_PTRS_FROZEN_DIRTY 45 +#define MUT_ARR_PTRS_FROZEN_CLEAN 46 +#define MUT_VAR_CLEAN 47 +#define MUT_VAR_DIRTY 48 +#define WEAK 49 +#define PRIM 50 +#define MUT_PRIM 51 +#define TSO 52 +#define STACK 53 +#define TREC_CHUNK 54 +#define ATOMICALLY_FRAME 55 +#define CATCH_RETRY_FRAME 56 +#define CATCH_STM_FRAME 57 +#define WHITEHOLE 58 +#define SMALL_MUT_ARR_PTRS_CLEAN 59 +#define SMALL_MUT_ARR_PTRS_DIRTY 60 +#define SMALL_MUT_ARR_PTRS_FROZEN_DIRTY 61 +#define SMALL_MUT_ARR_PTRS_FROZEN_CLEAN 62 +#define COMPACT_NFDATA 63 +#define N_CLOSURE_TYPES 64 diff --git 
a/rts/include/rts/storage/Closures.h b/rts/include/rts/storage/Closures.h new file mode 100644 index 0000000000..ebb836bca2 --- /dev/null +++ b/rts/include/rts/storage/Closures.h @@ -0,0 +1,488 @@ +/* ---------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2004 + * + * Closures + * + * -------------------------------------------------------------------------- */ + +#pragma once + +/* + * The Layout of a closure header depends on which kind of system we're + * compiling for: profiling, parallel, ticky, etc. + */ + +/* ----------------------------------------------------------------------------- + The profiling header + -------------------------------------------------------------------------- */ + +typedef struct { + CostCentreStack *ccs; + union { + StgWord trav; /* Heap traversal */ + StgWord ldvw; /* Lag/Drag/Void Word */ + } hp; + // Heap profiling header. This field is shared among the various heap + // profiling modes. Currently it is used by ProfHeap.c for Lag/Drag/Void + // profiling and by the heap traversal modes using TraverseHeap.c such as + // the retainer profiler. +} StgProfHeader; + +/* ----------------------------------------------------------------------------- + The SMP header + + A thunk has a padding word to take the updated value. This is so + that the update doesn't overwrite the payload, so we can avoid + needing to lock the thunk during entry and update. + + Note: this doesn't apply to THUNK_STATICs, which have no payload. + + Note: we leave this padding word in all ways, rather than just SMP, + so that we don't have to recompile all our libraries for SMP. 
+ -------------------------------------------------------------------------- */ + +typedef struct { + StgWord pad; +} StgSMPThunkHeader; + +/* ----------------------------------------------------------------------------- + The full fixed-size closure header + + The size of the fixed header is the sum of the optional parts plus a single + word for the entry code pointer. + -------------------------------------------------------------------------- */ + +typedef struct { + // If TABLES_NEXT_TO_CODE is defined, then `info` is offset by + // `sizeof(StgInfoTable)` and so points to the `code` field of the + // StgInfoTable! You may want to use `get_itbl` to get the pointer to the + // start of the info table. See + // https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/rts/storage/heap-objects#tables_next_to_code. + const StgInfoTable* info; +#if defined(PROFILING) + StgProfHeader prof; +#endif +} StgHeader; + +typedef struct { + const StgInfoTable* info; +#if defined(PROFILING) + StgProfHeader prof; +#endif + StgSMPThunkHeader smp; +} StgThunkHeader; + +#define THUNK_EXTRA_HEADER_W (sizeofW(StgThunkHeader)-sizeofW(StgHeader)) + +/* ----------------------------------------------------------------------------- + Closure Types + + For any given closure type (defined in InfoTables.h), there is a + corresponding structure defined below. 
The name of the structure + is obtained by concatenating the closure type with '_closure' + -------------------------------------------------------------------------- */ + +/* All closures follow the generic format */ + +typedef struct StgClosure_ { + StgHeader header; + struct StgClosure_ *payload[]; +} *StgClosurePtr; // StgClosure defined in rts/Types.h + +typedef struct StgThunk_ { + StgThunkHeader header; + struct StgClosure_ *payload[]; +} StgThunk; + +typedef struct { + StgThunkHeader header; + StgClosure *selectee; +} StgSelector; + +/* + PAP payload contains pointers and non-pointers interleaved and we only have + one info table for PAPs (stg_PAP_info). To visit pointers in a PAP payload we + use the `fun`s bitmap. For a PAP with n_args arguments the first n_args bits + in the fun's bitmap tell us which payload locations contain pointers. +*/ +typedef struct { + StgHeader header; + StgHalfWord arity; /* zero if it is an AP */ + StgHalfWord n_args; + StgClosure *fun; /* really points to a fun */ + StgClosure *payload[]; +} StgPAP; + +typedef struct { + StgThunkHeader header; + StgHalfWord arity; /* zero if it is an AP */ + StgHalfWord n_args; + StgClosure *fun; /* really points to a fun */ + StgClosure *payload[]; +} StgAP; + +typedef struct { + StgThunkHeader header; + StgWord size; /* number of words in payload */ + StgClosure *fun; + StgClosure *payload[]; /* contains a chunk of *stack* */ +} StgAP_STACK; + +typedef struct { + StgHeader header; + StgClosure *indirectee; +} StgInd; + +typedef struct { + StgHeader header; + StgClosure *indirectee; + StgClosure *static_link; // See Note [CAF lists] + const StgInfoTable *saved_info; + // `saved_info` also used for the link field for `debug_caf_list`, + // see `newCAF` and Note [CAF lists] in rts/sm/Storage.h. 
+} StgIndStatic; + +typedef struct StgBlockingQueue_ { + StgHeader header; + struct StgBlockingQueue_ *link; + // here so it looks like an IND, to be able to skip the queue without + // deleting it (done in wakeBlockingQueue()) + StgClosure *bh; // the BLACKHOLE + StgTSO *owner; + struct MessageBlackHole_ *queue; + // holds TSOs blocked on `bh` +} StgBlockingQueue; + +typedef struct { + StgHeader header; + StgWord bytes; + StgWord payload[]; +} StgArrBytes; + +typedef struct { + StgHeader header; + StgWord ptrs; + StgWord size; // ptrs plus card table + StgClosure *payload[]; + // see also: StgMutArrPtrs macros in ClosureMacros.h +} StgMutArrPtrs; + +typedef struct { + StgHeader header; + StgWord ptrs; + StgClosure *payload[]; +} StgSmallMutArrPtrs; + +typedef struct { + StgHeader header; + StgClosure *var; +} StgMutVar; + +typedef struct _StgUpdateFrame { + StgHeader header; + StgClosure *updatee; +} StgUpdateFrame; + +typedef struct { + StgHeader header; + StgWord exceptions_blocked; + StgClosure *handler; +} StgCatchFrame; + +typedef struct { + const StgInfoTable* info; + struct StgStack_ *next_chunk; +} StgUnderflowFrame; + +typedef struct { + StgHeader header; +} StgStopFrame; + +typedef struct { + StgHeader header; + StgWord data; +} StgIntCharlikeClosure; + +/* statically allocated */ +typedef struct { + StgHeader header; +} StgRetry; + +typedef struct _StgStableName { + StgHeader header; + StgWord sn; +} StgStableName; + +typedef struct _StgWeak { /* Weak v */ + StgHeader header; + StgClosure *cfinalizers; + StgClosure *key; + StgClosure *value; /* v */ + StgClosure *finalizer; + struct _StgWeak *link; +} StgWeak; + +typedef struct _StgCFinalizerList { + StgHeader header; + StgClosure *link; + void (*fptr)(void); + void *ptr; + void *eptr; + StgWord flag; /* has environment (0 or 1) */ +} StgCFinalizerList; + +/* Byte code objects. 
These are fixed size objects with pointers to + * four arrays, designed so that a BCO can be easily "re-linked" to + * other BCOs, to facilitate GHC's intelligent recompilation. The + * array of instructions is static and not re-generated when the BCO + * is re-linked, but the other 3 arrays will be regenerated. + * + * A BCO represents either a function or a stack frame. In each case, + * it needs a bitmap to describe to the garbage collector the + * pointerhood of its arguments/free variables respectively, and in + * the case of a function it also needs an arity. These are stored + * directly in the BCO, rather than in the instrs array, for two + * reasons: + * (a) speed: we need to get at the bitmap info quickly when + * the GC is examining APs and PAPs that point to this BCO + * (b) a subtle interaction with the compacting GC. In compacting + * GC, the info that describes the size/layout of a closure + * cannot be in an object more than one level of indirection + * away from the current object, because of the order in + * which pointers are updated to point to their new locations. + */ + +typedef struct { + StgHeader header; + StgArrBytes *instrs; /* a pointer to an ArrWords */ + StgArrBytes *literals; /* a pointer to an ArrWords */ + StgMutArrPtrs *ptrs; /* a pointer to a MutArrPtrs */ + StgHalfWord arity; /* arity of this BCO */ + StgHalfWord size; /* size of this BCO (in words) */ + StgWord bitmap[]; /* an StgLargeBitmap */ +} StgBCO; + +#define BCO_BITMAP(bco) ((StgLargeBitmap *)((StgBCO *)(bco))->bitmap) +#define BCO_BITMAP_SIZE(bco) (BCO_BITMAP(bco)->size) +#define BCO_BITMAP_BITS(bco) (BCO_BITMAP(bco)->bitmap) +#define BCO_BITMAP_SIZEW(bco) ((BCO_BITMAP_SIZE(bco) + BITS_IN(StgWord) - 1) \ + / BITS_IN(StgWord)) + +/* A function return stack frame: used when saving the state for a + * garbage collection at a function entry point. 
The function + * arguments are on the stack, and we also save the function (its + * info table describes the pointerhood of the arguments). + * + * The stack frame size is also cached in the frame for convenience. + * + * The only RET_FUN is stg_gc_fun, which is created by __stg_gc_fun, + * both in HeapStackCheck.cmm. + */ +typedef struct { + const StgInfoTable* info; + StgWord size; + StgClosure * fun; + StgClosure * payload[]; +} StgRetFun; + +/* Concurrent communication objects */ + +typedef struct StgMVarTSOQueue_ { + StgHeader header; + struct StgMVarTSOQueue_ *link; + struct StgTSO_ *tso; +} StgMVarTSOQueue; + +typedef struct { + StgHeader header; + struct StgMVarTSOQueue_ *head; + struct StgMVarTSOQueue_ *tail; + StgClosure* value; +} StgMVar; + + +/* STM data structures + * + * StgTVar defines the only type that can be updated through the STM + * interface. + * + * Note that various optimisations may be possible in order to use less + * space for these data structures at the cost of more complexity in the + * implementation: + * + * - In StgTVar, current_value and first_watch_queue_entry could be held in + * the same field: if any thread is waiting then its expected_value for + * the tvar is the current value. + * + * - In StgTRecHeader, it might be worthwhile having separate chunks + * of read-only and read-write locations. This would save a + * new_value field in the read-only locations. + * + * - In StgAtomicallyFrame, we could combine the waiting bit into + * the header (maybe a different info tbl for a waiting transaction). + * This means we can specialise the code for the atomically frame + * (it immediately switches on frame->waiting anyway). 
+ */ + +typedef struct StgTRecHeader_ StgTRecHeader; + +typedef struct StgTVarWatchQueue_ { + StgHeader header; + StgClosure *closure; // StgTSO + struct StgTVarWatchQueue_ *next_queue_entry; + struct StgTVarWatchQueue_ *prev_queue_entry; +} StgTVarWatchQueue; + +typedef struct { + StgHeader header; + StgClosure *current_value; /* accessed via atomics */ + StgTVarWatchQueue *first_watch_queue_entry; /* accessed via atomics */ + StgInt num_updates; /* accessed via atomics */ +} StgTVar; + +/* new_value == expected_value for read-only accesses */ +/* new_value is a StgTVarWatchQueue entry when trec in state TREC_WAITING */ +typedef struct { + StgTVar *tvar; + StgClosure *expected_value; + StgClosure *new_value; +#if defined(THREADED_RTS) + StgInt num_updates; +#endif +} TRecEntry; + +#define TREC_CHUNK_NUM_ENTRIES 16 + +typedef struct StgTRecChunk_ { + StgHeader header; + struct StgTRecChunk_ *prev_chunk; + StgWord next_entry_idx; + TRecEntry entries[TREC_CHUNK_NUM_ENTRIES]; +} StgTRecChunk; + +typedef enum { + TREC_ACTIVE, /* Transaction in progress, outcome undecided */ + TREC_CONDEMNED, /* Transaction in progress, inconsistent / out of date reads */ + TREC_COMMITTED, /* Transaction has committed, now updating tvars */ + TREC_ABORTED, /* Transaction has aborted, now reverting tvars */ + TREC_WAITING, /* Transaction currently waiting */ +} TRecState; + +struct StgTRecHeader_ { + StgHeader header; + struct StgTRecHeader_ *enclosing_trec; + StgTRecChunk *current_chunk; + TRecState state; +}; + +typedef struct { + StgHeader header; + StgClosure *code; + StgClosure *result; +} StgAtomicallyFrame; + +typedef struct { + StgHeader header; + StgClosure *code; + StgClosure *handler; +} StgCatchSTMFrame; + +typedef struct { + StgHeader header; + StgWord running_alt_code; + StgClosure *first_code; + StgClosure *alt_code; +} StgCatchRetryFrame; + +/* ---------------------------------------------------------------------------- + Messages + 
------------------------------------------------------------------------- */ + +typedef struct Message_ { + StgHeader header; + struct Message_ *link; +} Message; + +typedef struct MessageWakeup_ { + StgHeader header; + Message *link; + StgTSO *tso; +} MessageWakeup; + +typedef struct MessageThrowTo_ { + StgHeader header; + struct MessageThrowTo_ *link; + StgTSO *source; + StgTSO *target; + StgClosure *exception; +} MessageThrowTo; + +typedef struct MessageBlackHole_ { + StgHeader header; + struct MessageBlackHole_ *link; + // here so it looks like an IND, to be able to skip the message without + // deleting it (done in throwToMsg()) + StgTSO *tso; + StgClosure *bh; +} MessageBlackHole; + +/* ---------------------------------------------------------------------------- + Compact Regions + ------------------------------------------------------------------------- */ + +// +// A compact region is a list of blocks. Each block starts with an +// StgCompactNFDataBlock structure, and the list is chained through the next +// field of these structs. (the link field of the bdescr is used to chain +// together multiple compact region on the compact_objects field of a +// generation). +// +// See Note [Compact Normal Forms] for details +// +typedef struct StgCompactNFDataBlock_ { + struct StgCompactNFDataBlock_ *self; + // the address of this block this is copied over to the + // receiving end when serializing a compact, so the receiving + // end can allocate the block at best as it can, and then + // verify if pointer adjustment is needed or not by comparing + // self with the actual address; the same data is sent over as + // SerializedCompact metadata, but having it here simplifies + // the fixup implementation. 
+ struct StgCompactNFData_ *owner; + // the closure who owns this block (used in objectGetCompact) + struct StgCompactNFDataBlock_ *next; + // chain of blocks used for serialization and freeing +} StgCompactNFDataBlock; + +// +// This is the Compact# primitive object. +// +typedef struct StgCompactNFData_ { + StgHeader header; + // for sanity and other checks in practice, nothing should ever + // need the compact info pointer (we don't even need fwding + // pointers because it's a large object) + StgWord totalW; + // Total number of words in all blocks in the compact + StgWord autoBlockW; + // size of automatically appended blocks + StgPtr hp, hpLim; + // the beginning and end of the free area in the nursery block. This is + // just a convenience so that we can avoid multiple indirections through + // the nursery pointer below during compaction. + StgCompactNFDataBlock *nursery; + // where to (try to) allocate from when appending + StgCompactNFDataBlock *last; + // the last block of the chain (to know where to append new + // blocks for resize) + struct hashtable *hash; + // the hash table for the current compaction, or NULL if + // there's no (sharing-preserved) compaction in progress. + StgClosure *result; + // Used temporarily to store the result of compaction. Doesn't need to be + // a GC root. + struct StgCompactNFData_ *link; + // Used by compacting GC for linking CNFs with threaded hash tables. See + // Note [CNFs in compacting GC] in Compact.c for details. +} StgCompactNFData; diff --git a/rts/include/rts/storage/FunTypes.h b/rts/include/rts/storage/FunTypes.h new file mode 100644 index 0000000000..3b066ef7dc --- /dev/null +++ b/rts/include/rts/storage/FunTypes.h @@ -0,0 +1,54 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2002 + * + * Things for functions. 
+ * + * ---------------------------------------------------------------------------*/ + +#pragma once + +/* generic - function comes with a small bitmap */ +#define ARG_GEN 0 + +/* generic - function comes with a large bitmap */ +#define ARG_GEN_BIG 1 + +/* BCO - function is really a BCO */ +#define ARG_BCO 2 + +/* + * Specialised function types: bitmaps and calling sequences + * for these functions are pre-generated: see ghc/utils/genapply and + * generated code in ghc/rts/AutoApply.cmm. + * + * NOTE: other places to change if you change this table: + * - utils/genapply/Main.hs: stackApplyTypes + * - GHC.StgToCmm.Layout: stdPattern + */ +#define ARG_NONE 3 +#define ARG_N 4 +#define ARG_P 5 +#define ARG_F 6 +#define ARG_D 7 +#define ARG_L 8 +#define ARG_V16 9 +#define ARG_V32 10 +#define ARG_V64 11 +#define ARG_NN 12 +#define ARG_NP 13 +#define ARG_PN 14 +#define ARG_PP 15 +#define ARG_NNN 16 +#define ARG_NNP 17 +#define ARG_NPN 18 +#define ARG_NPP 19 +#define ARG_PNN 20 +#define ARG_PNP 21 +#define ARG_PPN 22 +#define ARG_PPP 23 +#define ARG_PPPP 24 +#define ARG_PPPPP 25 +#define ARG_PPPPPP 26 +#define ARG_PPPPPPP 27 +#define ARG_PPPPPPPP 28 diff --git a/rts/include/rts/storage/GC.h b/rts/include/rts/storage/GC.h new file mode 100644 index 0000000000..478503aaee --- /dev/null +++ b/rts/include/rts/storage/GC.h @@ -0,0 +1,261 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2004 + * + * External Storage Manger Interface + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#include <stddef.h> +#include "rts/OSThreads.h" + +/* ----------------------------------------------------------------------------- + * Generational GC + * + * We support an arbitrary number of generations. Notes (in no particular + * order): + * + * - Objects "age" in the nursery for one GC cycle before being promoted + * to the next generation. 
There is no aging in other generations. + * + * - generation 0 is the allocation area. It is given + * a fixed set of blocks during initialisation, and these blocks + * normally stay in G0S0. In parallel execution, each + * Capability has its own nursery. + * + * - during garbage collection, each generation which is an + * evacuation destination (i.e. all generations except G0) is + * allocated a to-space. evacuated objects are allocated into + * the generation's to-space until GC is finished, when the + * original generations's contents may be freed and replaced + * by the to-space. + * + * - the mutable-list is per-generation. G0 doesn't have one + * (since every garbage collection collects at least G0). + * + * - block descriptors contain a pointer to the generation that + * the block belongs to, for convenience. + * + * - static objects are stored in per-generation lists. See GC.c for + * details of how we collect CAFs in the generational scheme. + * + * - large objects are per-generation, and are promoted in the + * same way as small objects. + * + * ------------------------------------------------------------------------- */ + +// A count of blocks needs to store anything up to the size of memory +// divided by the block size. The safest thing is therefore to use a +// type that can store the full range of memory addresses, +// ie. StgWord. Note that we have had some tricky int overflows in a +// couple of cases caused by using ints rather than longs (e.g. #5086) + +typedef StgWord memcount; + +typedef struct nursery_ { + bdescr * blocks; + memcount n_blocks; +} nursery; + +// Nursery invariants: +// +// - cap->r.rNursery points to the nursery for this capability +// +// - cap->r.rCurrentNursery points to the block in the nursery that we are +// currently allocating into. While in Haskell the current heap pointer is +// in Hp, outside Haskell it is stored in cap->r.rCurrentNursery->free. 
+// +// - the blocks *after* cap->rCurrentNursery in the chain are empty +// (although their bd->free pointers have not been updated to +// reflect that) +// +// - the blocks *before* cap->rCurrentNursery have been used. Except +// for rCurrentAlloc. +// +// - cap->r.rCurrentAlloc is either NULL, or it points to a block in +// the nursery *before* cap->r.rCurrentNursery. +// +// See also Note [allocation accounting] to understand how total +// memory allocation is tracked. + +typedef struct generation_ { + uint32_t no; // generation number + + bdescr * blocks; // blocks in this gen + memcount n_blocks; // number of blocks + memcount n_words; // number of used words + + bdescr * large_objects; // large objects (doubly linked) + memcount n_large_blocks; // no. of blocks used by large objs + memcount n_large_words; // no. of words used by large objs + memcount n_new_large_words; // words of new large objects + // (for doYouWantToGC()) + + bdescr * compact_objects; // compact objects chain + // the second block in each compact is + // linked from the closure object, while + // the second compact object in the + // chain is linked from bd->link (like + // large objects) + memcount n_compact_blocks; // no. of blocks used by all compacts + bdescr * compact_blocks_in_import; // compact objects being imported + // (not known to the GC because + // potentially invalid, but we + // need to keep track of them + // to avoid assertions in Sanity) + // this is a list shaped like compact_objects + memcount n_compact_blocks_in_import; // no. of blocks used by compacts + // being imported + + // Max blocks to allocate in this generation before collecting it. 
Collect + // this generation when + // + // n_blocks + n_large_blocks + n_compact_blocks > max_blocks + // + memcount max_blocks; + + StgTSO * threads; // threads in this gen + // linked via global_link + StgWeak * weak_ptr_list; // weak pointers in this gen + + struct generation_ *to; // destination gen for live objects + + // stats information + uint32_t collections; + uint32_t par_collections; + uint32_t failed_promotions; // Currently unused + + // ------------------------------------ + // Fields below are used during GC only + +#if defined(THREADED_RTS) + char pad[128]; // make sure the following is + // on a separate cache line. + SpinLock sync; // lock for large_objects + // and scavenged_large_objects +#endif + + int mark; // mark (not copy)? (old gen only) + int compact; // compact (not sweep)? (old gen only) + + // During GC, if we are collecting this gen, blocks and n_blocks + // are copied into the following two fields. After GC, these blocks + // are freed. + bdescr * old_blocks; // bdescr of first from-space block + memcount n_old_blocks; // number of blocks in from-space + memcount live_estimate; // for sweeping: estimate of live data + + bdescr * scavenged_large_objects; // live large objs after GC (d-link) + memcount n_scavenged_large_blocks; // size (not count) of above + + bdescr * live_compact_objects; // live compact objs after GC (d-link) + memcount n_live_compact_blocks; // size (not count) of above + + bdescr * bitmap; // bitmap for compacting collection + + StgTSO * old_threads; + StgWeak * old_weak_ptr_list; +} generation; + +extern generation * generations; +extern generation * g0; +extern generation * oldest_gen; + +typedef void(*ListBlocksCb)(void *user, bdescr *); +void listAllBlocks(ListBlocksCb cb, void *user); + +/* ----------------------------------------------------------------------------- + Generic allocation + + StgPtr allocate(Capability *cap, W_ n) + Allocates memory from the nursery in + the current Capability. 
+ + StgPtr allocatePinned(Capability *cap, W_ n, W_ alignment, W_ align_off) + Allocates a chunk of contiguous store + n words long, which is at a fixed + address (won't be moved by GC). The + word at the byte offset 'align_off' + will be aligned to 'alignment', which + must be a power of two. + Returns a pointer to the first word. + Always succeeds. + + NOTE: the GC can't in general handle + pinned objects, so allocatePinned() + can only be used for ByteArrays at the + moment. + + Don't forget to TICK_ALLOC_XXX(...) + after calling allocate or + allocatePinned, for the + benefit of the ticky-ticky profiler. + + -------------------------------------------------------------------------- */ + +StgPtr allocate ( Capability *cap, W_ n ); +StgPtr allocateMightFail ( Capability *cap, W_ n ); +StgPtr allocatePinned ( Capability *cap, W_ n, W_ alignment, W_ align_off); + +/* memory allocator for executable memory */ +typedef void* AdjustorWritable; +typedef void* AdjustorExecutable; + +void flushExec(W_ len, AdjustorExecutable exec_addr); + +// Used by GC checks in external .cmm code: +extern W_ large_alloc_lim; + +/* ----------------------------------------------------------------------------- + Performing Garbage Collection + -------------------------------------------------------------------------- */ + +void performGC(void); +void performMajorGC(void); + +/* ----------------------------------------------------------------------------- + The CAF table - used to let us revert CAFs in GHCi + -------------------------------------------------------------------------- */ + +StgInd *newCAF (StgRegTable *reg, StgIndStatic *caf); +StgInd *newRetainedCAF (StgRegTable *reg, StgIndStatic *caf); +StgInd *newGCdCAF (StgRegTable *reg, StgIndStatic *caf); +void revertCAFs (void); + +// Request that all CAFs are retained indefinitely. 
+// (preferably use RtsConfig.keep_cafs instead) +void setKeepCAFs (void); + +// Let the runtime know that all the CAFs in high mem are not +// to be retained. Useful in conjunction with loadNativeObj +void setHighMemDynamic (void); + +/* ----------------------------------------------------------------------------- + This is the write barrier for MUT_VARs, a.k.a. IORefs. A + MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY + is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY + and is put on the mutable list. + -------------------------------------------------------------------------- */ + +void dirty_MUT_VAR(StgRegTable *reg, StgMutVar *mv, StgClosure *old); + +/* set to disable CAF garbage collection in GHCi. */ +/* (needed when dynamic libraries are used). */ +extern bool keepCAFs; + +#include "rts/Flags.h" + +INLINE_HEADER void initBdescr(bdescr *bd, generation *gen, generation *dest) +{ + RELAXED_STORE(&bd->gen, gen); + RELAXED_STORE(&bd->gen_no, gen->no); + RELAXED_STORE(&bd->dest_no, dest->no); + +#if !IN_STG_CODE + /* See Note [RtsFlags is a pointer in STG code] */ + ASSERT(gen->no < RtsFlags.GcFlags.generations); + ASSERT(dest->no < RtsFlags.GcFlags.generations); +#endif +} diff --git a/rts/include/rts/storage/Heap.h b/rts/include/rts/storage/Heap.h new file mode 100644 index 0000000000..b3b1efaf9f --- /dev/null +++ b/rts/include/rts/storage/Heap.h @@ -0,0 +1,31 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The University of Glasgow 2006-2017 + * + * Introspection into GHC's heap representation + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#include "rts/storage/Closures.h" + +StgMutArrPtrs *heap_view_closurePtrs(Capability *cap, StgClosure *closure); + +void heap_view_closure_ptrs_in_pap_payload(StgClosure *ptrs[], StgWord *nptrs + , StgClosure *fun, StgClosure **payload, StgWord size); + +StgWord 
heap_view_closureSize(StgClosure *closure); + +/* + * Collect the pointers of a closure into the given array. The given array should be + * large enough to hold all collected pointers e.g. + * `heap_view_closureSize(closure)`. Returns the number of pointers collected. + * The caller must ensure that `closure` is not modified (or moved by the GC) + * for the duration of the call to `collect_pointers`. + * + * In principle this is + * StgWord collect_pointers(StgClosure *closure, StgWord size, StgClosure *ptrs[size]); + * but we cannot write this and retain C++ compatibility. + */ +StgWord collect_pointers(StgClosure *closure, StgClosure *ptrs[]); diff --git a/rts/include/rts/storage/InfoTables.h b/rts/include/rts/storage/InfoTables.h new file mode 100644 index 0000000000..b97e12982b --- /dev/null +++ b/rts/include/rts/storage/InfoTables.h @@ -0,0 +1,405 @@ +/* ---------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2002 + * + * Info Tables + * + * -------------------------------------------------------------------------- */ + +#pragma once + +/* ---------------------------------------------------------------------------- + Relative pointers + + Several pointer fields in info tables are expressed as offsets + relative to the info pointer, so that we can generate + position-independent code. + + Note [x86-64-relative] + There is a complication on the x86_64 platform, where pointers are + 64 bits, but the tools don't support 64-bit relative relocations. + However, the default memory model (small) ensures that all symbols + have values in the lower 2Gb of the address space, so offsets all + fit in 32 bits. Hence we can use 32-bit offset fields. + + Somewhere between binutils-2.16.1 and binutils-2.16.91.0.6, + support for 64-bit PC-relative relocations was added, so maybe this + hackery can go away sometime. 
+ ------------------------------------------------------------------------- */ + +#if defined(x86_64_HOST_ARCH) +#define OFFSET_FIELD(n) StgHalfInt n; StgHalfWord __pad_##n +#else +#define OFFSET_FIELD(n) StgInt n +#endif + +/* ----------------------------------------------------------------------------- + Profiling info + -------------------------------------------------------------------------- */ + +typedef struct { +#if !defined(TABLES_NEXT_TO_CODE) + char *closure_type; + char *closure_desc; +#else + OFFSET_FIELD(closure_type_off); + OFFSET_FIELD(closure_desc_off); +#endif +} StgProfInfo; + +/* ----------------------------------------------------------------------------- + Closure flags + -------------------------------------------------------------------------- */ + +/* The type flags provide quick access to certain properties of a closure. */ + +#define _HNF (1<<0) /* head normal form? */ +#define _BTM (1<<1) /* uses info->layout.bitmap */ +#define _NS (1<<2) /* non-sparkable */ +#define _THU (1<<3) /* thunk? */ +#define _MUT (1<<4) /* mutable? */ +#define _UPT (1<<5) /* unpointed? */ +#define _SRT (1<<6) /* has an SRT? */ +#define _IND (1<<7) /* is an indirection? 
*/ + +#define isMUTABLE(flags) ((flags) &_MUT) +#define isBITMAP(flags) ((flags) &_BTM) +#define isTHUNK(flags) ((flags) &_THU) +#define isUNPOINTED(flags) ((flags) &_UPT) +#define hasSRT(flags) ((flags) &_SRT) + +extern StgWord16 closure_flags[]; + +#define closureFlags(c) (closure_flags[get_itbl \ + (UNTAG_CONST_CLOSURE(c))->type]) + +#define closure_HNF(c) ( closureFlags(c) & _HNF) +#define closure_BITMAP(c) ( closureFlags(c) & _BTM) +#define closure_NON_SPARK(c) ( (closureFlags(c) & _NS)) +#define closure_SHOULD_SPARK(c) (!(closureFlags(c) & _NS)) +#define closure_THUNK(c) ( closureFlags(c) & _THU) +#define closure_MUTABLE(c) ( closureFlags(c) & _MUT) +#define closure_UNPOINTED(c) ( closureFlags(c) & _UPT) +#define closure_SRT(c) ( closureFlags(c) & _SRT) +#define closure_IND(c) ( closureFlags(c) & _IND) + +/* same as above but for info-ptr rather than closure */ +#define ipFlags(ip) (closure_flags[ip->type]) + +#define ip_HNF(ip) ( ipFlags(ip) & _HNF) +#define ip_BITMAP(ip) ( ipFlags(ip) & _BTM) +#define ip_SHOULD_SPARK(ip) (!(ipFlags(ip) & _NS)) +#define ip_THUNK(ip) ( ipFlags(ip) & _THU) +#define ip_MUTABLE(ip) ( ipFlags(ip) & _MUT) +#define ip_UNPOINTED(ip) ( ipFlags(ip) & _UPT) +#define ip_SRT(ip) ( ipFlags(ip) & _SRT) +#define ip_IND(ip) ( ipFlags(ip) & _IND) + +/* ----------------------------------------------------------------------------- + Bitmaps + + These are used to describe the pointerhood of a sequence of words + (usually on the stack) to the garbage collector. The two primary + uses are for stack frames, and functions (where we need to describe + the layout of a PAP to the GC). + + In these bitmaps: 0 == ptr, 1 == non-ptr. + -------------------------------------------------------------------------- */ + +/* + * Small bitmaps: for a small bitmap, we store the size and bitmap in + * the same word, using the following macros. If the bitmap doesn't + * fit in a single word, we use a pointer to an StgLargeBitmap below. 
+ */ +#define MK_SMALL_BITMAP(size,bits) (((bits)<<BITMAP_BITS_SHIFT) | (size)) + +#define BITMAP_SIZE(bitmap) ((bitmap) & BITMAP_SIZE_MASK) +#define BITMAP_BITS(bitmap) ((bitmap) >> BITMAP_BITS_SHIFT) + +/* + * A large bitmap. + */ +typedef struct { + StgWord size; + StgWord bitmap[]; +} StgLargeBitmap; + +/* ---------------------------------------------------------------------------- + Info Tables + ------------------------------------------------------------------------- */ + +/* + * Stuff describing the closure layout. Well, actually, it might + * contain the selector index for a THUNK_SELECTOR. This union is one + * word long. + */ +typedef union { + struct { /* Heap closure payload layout: */ + StgHalfWord ptrs; /* number of pointers */ + StgHalfWord nptrs; /* number of non-pointers */ + } payload; + + StgWord bitmap; /* word-sized bit pattern describing */ + /* a stack frame: see below */ + +#if !defined(TABLES_NEXT_TO_CODE) + StgLargeBitmap* large_bitmap; /* pointer to large bitmap structure */ +#else + OFFSET_FIELD(large_bitmap_offset); /* offset from info table to large bitmap structure */ +#endif + + StgWord selector_offset; /* used in THUNK_SELECTORs */ + +} StgClosureInfo; + + +#if defined(x86_64_HOST_ARCH) && defined(TABLES_NEXT_TO_CODE) +// On x86_64 we can fit a pointer offset in half a word, so put the SRT offset +// in the info->srt field directly. +// +// See the section "Referring to an SRT from the info table" in +// Note [SRTs] in CmmBuildInfoTables.hs +#define USE_INLINE_SRT_FIELD +#endif + +#if defined(USE_INLINE_SRT_FIELD) +// offset to the SRT / closure, or zero if there's no SRT +typedef StgHalfInt StgSRTField; +#else +// non-zero if there is an SRT, the offset is in the optional srt field. +typedef StgHalfWord StgSRTField; +#endif + + +/* + * The "standard" part of an info table. Every info table has this bit. 
+ */ +typedef struct StgInfoTable_ { + +#if !defined(TABLES_NEXT_TO_CODE) + StgFunPtr entry; /* pointer to the entry code */ +#endif + +#if defined(PROFILING) + StgProfInfo prof; +#endif + + StgClosureInfo layout; /* closure layout info (one word) */ + + StgHalfWord type; /* closure type */ + StgSRTField srt; + /* In a CONSTR: + - the zero-based constructor tag + In a FUN/THUNK + - if USE_INLINE_SRT_FIELD + - offset to the SRT (or zero if no SRT) + - otherwise + - non-zero if there is an SRT, offset is in srt_offset + */ + +#if defined(TABLES_NEXT_TO_CODE) + StgCode code[]; +#endif +} *StgInfoTablePtr; // StgInfoTable defined in rts/Types.h + + +/* ----------------------------------------------------------------------------- + Function info tables + + This is the general form of function info tables. The compiler + will omit some of the fields in common cases: + + - If fun_type is not ARG_GEN or ARG_GEN_BIG, then the slow_apply + and bitmap fields may be left out (they are at the end, so omitting + them doesn't affect the layout). + + - If has_srt (in the std info table part) is zero, then the srt + field needn't be set. This only applies if the slow_apply and + bitmap fields have also been omitted. + -------------------------------------------------------------------------- */ + +/* + Note [Encoding static reference tables] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + As static reference tables appear frequently in code, we use a special + compact encoding for the common case of a module defining only a few CAFs: We + produce one table containing a list of CAFs in the module and then include a + bitmap in each info table describing which entries of this table the closure + references. 
+ */ + +typedef struct StgFunInfoExtraRev_ { + OFFSET_FIELD(slow_apply_offset); /* apply to args on the stack */ + union { + StgWord bitmap; + OFFSET_FIELD(bitmap_offset); /* arg ptr/nonptr bitmap */ + } b; +#if !defined(USE_INLINE_SRT_FIELD) + OFFSET_FIELD(srt_offset); /* pointer to the SRT closure */ +#endif + StgHalfWord fun_type; /* function type */ + StgHalfWord arity; /* function arity */ +} StgFunInfoExtraRev; + +typedef struct StgFunInfoExtraFwd_ { + StgHalfWord fun_type; /* function type */ + StgHalfWord arity; /* function arity */ + StgClosure *srt; /* pointer to the SRT closure */ + union { /* union for compat. with TABLES_NEXT_TO_CODE version */ + StgWord bitmap; /* arg ptr/nonptr bitmap */ + } b; + StgFun *slow_apply; /* apply to args on the stack */ +} StgFunInfoExtraFwd; + +typedef struct { +#if defined(TABLES_NEXT_TO_CODE) + StgFunInfoExtraRev f; + StgInfoTable i; +#else + StgInfoTable i; + StgFunInfoExtraFwd f; +#endif +} StgFunInfoTable; + +// canned bitmap for each arg type, indexed by constants in FunTypes.h +extern const StgWord stg_arg_bitmaps[]; + +/* ----------------------------------------------------------------------------- + Return info tables + -------------------------------------------------------------------------- */ + +/* + * When info tables are laid out backwards, we can omit the SRT + * pointer iff has_srt is zero. + */ + +typedef struct { +#if defined(TABLES_NEXT_TO_CODE) +#if !defined(USE_INLINE_SRT_FIELD) + OFFSET_FIELD(srt_offset); /* offset to the SRT closure */ +#endif + StgInfoTable i; +#else + StgInfoTable i; + StgClosure *srt; /* pointer to the SRT closure */ +#endif +} StgRetInfoTable; + +/* ----------------------------------------------------------------------------- + Thunk info tables + -------------------------------------------------------------------------- */ + +/* + * When info tables are laid out backwards, we can omit the SRT + * pointer iff has_srt is zero. 
+ */ + +typedef struct StgThunkInfoTable_ { +#if defined(TABLES_NEXT_TO_CODE) +#if !defined(USE_INLINE_SRT_FIELD) + OFFSET_FIELD(srt_offset); /* offset to the SRT closure */ +#endif + StgInfoTable i; +#else + StgInfoTable i; + StgClosure *srt; /* pointer to the SRT closure */ +#endif +} StgThunkInfoTable; + +/* ----------------------------------------------------------------------------- + Constructor info tables + -------------------------------------------------------------------------- */ + +typedef struct StgConInfoTable_ { +#if !defined(TABLES_NEXT_TO_CODE) + StgInfoTable i; +#endif + +#if defined(TABLES_NEXT_TO_CODE) + OFFSET_FIELD(con_desc); // the name of the data constructor + // as: Package:Module.Name +#else + char *con_desc; +#endif + +#if defined(TABLES_NEXT_TO_CODE) + StgInfoTable i; +#endif +} StgConInfoTable; + + +/* ----------------------------------------------------------------------------- + Accessor macros for fields that might be offsets (C version) + -------------------------------------------------------------------------- */ + +/* + * GET_SRT(info) + * info must be a Stg[Ret|Thunk]InfoTable* (an info table that has a SRT) + */ +#if defined(TABLES_NEXT_TO_CODE) +#if defined(x86_64_HOST_ARCH) +#define GET_SRT(info) \ + ((StgClosure*) (((StgWord) ((info)+1)) + (info)->i.srt)) +#else +#define GET_SRT(info) \ + ((StgClosure*) (((StgWord) ((info)+1)) + (info)->srt_offset)) +#endif +#else // !TABLES_NEXT_TO_CODE +#define GET_SRT(info) ((info)->srt) +#endif + +/* + * GET_CON_DESC(info) + * info must be a StgConInfoTable*. 
+ */ +#if defined(TABLES_NEXT_TO_CODE) +#define GET_CON_DESC(info) \ + ((const char *)((StgWord)((info)+1) + ((info)->con_desc))) +#else +#define GET_CON_DESC(info) ((const char *)(info)->con_desc) +#endif + +/* + * GET_FUN_SRT(info) + * info must be a StgFunInfoTable* + */ +#if defined(TABLES_NEXT_TO_CODE) +#if defined(x86_64_HOST_ARCH) +#define GET_FUN_SRT(info) \ + ((StgClosure*) (((StgWord) ((info)+1)) + (info)->i.srt)) +#else +#define GET_FUN_SRT(info) \ + ((StgClosure*) (((StgWord) ((info)+1)) + (info)->f.srt_offset)) +#endif +#else +#define GET_FUN_SRT(info) ((info)->f.srt) +#endif + +#if defined(TABLES_NEXT_TO_CODE) +#define GET_LARGE_BITMAP(info) ((StgLargeBitmap*) (((StgWord) ((info)+1)) \ + + (info)->layout.large_bitmap_offset)) +#else +#define GET_LARGE_BITMAP(info) ((info)->layout.large_bitmap) +#endif + +#if defined(TABLES_NEXT_TO_CODE) +#define GET_FUN_LARGE_BITMAP(info) ((StgLargeBitmap*) (((StgWord) ((info)+1)) \ + + (info)->f.b.bitmap_offset)) +#else +#define GET_FUN_LARGE_BITMAP(info) ((StgLargeBitmap*) ((info)->f.b.bitmap)) +#endif + +/* + * GET_PROF_TYPE, GET_PROF_DESC + */ +#if defined(TABLES_NEXT_TO_CODE) +#define GET_PROF_TYPE(info) ((char *)((StgWord)((info)+1) + (info->prof.closure_type_off))) +#else +#define GET_PROF_TYPE(info) ((info)->prof.closure_type) +#endif +#if defined(TABLES_NEXT_TO_CODE) +#define GET_PROF_DESC(info) ((char *)((StgWord)((info)+1) + (info->prof.closure_desc_off))) +#else +#define GET_PROF_DESC(info) ((info)->prof.closure_desc) +#endif diff --git a/rts/include/rts/storage/MBlock.h b/rts/include/rts/storage/MBlock.h new file mode 100644 index 0000000000..3acefda9a0 --- /dev/null +++ b/rts/include/rts/storage/MBlock.h @@ -0,0 +1,32 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2008 + * + * MegaBlock Allocator interface. 
+ * + * See wiki commentary at + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/heap-alloced + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +extern W_ peak_mblocks_allocated; +extern W_ mblocks_allocated; + +extern void initMBlocks(void); +extern void * getMBlock(void); +extern void * getMBlocks(uint32_t n); +extern void * getMBlockOnNode(uint32_t node); +extern void * getMBlocksOnNode(uint32_t node, uint32_t n); +extern void freeMBlocks(void *addr, uint32_t n); +extern void releaseFreeMemory(void); +extern void freeAllMBlocks(void); + +extern void *getFirstMBlock(void **state); +extern void *getNextMBlock(void **state, void *mblock); + +#if defined(THREADED_RTS) +// needed for HEAP_ALLOCED below +extern SpinLock gc_alloc_block_sync; +#endif diff --git a/rts/include/rts/storage/TSO.h b/rts/include/rts/storage/TSO.h new file mode 100644 index 0000000000..61215d9f38 --- /dev/null +++ b/rts/include/rts/storage/TSO.h @@ -0,0 +1,330 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * The definitions for Thread State Objects. + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +/* + * PROFILING info in a TSO + */ +typedef struct { + CostCentreStack *cccs; /* thread's current CCS */ +} StgTSOProfInfo; + +/* + * There is no TICKY info in a TSO at this time. + */ + +/* + * Thread IDs are 64 bits. + */ +typedef StgWord64 StgThreadID; + +#define tsoLocked(tso) ((tso)->flags & TSO_LOCKED) + +/* + * Type returned after running a thread. Values of this type + * include HeapOverflow, StackOverflow etc. See Constants.h for the + * full list. + */ +typedef unsigned int StgThreadReturnCode; + +#if defined(mingw32_HOST_OS) +/* results from an async I/O request + its request ID. 
*/ +typedef struct { + unsigned int reqID; + int len; + int errCode; +} StgAsyncIOResult; +#endif + +/* Reason for thread being blocked. See comment above struct StgTso_. */ +typedef union { + StgClosure *closure; + StgTSO *prev; // a back-link when the TSO is on the run queue (NotBlocked) + struct MessageBlackHole_ *bh; + struct MessageThrowTo_ *throwto; + struct MessageWakeup_ *wakeup; + StgInt fd; /* StgInt instead of int, so that it's the same size as the ptrs */ +#if defined(mingw32_HOST_OS) + StgAsyncIOResult *async_result; +#endif +#if !defined(THREADED_RTS) + StgWord target; + // Only for the non-threaded RTS: the target time for a thread + // blocked in threadDelay, in units of 1ms. This is a + // compromise: we don't want to take up much space in the TSO. If + // you want better resolution for threadDelay, use -threaded. +#endif +} StgTSOBlockInfo; + + +/* + * TSOs live on the heap, and therefore look just like heap objects. + * Large TSOs will live in their own "block group" allocated by the + * storage manager, and won't be copied during garbage collection. + */ + +/* + * Threads may be blocked for several reasons. A blocked thread will + * have the reason in the why_blocked field of the TSO, and some + * further info (such as the closure the thread is blocked on, or the + * file descriptor if the thread is waiting on I/O) in the block_info + * field. + */ + +typedef struct StgTSO_ { + StgHeader header; + + /* The link field, for linking threads together in lists (e.g. the + run queue on a Capability. + */ + struct StgTSO_* _link; + /* + Currently used for linking TSOs on: + * cap->run_queue_{hd,tl} + * (non-THREADED_RTS); the blocked_queue + * and pointing to the next chunk for a ThreadOldStack + + NOTE!!! do not modify _link directly, it is subject to + a write barrier for generational GC. Instead use the + setTSOLink() function. 
Exceptions to this rule are: + + * setting the link field to END_TSO_QUEUE + * setting the link field of the currently running TSO, as it + will already be dirty. + */ + + struct StgTSO_* global_link; // Links threads on the + // generation->threads lists + + /* + * The thread's stack + */ + struct StgStack_ *stackobj; + + /* + * The tso->dirty flag indicates that this TSO's stack should be + * scanned during garbage collection. It also indicates that this + * TSO is on the mutable list. + * + * NB. The dirty flag gets a word to itself, so that it can be set + * safely by multiple threads simultaneously (the flags field is + * not safe for this purpose; see #3429). It is harmless for the + * TSO to be on the mutable list multiple times. + * + * tso->dirty is set by dirty_TSO(), and unset by the garbage + * collector (only). + */ + + StgWord16 what_next; // Values defined in Constants.h + StgWord16 why_blocked; // Values defined in Constants.h + StgWord32 flags; // Values defined in Constants.h + StgTSOBlockInfo block_info; + StgThreadID id; + StgWord32 saved_errno; + StgWord32 dirty; /* non-zero => dirty */ + struct InCall_* bound; + struct Capability_* cap; + + struct StgTRecHeader_ * trec; /* STM transaction record */ + + /* + * A list of threads blocked on this TSO waiting to throw exceptions. + */ + struct MessageThrowTo_ * blocked_exceptions; + + /* + * A list of StgBlockingQueue objects, representing threads + * blocked on thunks that are under evaluation by this thread. + */ + struct StgBlockingQueue_ *bq; + + /* + * The allocation limit for this thread, which is updated as the + * thread allocates. If the value drops below zero, and + * TSO_ALLOC_LIMIT is set in flags, we raise an exception in the + * thread, and give the thread a little more space to handle the + * exception before we raise the exception again. 
+ * + * This is an integer, because we might update it in a place where + * it isn't convenient to raise the exception, so we want it to + * stay negative until we get around to checking it. + * + * Use only PK_Int64/ASSIGN_Int64 macros to get/set the value of alloc_limit + * in C code otherwise you will cause alignment issues on SPARC + */ + StgInt64 alloc_limit; /* in bytes */ + + /* + * sum of the sizes of all stack chunks (in words), used to decide + * whether to throw the StackOverflow exception when the stack + * overflows, or whether to just chain on another stack chunk. + * + * Note that this overestimates the real stack size, because each + * chunk will have a gap at the end, of +RTS -kb<size> words. + * This means stack overflows are not entirely accurate, because + * the more gaps there are, the sooner the stack will run into the + * hard +RTS -K<size> limit. + */ + StgWord32 tot_stack_size; + +#if defined(TICKY_TICKY) + /* TICKY-specific stuff would go here. */ +#endif +#if defined(PROFILING) + StgTSOProfInfo prof; +#endif +#if defined(mingw32_HOST_OS) + StgWord32 saved_winerror; +#endif + +} *StgTSOPtr; // StgTSO defined in rts/Types.h + +/* Note [StgStack dirtiness flags and concurrent marking] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * Without concurrent collection by the nonmoving collector the stack dirtiness story + * is quite simple: The stack is either STACK_DIRTY (meaning it has been added to mut_list) + * or not. + * + * However, things are considerably more complicated with concurrent collection + * (namely, when nonmoving_write_barrier_enabled is set): In addition to adding + * the stack to mut_list and flagging it as STACK_DIRTY, we also must ensure + * that stacks are marked in accordance with the nonmoving collector's snapshot + * invariant. 
This is: every stack alive at the time the snapshot is taken must + * be marked at some point after the moment the snapshot is taken and before it + * is mutated or the commencement of the sweep phase. + * + * This marking may be done by the concurrent mark phase (in the case of a + * thread that never runs during the concurrent mark) or by the mutator when + * dirtying the stack. However, it is unsafe for the concurrent collector to + * traverse the stack while it is under mutation. Consequently, the following + * handshake is obeyed by the mutator's write barrier and the concurrent mark to + * ensure this doesn't happen: + * + * 1. The entity seeking to mark first checks that the stack lives in the nonmoving + * generation; if not then the stack was not alive at the time the snapshot + * was taken and therefore we need not mark it. + * + * 2. The entity seeking to mark checks the stack's mark bit. If it is set then + * no mark is necessary. + * + * 3. The entity seeking to mark tries to lock the stack for marking by + * atomically setting its `marking` field to the current non-moving mark + * epoch: + * + * a. If the mutator finds the concurrent collector has already locked the + * stack then it waits until it is finished (indicated by the mark bit + * being set) before proceeding with execution. + * + * b. If the concurrent collector finds that the mutator has locked the stack + * then it moves on, leaving the mutator to mark it. There is no need to wait; + * the mark is guaranteed to finish before sweep due to the post-mark + * synchronization with mutators. + * + * c. Whoever succeeds in locking the stack is responsible for marking it and + * setting the stack's mark bit (either the BF_MARKED bit for large objects + * or otherwise its bit in its segment's mark bitmap). + * + * To ensure that mutation does not proceed until the stack is fully marked the + * mark phase must not set the mark bit until it has finished tracing. 
+ * + */ + +#define STACK_DIRTY 1 +// used by sanity checker to verify that all dirty stacks are on the mutable list +#define STACK_SANE 64 + +typedef struct StgStack_ { + StgHeader header; + + /* Size of the `stack` field in *words*. This is not affected by how much of + * the stack space is used, nor if more stack space is linked to by an + * UNDERFLOW_FRAME. + */ + StgWord32 stack_size; + + StgWord8 dirty; // non-zero => dirty + StgWord8 marking; // non-zero => someone is currently marking the stack + + /* Pointer to the "top" of the stack i.e. the most recently written address. + * The stack is filled downwards, so the "top" of the stack starts with `sp + * = stack + stack_size` and is decremented as the stack fills with data. + * See comment on "Invariants" below. + */ + StgPtr sp; + StgWord stack[]; +} StgStack; + +// Calculate SpLim from a TSO (reads tso->stackobj, but no fields from +// the stackobj itself). +INLINE_HEADER StgPtr tso_SpLim (StgTSO* tso) +{ + return tso->stackobj->stack + RESERVED_STACK_WORDS; +} + +/* ----------------------------------------------------------------------------- + functions + -------------------------------------------------------------------------- */ + +void dirty_TSO (Capability *cap, StgTSO *tso); +void setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target); +void setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target); + +void dirty_STACK (Capability *cap, StgStack *stack); + +/* ----------------------------------------------------------------------------- + Invariants: + + An active thread has the following properties: + + tso->stack < tso->sp < tso->stack+tso->stack_size + tso->stack_size <= tso->max_stack_size + + RESERVED_STACK_WORDS is large enough for any heap-check or + stack-check failure. 
+ + The size of the TSO struct plus the stack is either + (a) smaller than a block, or + (b) a multiple of BLOCK_SIZE + + tso->why_blocked tso->block_info location + ---------------------------------------------------------------------- + NotBlocked END_TSO_QUEUE runnable_queue, or running + + BlockedOnBlackHole MessageBlackHole * TSO->bq + + BlockedOnMVar the MVAR the MVAR's queue + BlockedOnIOCompletion the PortEVent the IOCP's queue + + BlockedOnSTM END_TSO_QUEUE STM wait queue(s) + BlockedOnSTM STM_AWOKEN run queue + + BlockedOnMsgThrowTo MessageThrowTo * TSO->blocked_exception + + BlockedOnRead NULL blocked_queue + BlockedOnWrite NULL blocked_queue + BlockedOnDelay NULL blocked_queue + + tso->link == END_TSO_QUEUE, if the thread is currently running. + + A zombie thread has the following properties: + + tso->what_next == ThreadComplete or ThreadKilled + tso->link == (could be on some queue somewhere) + tso->sp == tso->stack + tso->stack_size - 1 (i.e. top stack word) + tso->sp[0] == return value of thread, if what_next == ThreadComplete, + exception , if what_next == ThreadKilled + + (tso->sp is left pointing at the top word on the stack so that + the return value or exception will be retained by a GC). + + ---------------------------------------------------------------------------- */ + +/* this is the NIL ptr for a TSO queue (e.g. runnable queue) */ +#define END_TSO_QUEUE ((StgTSO *)(void*)&stg_END_TSO_QUEUE_closure) diff --git a/rts/include/stg/DLL.h b/rts/include/stg/DLL.h new file mode 100644 index 0000000000..11902fc239 --- /dev/null +++ b/rts/include/stg/DLL.h @@ -0,0 +1,72 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * Support for Windows DLLs. + * + * Do not #include this file directly: #include "Rts.h" instead. 
+ * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#if defined(COMPILING_WINDOWS_DLL) +# if defined(x86_64_HOST_ARCH) +# define DLL_IMPORT_DATA_REF(x) (__imp_##x) +# define DLL_IMPORT_DATA_VARNAME(x) *__imp_##x +# else +# define DLL_IMPORT_DATA_REF(x) (_imp__##x) +# define DLL_IMPORT_DATA_VARNAME(x) *_imp__##x +# endif +# if __GNUC__ && !defined(__declspec) +# define DLLIMPORT +# else +# define DLLIMPORT __declspec(dllimport) +# if defined(x86_64_HOST_ARCH) +# define DLLIMPORT_DATA(x) __imp_##x +# else +# define DLLIMPORT_DATA(x) _imp__##x +# endif +# endif +#else +# define DLL_IMPORT_DATA_REF(x) (&(x)) +# define DLL_IMPORT_DATA_VARNAME(x) x +# define DLLIMPORT +#endif + +/* The view of the rts/include/ header files differ ever so + slightly depending on whether the RTS is being compiled + or not - so we're forced to distinguish between two. + [oh, you want details :) : Data symbols defined by the RTS + have to be accessed through an extra level of indirection + when compiling generated .hc code compared to when the RTS + sources are being processed. This is only the case when + using Win32 DLLs. 
] +*/ +#if defined(COMPILING_RTS) +#define DLL_IMPORT DLLIMPORT +#define DLL_IMPORT_RTS +#define DLL_IMPORT_DATA_VAR(x) x +#else +#define DLL_IMPORT +#define DLL_IMPORT_RTS DLLIMPORT +# if defined(COMPILING_WINDOWS_DLL) +# if defined(x86_64_HOST_ARCH) +# define DLL_IMPORT_DATA_VAR(x) __imp_##x +# else +# define DLL_IMPORT_DATA_VAR(x) _imp__##x +# endif +# else +# define DLL_IMPORT_DATA_VAR(x) x +# endif +#endif + + +#if defined(COMPILING_STDLIB) +#define DLL_IMPORT_STDLIB +#else +#define DLL_IMPORT_STDLIB DLLIMPORT +#endif diff --git a/rts/include/stg/MachRegs.h b/rts/include/stg/MachRegs.h new file mode 100644 index 0000000000..d50969b66a --- /dev/null +++ b/rts/include/stg/MachRegs.h @@ -0,0 +1,854 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2014 + * + * Registers used in STG code. Might or might not correspond to + * actual machine registers. + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +/* This file is #included into Haskell code in the compiler: #defines + * only in here please. + */ + +/* + * Undefine these as a precaution: some of them were found to be + * defined by system headers on ARM/Linux. + */ +#undef REG_R1 +#undef REG_R2 +#undef REG_R3 +#undef REG_R4 +#undef REG_R5 +#undef REG_R6 +#undef REG_R7 +#undef REG_R8 +#undef REG_R9 +#undef REG_R10 + +/* + * Defining MACHREGS_NO_REGS to 1 causes no global registers to be used. + * MACHREGS_NO_REGS is typically controlled by NO_REGS, which is + * typically defined by GHC, via a command-line option passed to gcc, + * when the -funregisterised flag is given. + * + * NB. When MACHREGS_NO_REGS to 1, calling & return conventions may be + * different. 
For example, all function arguments will be passed on + * the stack, and components of an unboxed tuple will be returned on + * the stack rather than in registers. + */ +#if MACHREGS_NO_REGS == 1 + +/* Nothing */ + +#elif MACHREGS_NO_REGS == 0 + +/* ---------------------------------------------------------------------------- + Caller saves and callee-saves regs. + + Caller-saves regs have to be saved around C-calls made from STG + land, so this file defines CALLER_SAVES_<reg> for each <reg> that + is designated caller-saves in that machine's C calling convention. + + As it stands, the only registers that are ever marked caller saves + are the RX, FX, DX and USER registers; as a result, if you + decide to caller save a system register (e.g. SP, HP, etc), note that + this code path is completely untested! -- EZY + + See Note [Register parameter passing] for details. + -------------------------------------------------------------------------- */ + +/* ----------------------------------------------------------------------------- + The x86 register mapping + + Ok, we've only got 6 general purpose registers, a frame pointer and a + stack pointer. \tr{%eax} and \tr{%edx} are return values from C functions, + hence they get trashed across ccalls and are caller saves. \tr{%ebx}, + \tr{%esi}, \tr{%edi}, \tr{%ebp} are all callee-saves. + + Reg STG-Reg + --------------- + ebx Base + ebp Sp + esi R1 + edi Hp + + Leaving SpLim out of the picture. 
+ -------------------------------------------------------------------------- */ + +#if defined(MACHREGS_i386) + +#define REG(x) __asm__("%" #x) + +#if !defined(not_doing_dynamic_linking) +#define REG_Base ebx +#endif +#define REG_Sp ebp + +#if !defined(STOLEN_X86_REGS) +#define STOLEN_X86_REGS 4 +#endif + +#if STOLEN_X86_REGS >= 3 +# define REG_R1 esi +#endif + +#if STOLEN_X86_REGS >= 4 +# define REG_Hp edi +#endif +#define REG_MachSp esp + +#define REG_XMM1 xmm0 +#define REG_XMM2 xmm1 +#define REG_XMM3 xmm2 +#define REG_XMM4 xmm3 + +#define REG_YMM1 ymm0 +#define REG_YMM2 ymm1 +#define REG_YMM3 ymm2 +#define REG_YMM4 ymm3 + +#define REG_ZMM1 zmm0 +#define REG_ZMM2 zmm1 +#define REG_ZMM3 zmm2 +#define REG_ZMM4 zmm3 + +#define MAX_REAL_VANILLA_REG 1 /* always, since it defines the entry conv */ +#define MAX_REAL_FLOAT_REG 0 +#define MAX_REAL_DOUBLE_REG 0 +#define MAX_REAL_LONG_REG 0 +#define MAX_REAL_XMM_REG 4 +#define MAX_REAL_YMM_REG 4 +#define MAX_REAL_ZMM_REG 4 + +/* ----------------------------------------------------------------------------- + The x86-64 register mapping + + %rax caller-saves, don't steal this one + %rbx YES + %rcx arg reg, caller-saves + %rdx arg reg, caller-saves + %rsi arg reg, caller-saves + %rdi arg reg, caller-saves + %rbp YES (our *prime* register) + %rsp (unavailable - stack pointer) + %r8 arg reg, caller-saves + %r9 arg reg, caller-saves + %r10 caller-saves + %r11 caller-saves + %r12 YES + %r13 YES + %r14 YES + %r15 YES + + %xmm0-7 arg regs, caller-saves + %xmm8-15 caller-saves + + Use the caller-saves regs for Rn, because we don't always have to + save those (as opposed to Sp/Hp/SpLim etc. which always have to be + saved). 
+ + --------------------------------------------------------------------------- */ + +#elif defined(MACHREGS_x86_64) + +#define REG(x) __asm__("%" #x) + +#define REG_Base r13 +#define REG_Sp rbp +#define REG_Hp r12 +#define REG_R1 rbx +#define REG_R2 r14 +#define REG_R3 rsi +#define REG_R4 rdi +#define REG_R5 r8 +#define REG_R6 r9 +#define REG_SpLim r15 +#define REG_MachSp rsp + +/* +Map both Fn and Dn to register xmmn so that we can pass a function any +combination of up to six Float# or Double# arguments without touching +the stack. See Note [Overlapping global registers] for implications. +*/ + +#define REG_F1 xmm1 +#define REG_F2 xmm2 +#define REG_F3 xmm3 +#define REG_F4 xmm4 +#define REG_F5 xmm5 +#define REG_F6 xmm6 + +#define REG_D1 xmm1 +#define REG_D2 xmm2 +#define REG_D3 xmm3 +#define REG_D4 xmm4 +#define REG_D5 xmm5 +#define REG_D6 xmm6 + +#define REG_XMM1 xmm1 +#define REG_XMM2 xmm2 +#define REG_XMM3 xmm3 +#define REG_XMM4 xmm4 +#define REG_XMM5 xmm5 +#define REG_XMM6 xmm6 + +#define REG_YMM1 ymm1 +#define REG_YMM2 ymm2 +#define REG_YMM3 ymm3 +#define REG_YMM4 ymm4 +#define REG_YMM5 ymm5 +#define REG_YMM6 ymm6 + +#define REG_ZMM1 zmm1 +#define REG_ZMM2 zmm2 +#define REG_ZMM3 zmm3 +#define REG_ZMM4 zmm4 +#define REG_ZMM5 zmm5 +#define REG_ZMM6 zmm6 + +#if !defined(mingw32_HOST_OS) +#define CALLER_SAVES_R3 +#define CALLER_SAVES_R4 +#endif +#define CALLER_SAVES_R5 +#define CALLER_SAVES_R6 + +#define CALLER_SAVES_F1 +#define CALLER_SAVES_F2 +#define CALLER_SAVES_F3 +#define CALLER_SAVES_F4 +#define CALLER_SAVES_F5 +#if !defined(mingw32_HOST_OS) +#define CALLER_SAVES_F6 +#endif + +#define CALLER_SAVES_D1 +#define CALLER_SAVES_D2 +#define CALLER_SAVES_D3 +#define CALLER_SAVES_D4 +#define CALLER_SAVES_D5 +#if !defined(mingw32_HOST_OS) +#define CALLER_SAVES_D6 +#endif + +#define CALLER_SAVES_XMM1 +#define CALLER_SAVES_XMM2 +#define CALLER_SAVES_XMM3 +#define CALLER_SAVES_XMM4 +#define CALLER_SAVES_XMM5 +#if !defined(mingw32_HOST_OS) +#define CALLER_SAVES_XMM6 
+#endif + +#define CALLER_SAVES_YMM1 +#define CALLER_SAVES_YMM2 +#define CALLER_SAVES_YMM3 +#define CALLER_SAVES_YMM4 +#define CALLER_SAVES_YMM5 +#if !defined(mingw32_HOST_OS) +#define CALLER_SAVES_YMM6 +#endif + +#define CALLER_SAVES_ZMM1 +#define CALLER_SAVES_ZMM2 +#define CALLER_SAVES_ZMM3 +#define CALLER_SAVES_ZMM4 +#define CALLER_SAVES_ZMM5 +#if !defined(mingw32_HOST_OS) +#define CALLER_SAVES_ZMM6 +#endif + +#define MAX_REAL_VANILLA_REG 6 +#define MAX_REAL_FLOAT_REG 6 +#define MAX_REAL_DOUBLE_REG 6 +#define MAX_REAL_LONG_REG 0 +#define MAX_REAL_XMM_REG 6 +#define MAX_REAL_YMM_REG 6 +#define MAX_REAL_ZMM_REG 6 + +/* ----------------------------------------------------------------------------- + The PowerPC register mapping + + 0 system glue? (caller-save, volatile) + 1 SP (callee-save, non-volatile) + 2 AIX, powerpc64-linux: + RTOC (a strange special case) + powerpc32-linux: + reserved for use by system + + 3-10 args/return (caller-save, volatile) + 11,12 system glue? (caller-save, volatile) + 13 on 64-bit: reserved for thread state pointer + on 32-bit: (callee-save, non-volatile) + 14-31 (callee-save, non-volatile) + + f0 (caller-save, volatile) + f1-f13 args/return (caller-save, volatile) + f14-f31 (callee-save, non-volatile) + + \tr{14}--\tr{31} are wonderful callee-save registers on all ppc OSes. + \tr{0}--\tr{12} are caller-save registers. + + \tr{%f14}--\tr{%f31} are callee-save floating-point registers. + + We can do the Whole Business with callee-save registers only! 
+ -------------------------------------------------------------------------- */ + +#elif defined(MACHREGS_powerpc) + +#define REG(x) __asm__(#x) + +#define REG_R1 r14 +#define REG_R2 r15 +#define REG_R3 r16 +#define REG_R4 r17 +#define REG_R5 r18 +#define REG_R6 r19 +#define REG_R7 r20 +#define REG_R8 r21 +#define REG_R9 r22 +#define REG_R10 r23 + +#define REG_F1 fr14 +#define REG_F2 fr15 +#define REG_F3 fr16 +#define REG_F4 fr17 +#define REG_F5 fr18 +#define REG_F6 fr19 + +#define REG_D1 fr20 +#define REG_D2 fr21 +#define REG_D3 fr22 +#define REG_D4 fr23 +#define REG_D5 fr24 +#define REG_D6 fr25 + +#define REG_Sp r24 +#define REG_SpLim r25 +#define REG_Hp r26 +#define REG_Base r27 + +#define MAX_REAL_FLOAT_REG 6 +#define MAX_REAL_DOUBLE_REG 6 + +/* ----------------------------------------------------------------------------- + The Sun SPARC register mapping + + !! IMPORTANT: if you change this register mapping you must also update + compiler/GHC/CmmToAsm/SPARC/Regs.hs. That file handles the + mapping for the NCG. This one only affects via-c code. + + The SPARC register (window) story: Remember, within the Haskell + Threaded World, we essentially ``shut down'' the register-window + mechanism---the window doesn't move at all while in this World. It + *does* move, of course, if we call out to arbitrary~C... + + The %i, %l, and %o registers (8 each) are the input, local, and + output registers visible in one register window. The 8 %g (global) + registers are visible all the time. + + zero: always zero + scratch: volatile across C-fn calls. used by linker. 
+ app: usable by application + system: reserved for system + + alloc: allocated to in the register allocator, intra-closure only + + GHC usage v8 ABI v9 ABI + Global + %g0 zero zero zero + %g1 alloc scratch scrach + %g2 alloc app app + %g3 alloc app app + %g4 alloc app scratch + %g5 system scratch + %g6 system system + %g7 system system + + Output: can be zapped by callee + %o0-o5 alloc caller saves + %o6 C stack ptr + %o7 C ret addr + + Local: maintained by register windowing mechanism + %l0 alloc + %l1 R1 + %l2 R2 + %l3 R3 + %l4 R4 + %l5 R5 + %l6 alloc + %l7 alloc + + Input + %i0 Sp + %i1 Base + %i2 SpLim + %i3 Hp + %i4 alloc + %i5 R6 + %i6 C frame ptr + %i7 C ret addr + + The paired nature of the floating point registers causes complications for + the native code generator. For convenience, we pretend that the first 22 + fp regs %f0 .. %f21 are actually 11 double regs, and the remaining 10 are + float (single) regs. The NCG acts accordingly. That means that the + following FP assignment is rather fragile, and should only be changed + with extreme care. 
The current scheme is: + + %f0 /%f1 FP return from C + %f2 /%f3 D1 + %f4 /%f5 D2 + %f6 /%f7 ncg double spill tmp #1 + %f8 /%f9 ncg double spill tmp #2 + %f10/%f11 allocatable + %f12/%f13 allocatable + %f14/%f15 allocatable + %f16/%f17 allocatable + %f18/%f19 allocatable + %f20/%f21 allocatable + + %f22 F1 + %f23 F2 + %f24 F3 + %f25 F4 + %f26 ncg single spill tmp #1 + %f27 ncg single spill tmp #2 + %f28 allocatable + %f29 allocatable + %f30 allocatable + %f31 allocatable + + -------------------------------------------------------------------------- */ + +#elif defined(MACHREGS_sparc) + +#define REG(x) __asm__("%" #x) + +#define CALLER_SAVES_USER + +#define CALLER_SAVES_F1 +#define CALLER_SAVES_F2 +#define CALLER_SAVES_F3 +#define CALLER_SAVES_F4 +#define CALLER_SAVES_D1 +#define CALLER_SAVES_D2 + +#define REG_R1 l1 +#define REG_R2 l2 +#define REG_R3 l3 +#define REG_R4 l4 +#define REG_R5 l5 +#define REG_R6 i5 + +#define REG_F1 f22 +#define REG_F2 f23 +#define REG_F3 f24 +#define REG_F4 f25 + +/* for each of the double arg regs, + Dn_2 is the high half. */ + +#define REG_D1 f2 +#define REG_D1_2 f3 + +#define REG_D2 f4 +#define REG_D2_2 f5 + +#define REG_Sp i0 +#define REG_SpLim i2 + +#define REG_Hp i3 + +#define REG_Base i1 + +#define NCG_FirstFloatReg f22 + +/* ----------------------------------------------------------------------------- + The ARM EABI register mapping + + Here we consider ARM mode (i.e. 32bit isns) + and also CPU with full VFPv3 implementation + + ARM registers (see Chapter 5.1 in ARM IHI 0042D and + Section 9.2.2 in ARM Software Development Toolkit Reference Guide) + + r15 PC The Program Counter. + r14 LR The Link Register. + r13 SP The Stack Pointer. + r12 IP The Intra-Procedure-call scratch register. + r11 v8/fp Variable-register 8. + r10 v7/sl Variable-register 7. + r9 v6/SB/TR Platform register. The meaning of this register is + defined by the platform standard. + r8 v5 Variable-register 5. + r7 v4 Variable register 4. 
+ r6 v3 Variable register 3. + r5 v2 Variable register 2. + r4 v1 Variable register 1. + r3 a4 Argument / scratch register 4. + r2 a3 Argument / scratch register 3. + r1 a2 Argument / result / scratch register 2. + r0 a1 Argument / result / scratch register 1. + + VFPv2/VFPv3/NEON registers + s0-s15/d0-d7/q0-q3 Argument / result/ scratch registers + s16-s31/d8-d15/q4-q7 callee-saved registers (must be preserved across + subroutine calls) + + VFPv3/NEON registers (added to the VFPv2 registers set) + d16-d31/q8-q15 Argument / result/ scratch registers + ----------------------------------------------------------------------------- */ + +#elif defined(MACHREGS_arm) + +#define REG(x) __asm__(#x) + +#define REG_Base r4 +#define REG_Sp r5 +#define REG_Hp r6 +#define REG_R1 r7 +#define REG_R2 r8 +#define REG_R3 r9 +#define REG_R4 r10 +#define REG_SpLim r11 + +#if !defined(arm_HOST_ARCH_PRE_ARMv6) +/* d8 */ +#define REG_F1 s16 +#define REG_F2 s17 +/* d9 */ +#define REG_F3 s18 +#define REG_F4 s19 + +#define REG_D1 d10 +#define REG_D2 d11 +#endif + +/* ----------------------------------------------------------------------------- + The ARMv8/AArch64 ABI register mapping + + The AArch64 provides 31 64-bit general purpose registers + and 32 128-bit SIMD/floating point registers. 
+ + General purpose registers (see Chapter 5.1.1 in ARM IHI 0055B) + + Register | Special | Role in the procedure call standard + ---------+---------+------------------------------------ + SP | | The Stack Pointer + r30 | LR | The Link Register + r29 | FP | The Frame Pointer + r19-r28 | | Callee-saved registers + r18 | | The Platform Register, if needed; + | | or temporary register + r17 | IP1 | The second intra-procedure-call temporary register + r16 | IP0 | The first intra-procedure-call scratch register + r9-r15 | | Temporary registers + r8 | | Indirect result location register + r0-r7 | | Parameter/result registers + + + FPU/SIMD registers + + s/d/q/v0-v7 Argument / result/ scratch registers + s/d/q/v8-v15 callee-saved registers (must be preserved across subroutine calls, + but only bottom 64-bit value needs to be preserved) + s/d/q/v16-v31 temporary registers + + ----------------------------------------------------------------------------- */ + +#elif defined(MACHREGS_aarch64) + +#define REG(x) __asm__(#x) + +#define REG_Base r19 +#define REG_Sp r20 +#define REG_Hp r21 +#define REG_R1 r22 +#define REG_R2 r23 +#define REG_R3 r24 +#define REG_R4 r25 +#define REG_R5 r26 +#define REG_R6 r27 +#define REG_SpLim r28 + +#define REG_F1 s8 +#define REG_F2 s9 +#define REG_F3 s10 +#define REG_F4 s11 + +#define REG_D1 d12 +#define REG_D2 d13 +#define REG_D3 d14 +#define REG_D4 d15 + +/* ----------------------------------------------------------------------------- + The s390x register mapping + + Register | Role(s) | Call effect + ------------+-------------------------------------+----------------- + r0,r1 | - | caller-saved + r2 | Argument / return value | caller-saved + r3,r4,r5 | Arguments | caller-saved + r6 | Argument | callee-saved + r7...r11 | - | callee-saved + r12 | (Commonly used as GOT pointer) | callee-saved + r13 | (Commonly used as literal pool pointer) | callee-saved + r14 | Return address | caller-saved + r15 | Stack pointer | callee-saved + f0 | Argument / 
return value | caller-saved + f2,f4,f6 | Arguments | caller-saved + f1,f3,f5,f7 | - | caller-saved + f8...f15 | - | callee-saved + v0...v31 | - | caller-saved + + Each general purpose register r0 through r15 as well as each floating-point + register f0 through f15 is 64 bits wide. Each vector register v0 through v31 + is 128 bits wide. + + Note, the vector registers v0 through v15 overlap with the floating-point + registers f0 through f15. + + -------------------------------------------------------------------------- */ + +#elif defined(MACHREGS_s390x) + +#define REG(x) __asm__("%" #x) + +#define REG_Base r7 +#define REG_Sp r8 +#define REG_Hp r10 +#define REG_R1 r11 +#define REG_R2 r12 +#define REG_R3 r13 +#define REG_R4 r6 +#define REG_R5 r2 +#define REG_R6 r3 +#define REG_R7 r4 +#define REG_R8 r5 +#define REG_SpLim r9 +#define REG_MachSp r15 + +#define REG_F1 f8 +#define REG_F2 f9 +#define REG_F3 f10 +#define REG_F4 f11 +#define REG_F5 f0 +#define REG_F6 f1 + +#define REG_D1 f12 +#define REG_D2 f13 +#define REG_D3 f14 +#define REG_D4 f15 +#define REG_D5 f2 +#define REG_D6 f3 + +#define CALLER_SAVES_R5 +#define CALLER_SAVES_R6 +#define CALLER_SAVES_R7 +#define CALLER_SAVES_R8 + +#define CALLER_SAVES_F5 +#define CALLER_SAVES_F6 + +#define CALLER_SAVES_D5 +#define CALLER_SAVES_D6 + +/* ----------------------------------------------------------------------------- + The riscv64 register mapping + + Register | Role(s) | Call effect + ------------+-----------------------------------------+------------- + zero | Hard-wired zero | - + ra | Return address | caller-saved + sp | Stack pointer | callee-saved + gp | Global pointer | callee-saved + tp | Thread pointer | callee-saved + t0,t1,t2 | - | caller-saved + s0 | Frame pointer | callee-saved + s1 | - | callee-saved + a0,a1 | Arguments / return values | caller-saved + a2..a7 | Arguments | caller-saved + s2..s11 | - | callee-saved + t3..t6 | - | caller-saved + ft0..ft7 | - | caller-saved + fs0,fs1 | - | callee-saved + 
fa0,fa1 | Arguments / return values | caller-saved + fa2..fa7 | Arguments | caller-saved + fs2..fs11 | - | callee-saved + ft8..ft11 | - | caller-saved + + Each general purpose register as well as each floating-point + register is 64 bits wide. + + -------------------------------------------------------------------------- */ + +#elif defined(MACHREGS_riscv64) + +#define REG(x) __asm__(#x) + +#define REG_Base s1 +#define REG_Sp s2 +#define REG_Hp s3 +#define REG_R1 s4 +#define REG_R2 s5 +#define REG_R3 s6 +#define REG_R4 s7 +#define REG_R5 s8 +#define REG_R6 s9 +#define REG_R7 s10 +#define REG_SpLim s11 + +#define REG_F1 fs0 +#define REG_F2 fs1 +#define REG_F3 fs2 +#define REG_F4 fs3 +#define REG_F5 fs4 +#define REG_F6 fs5 + +#define REG_D1 fs6 +#define REG_D2 fs7 +#define REG_D3 fs8 +#define REG_D4 fs9 +#define REG_D5 fs10 +#define REG_D6 fs11 + +#define MAX_REAL_FLOAT_REG 6 +#define MAX_REAL_DOUBLE_REG 6 + +#else + +#error Cannot find platform to give register info for + +#endif + +#else + +#error Bad MACHREGS_NO_REGS value + +#endif + +/* ----------------------------------------------------------------------------- + * These constants define how many stg registers will be used for + * passing arguments (and results, in the case of an unboxed-tuple + * return). + * + * We usually set MAX_REAL_VANILLA_REG and co. to be the number of the + * highest STG register to occupy a real machine register, otherwise + * the calling conventions will needlessly shuffle data between the + * stack and memory-resident STG registers. We might occasionally + * set these macros to other values for testing, though. + * + * Registers above these values might still be used, for instance to + * communicate with PrimOps and RTS functions. 
+ */ + +#if !defined(MAX_REAL_VANILLA_REG) +# if defined(REG_R10) +# define MAX_REAL_VANILLA_REG 10 +# elif defined(REG_R9) +# define MAX_REAL_VANILLA_REG 9 +# elif defined(REG_R8) +# define MAX_REAL_VANILLA_REG 8 +# elif defined(REG_R7) +# define MAX_REAL_VANILLA_REG 7 +# elif defined(REG_R6) +# define MAX_REAL_VANILLA_REG 6 +# elif defined(REG_R5) +# define MAX_REAL_VANILLA_REG 5 +# elif defined(REG_R4) +# define MAX_REAL_VANILLA_REG 4 +# elif defined(REG_R3) +# define MAX_REAL_VANILLA_REG 3 +# elif defined(REG_R2) +# define MAX_REAL_VANILLA_REG 2 +# elif defined(REG_R1) +# define MAX_REAL_VANILLA_REG 1 +# else +# define MAX_REAL_VANILLA_REG 0 +# endif +#endif + +#if !defined(MAX_REAL_FLOAT_REG) +# if defined(REG_F7) +# error Please manually define MAX_REAL_FLOAT_REG for this architecture +# elif defined(REG_F6) +# define MAX_REAL_FLOAT_REG 6 +# elif defined(REG_F5) +# define MAX_REAL_FLOAT_REG 5 +# elif defined(REG_F4) +# define MAX_REAL_FLOAT_REG 4 +# elif defined(REG_F3) +# define MAX_REAL_FLOAT_REG 3 +# elif defined(REG_F2) +# define MAX_REAL_FLOAT_REG 2 +# elif defined(REG_F1) +# define MAX_REAL_FLOAT_REG 1 +# else +# define MAX_REAL_FLOAT_REG 0 +# endif +#endif + +#if !defined(MAX_REAL_DOUBLE_REG) +# if defined(REG_D7) +# error Please manually define MAX_REAL_DOUBLE_REG for this architecture +# elif defined(REG_D6) +# define MAX_REAL_DOUBLE_REG 6 +# elif defined(REG_D5) +# define MAX_REAL_DOUBLE_REG 5 +# elif defined(REG_D4) +# define MAX_REAL_DOUBLE_REG 4 +# elif defined(REG_D3) +# define MAX_REAL_DOUBLE_REG 3 +# elif defined(REG_D2) +# define MAX_REAL_DOUBLE_REG 2 +# elif defined(REG_D1) +# define MAX_REAL_DOUBLE_REG 1 +# else +# define MAX_REAL_DOUBLE_REG 0 +# endif +#endif + +#if !defined(MAX_REAL_LONG_REG) +# if defined(REG_L1) +# define MAX_REAL_LONG_REG 1 +# else +# define MAX_REAL_LONG_REG 0 +# endif +#endif + +#if !defined(MAX_REAL_XMM_REG) +# if defined(REG_XMM6) +# define MAX_REAL_XMM_REG 6 +# elif defined(REG_XMM5) +# define MAX_REAL_XMM_REG 5 
+# elif defined(REG_XMM4) +# define MAX_REAL_XMM_REG 4 +# elif defined(REG_XMM3) +# define MAX_REAL_XMM_REG 3 +# elif defined(REG_XMM2) +# define MAX_REAL_XMM_REG 2 +# elif defined(REG_XMM1) +# define MAX_REAL_XMM_REG 1 +# else +# define MAX_REAL_XMM_REG 0 +# endif +#endif + +/* define NO_ARG_REGS if we have no argument registers at all (we can + * optimise certain code paths using this predicate). + */ +#if MAX_REAL_VANILLA_REG < 2 +#define NO_ARG_REGS +#else +#undef NO_ARG_REGS +#endif diff --git a/rts/include/stg/MachRegsForHost.h b/rts/include/stg/MachRegsForHost.h new file mode 100644 index 0000000000..e902d528f6 --- /dev/null +++ b/rts/include/stg/MachRegsForHost.h @@ -0,0 +1,80 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2011 + * + * This header includes MachRegs.h "selecting" regs for the current host + * platform. + * + * Don't #include this in the RTS directly, instead include "RTS.h". + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#if defined(UnregisterisedCompiler) +#if !defined(NO_REGS) +#define NO_REGS +#endif +#endif + +/* + * Defining NO_REGS causes no global registers to be used. NO_REGS is + * typically defined by GHC, via a command-line option passed to gcc, + * when the -funregisterised flag is given. + * + * NB. When NO_REGS is on, calling & return conventions may be + * different. For example, all function arguments will be passed on + * the stack, and components of an unboxed tuple will be returned on + * the stack rather than in registers. 
+ */ +#if defined(NO_REGS) + +#define MACHREGS_NO_REGS 1 + +#else + +#define MACHREGS_NO_REGS 0 + +#if defined(i386_HOST_ARCH) +#define MACHREGS_i386 1 +#endif + +#if defined(x86_64_HOST_ARCH) +#define MACHREGS_x86_64 1 +#endif + +#if defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \ + || defined(powerpc64le_HOST_ARCH) || defined(rs6000_HOST_ARCH) +#define MACHREGS_powerpc 1 +#endif + +#if defined(sparc_HOST_ARCH) +#define MACHREGS_sparc 1 +#endif + +#if defined(arm_HOST_ARCH) +#define MACHREGS_arm 1 +#endif + +#if defined(aarch64_HOST_ARCH) +#define MACHREGS_aarch64 1 +#endif + +#if defined(darwin_HOST_OS) +#define MACHREGS_darwin 1 +#endif + +#if defined(s390x_HOST_ARCH) +#define MACHREGS_s390x 1 +#endif + +#if defined(riscv64_HOST_ARCH) +#define MACHREGS_riscv64 1 +#endif + +#endif + +#include "MachRegs.h" diff --git a/rts/include/stg/MiscClosures.h b/rts/include/stg/MiscClosures.h new file mode 100644 index 0000000000..c4cb7d45a4 --- /dev/null +++ b/rts/include/stg/MiscClosures.h @@ -0,0 +1,639 @@ +/* ---------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2009 + * + * Declarations for various symbols exported by the RTS. + * + * ToDo: many of the symbols in here don't need to be exported, but + * our Cmm code generator doesn't know how to generate local symbols + * for the RTS bits (it assumes all RTS symbols are external). + * + * See wiki:commentary/compiler/backends/ppr-c#prototypes + * + * Do not #include this file directly: #include "Rts.h" instead. 
+ * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * --------------------------------------------------------------------------*/ + +#pragma once + +#if IN_STG_CODE +# define RTS_RET_INFO(i) extern const W_(i)[] +# define RTS_FUN_INFO(i) extern const W_(i)[] +# define RTS_THUNK_INFO(i) extern const W_(i)[] +# define RTS_INFO(i) extern const W_(i)[] +# define RTS_CLOSURE(i) extern W_(i)[] +# define RTS_FUN_DECL(f) extern DLL_IMPORT_RTS StgFunPtr f(void) +#else +# define RTS_RET_INFO(i) extern DLL_IMPORT_RTS const StgRetInfoTable i +# define RTS_FUN_INFO(i) extern DLL_IMPORT_RTS const StgFunInfoTable i +# define RTS_THUNK_INFO(i) extern DLL_IMPORT_RTS const StgThunkInfoTable i +# define RTS_INFO(i) extern DLL_IMPORT_RTS const StgInfoTable i +# define RTS_CLOSURE(i) extern DLL_IMPORT_RTS StgClosure i +# define RTS_FUN_DECL(f) extern DLL_IMPORT_RTS StgFunPtr f(void) +#endif + +#if defined(TABLES_NEXT_TO_CODE) +# define RTS_RET(f) RTS_INFO(f##_info) +# define RTS_ENTRY(f) RTS_INFO(f##_info) +# define RTS_FUN(f) RTS_FUN_INFO(f##_info) +# define RTS_THUNK(f) RTS_THUNK_INFO(f##_info) +#else +# define RTS_RET(f) RTS_INFO(f##_info); RTS_FUN_DECL(f##_ret) +# define RTS_ENTRY(f) RTS_INFO(f##_info); RTS_FUN_DECL(f##_entry) +# define RTS_FUN(f) RTS_FUN_INFO(f##_info); RTS_FUN_DECL(f##_entry) +# define RTS_THUNK(f) RTS_THUNK_INFO(f##_info); RTS_FUN_DECL(f##_entry) +#endif + +/* Stack frames */ +RTS_RET(stg_upd_frame); +RTS_RET(stg_bh_upd_frame); +RTS_RET(stg_marked_upd_frame); +RTS_RET(stg_noupd_frame); +RTS_RET(stg_catch_frame); +RTS_RET(stg_catch_retry_frame); +RTS_RET(stg_atomically_frame); +RTS_RET(stg_atomically_waiting_frame); +RTS_RET(stg_catch_stm_frame); +RTS_RET(stg_unmaskAsyncExceptionszh_ret); +RTS_RET(stg_maskUninterruptiblezh_ret); +RTS_RET(stg_maskAsyncExceptionszh_ret); +RTS_RET(stg_stack_underflow_frame); +RTS_RET(stg_restore_cccs); 
+RTS_RET(stg_restore_cccs_eval); + +// RTS_FUN(stg_interp_constr1_entry); +// RTS_FUN(stg_interp_constr2_entry); +// RTS_FUN(stg_interp_constr3_entry); +// RTS_FUN(stg_interp_constr4_entry); +// RTS_FUN(stg_interp_constr5_entry); +// RTS_FUN(stg_interp_constr6_entry); +// RTS_FUN(stg_interp_constr7_entry); +// +// This is referenced using the FFI in the compiler (GHC.ByteCode.InfoTable), +// so we can't give it the correct type here because the prototypes +// would clash (FFI references are always declared with type StgWord[] +// in the generated C code). + +/* Magic glue code for when compiled code returns a value in R1/F1/D1 + or a VoidRep to the interpreter. */ +RTS_RET(stg_ctoi_R1p); +RTS_RET(stg_ctoi_R1unpt); +RTS_RET(stg_ctoi_R1n); +RTS_RET(stg_ctoi_F1); +RTS_RET(stg_ctoi_D1); +RTS_RET(stg_ctoi_L1); +RTS_RET(stg_ctoi_V); + +RTS_FUN_DECL(stg_ctoi_t); +RTS_RET(stg_ctoi_t0); +RTS_RET(stg_ctoi_t1); +RTS_RET(stg_ctoi_t2); +RTS_RET(stg_ctoi_t3); +RTS_RET(stg_ctoi_t4); +RTS_RET(stg_ctoi_t5); +RTS_RET(stg_ctoi_t6); +RTS_RET(stg_ctoi_t7); +RTS_RET(stg_ctoi_t8); +RTS_RET(stg_ctoi_t9); + +RTS_RET(stg_ctoi_t10); +RTS_RET(stg_ctoi_t11); +RTS_RET(stg_ctoi_t12); +RTS_RET(stg_ctoi_t13); +RTS_RET(stg_ctoi_t14); +RTS_RET(stg_ctoi_t15); +RTS_RET(stg_ctoi_t16); +RTS_RET(stg_ctoi_t17); +RTS_RET(stg_ctoi_t18); +RTS_RET(stg_ctoi_t19); + +RTS_RET(stg_ctoi_t20); +RTS_RET(stg_ctoi_t21); +RTS_RET(stg_ctoi_t22); +RTS_RET(stg_ctoi_t23); +RTS_RET(stg_ctoi_t24); +RTS_RET(stg_ctoi_t25); +RTS_RET(stg_ctoi_t26); +RTS_RET(stg_ctoi_t27); +RTS_RET(stg_ctoi_t28); +RTS_RET(stg_ctoi_t29); + +RTS_RET(stg_ctoi_t30); +RTS_RET(stg_ctoi_t31); +RTS_RET(stg_ctoi_t32); +RTS_RET(stg_ctoi_t33); +RTS_RET(stg_ctoi_t34); +RTS_RET(stg_ctoi_t35); +RTS_RET(stg_ctoi_t36); +RTS_RET(stg_ctoi_t37); +RTS_RET(stg_ctoi_t38); +RTS_RET(stg_ctoi_t39); + +RTS_RET(stg_ctoi_t40); +RTS_RET(stg_ctoi_t41); +RTS_RET(stg_ctoi_t42); +RTS_RET(stg_ctoi_t43); +RTS_RET(stg_ctoi_t44); +RTS_RET(stg_ctoi_t45); +RTS_RET(stg_ctoi_t46); 
+RTS_RET(stg_ctoi_t47); +RTS_RET(stg_ctoi_t48); +RTS_RET(stg_ctoi_t49); + +RTS_RET(stg_ctoi_t50); +RTS_RET(stg_ctoi_t51); +RTS_RET(stg_ctoi_t52); +RTS_RET(stg_ctoi_t53); +RTS_RET(stg_ctoi_t54); +RTS_RET(stg_ctoi_t55); +RTS_RET(stg_ctoi_t56); +RTS_RET(stg_ctoi_t57); +RTS_RET(stg_ctoi_t58); +RTS_RET(stg_ctoi_t59); + +RTS_RET(stg_ctoi_t60); +RTS_RET(stg_ctoi_t61); +RTS_RET(stg_ctoi_t62); + +RTS_RET(stg_apply_interp); + +RTS_ENTRY(stg_IND); +RTS_ENTRY(stg_IND_STATIC); +RTS_ENTRY(stg_BLACKHOLE); +RTS_ENTRY(stg_CAF_BLACKHOLE); +RTS_ENTRY(__stg_EAGER_BLACKHOLE); +RTS_ENTRY(stg_WHITEHOLE); +RTS_ENTRY(stg_BLOCKING_QUEUE_CLEAN); +RTS_ENTRY(stg_BLOCKING_QUEUE_DIRTY); + +RTS_FUN(stg_BCO); +RTS_ENTRY(stg_EVACUATED); +RTS_ENTRY(stg_WEAK); +RTS_ENTRY(stg_DEAD_WEAK); +RTS_ENTRY(stg_C_FINALIZER_LIST); +RTS_ENTRY(stg_STABLE_NAME); +RTS_ENTRY(stg_MVAR_CLEAN); +RTS_ENTRY(stg_MVAR_DIRTY); +RTS_ENTRY(stg_TVAR_CLEAN); +RTS_ENTRY(stg_TVAR_DIRTY); +RTS_ENTRY(stg_TSO); +RTS_ENTRY(stg_STACK); +RTS_ENTRY(stg_RUBBISH_ENTRY); +RTS_ENTRY(stg_ARR_WORDS); +RTS_ENTRY(stg_MUT_ARR_WORDS); +RTS_ENTRY(stg_MUT_ARR_PTRS_CLEAN); +RTS_ENTRY(stg_MUT_ARR_PTRS_DIRTY); +RTS_ENTRY(stg_MUT_ARR_PTRS_FROZEN_CLEAN); +RTS_ENTRY(stg_MUT_ARR_PTRS_FROZEN_DIRTY); +RTS_ENTRY(stg_SMALL_MUT_ARR_PTRS_CLEAN); +RTS_ENTRY(stg_SMALL_MUT_ARR_PTRS_DIRTY); +RTS_ENTRY(stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN); +RTS_ENTRY(stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY); +RTS_ENTRY(stg_MUT_VAR_CLEAN); +RTS_ENTRY(stg_MUT_VAR_DIRTY); +RTS_ENTRY(stg_END_TSO_QUEUE); +RTS_ENTRY(stg_GCD_CAF); +RTS_ENTRY(stg_STM_AWOKEN); +RTS_ENTRY(stg_MSG_TRY_WAKEUP); +RTS_ENTRY(stg_MSG_THROWTO); +RTS_ENTRY(stg_MSG_BLACKHOLE); +RTS_ENTRY(stg_MSG_NULL); +RTS_ENTRY(stg_MVAR_TSO_QUEUE); +RTS_ENTRY(stg_catch); +RTS_ENTRY(stg_PAP); +RTS_ENTRY(stg_AP); +RTS_ENTRY(stg_AP_NOUPD); +RTS_ENTRY(stg_AP_STACK); +RTS_ENTRY(stg_AP_STACK_NOUPD); +RTS_ENTRY(stg_dummy_ret); +RTS_ENTRY(stg_raise); +RTS_ENTRY(stg_raise_ret); +RTS_ENTRY(stg_atomically); +RTS_ENTRY(stg_TVAR_WATCH_QUEUE); 
+RTS_ENTRY(stg_TREC_CHUNK); +RTS_ENTRY(stg_TREC_HEADER); +RTS_ENTRY(stg_END_STM_WATCH_QUEUE); +RTS_ENTRY(stg_END_STM_CHUNK_LIST); +RTS_ENTRY(stg_NO_TREC); +RTS_ENTRY(stg_COMPACT_NFDATA_CLEAN); +RTS_ENTRY(stg_COMPACT_NFDATA_DIRTY); +RTS_ENTRY(stg_SRT_1); +RTS_ENTRY(stg_SRT_2); +RTS_ENTRY(stg_SRT_3); +RTS_ENTRY(stg_SRT_4); +RTS_ENTRY(stg_SRT_5); +RTS_ENTRY(stg_SRT_6); +RTS_ENTRY(stg_SRT_7); +RTS_ENTRY(stg_SRT_8); +RTS_ENTRY(stg_SRT_9); +RTS_ENTRY(stg_SRT_10); +RTS_ENTRY(stg_SRT_11); +RTS_ENTRY(stg_SRT_12); +RTS_ENTRY(stg_SRT_13); +RTS_ENTRY(stg_SRT_14); +RTS_ENTRY(stg_SRT_15); +RTS_ENTRY(stg_SRT_16); + +/* closures */ + +RTS_CLOSURE(stg_END_TSO_QUEUE_closure); +RTS_CLOSURE(stg_STM_AWOKEN_closure); +RTS_CLOSURE(stg_NO_FINALIZER_closure); +RTS_CLOSURE(stg_dummy_ret_closure); +RTS_CLOSURE(stg_forceIO_closure); + +RTS_CLOSURE(stg_END_STM_WATCH_QUEUE_closure); +RTS_CLOSURE(stg_END_STM_CHUNK_LIST_closure); +RTS_CLOSURE(stg_NO_TREC_closure); + +RTS_ENTRY(stg_NO_FINALIZER); + +#if IN_STG_CODE +extern DLL_IMPORT_RTS StgWordArray stg_CHARLIKE_closure; +extern DLL_IMPORT_RTS StgWordArray stg_INTLIKE_closure; +#else +extern DLL_IMPORT_RTS StgIntCharlikeClosure stg_CHARLIKE_closure[]; +extern DLL_IMPORT_RTS StgIntCharlikeClosure stg_INTLIKE_closure[]; +#endif + +/* StgStartup */ + +RTS_RET(stg_forceIO); +RTS_RET(stg_noforceIO); + +/* standard entry points */ + +/* standard selector thunks */ + +RTS_ENTRY(stg_sel_0_upd); +RTS_ENTRY(stg_sel_1_upd); +RTS_ENTRY(stg_sel_2_upd); +RTS_ENTRY(stg_sel_3_upd); +RTS_ENTRY(stg_sel_4_upd); +RTS_ENTRY(stg_sel_5_upd); +RTS_ENTRY(stg_sel_6_upd); +RTS_ENTRY(stg_sel_7_upd); +RTS_ENTRY(stg_sel_8_upd); +RTS_ENTRY(stg_sel_9_upd); +RTS_ENTRY(stg_sel_10_upd); +RTS_ENTRY(stg_sel_11_upd); +RTS_ENTRY(stg_sel_12_upd); +RTS_ENTRY(stg_sel_13_upd); +RTS_ENTRY(stg_sel_14_upd); +RTS_ENTRY(stg_sel_15_upd); + +RTS_ENTRY(stg_sel_0_noupd); +RTS_ENTRY(stg_sel_1_noupd); +RTS_ENTRY(stg_sel_2_noupd); +RTS_ENTRY(stg_sel_3_noupd); +RTS_ENTRY(stg_sel_4_noupd); 
+RTS_ENTRY(stg_sel_5_noupd); +RTS_ENTRY(stg_sel_6_noupd); +RTS_ENTRY(stg_sel_7_noupd); +RTS_ENTRY(stg_sel_8_noupd); +RTS_ENTRY(stg_sel_9_noupd); +RTS_ENTRY(stg_sel_10_noupd); +RTS_ENTRY(stg_sel_11_noupd); +RTS_ENTRY(stg_sel_12_noupd); +RTS_ENTRY(stg_sel_13_noupd); +RTS_ENTRY(stg_sel_14_noupd); +RTS_ENTRY(stg_sel_15_noupd); + +/* standard ap thunks */ + +RTS_THUNK(stg_ap_1_upd); +RTS_THUNK(stg_ap_2_upd); +RTS_THUNK(stg_ap_3_upd); +RTS_THUNK(stg_ap_4_upd); +RTS_THUNK(stg_ap_5_upd); +RTS_THUNK(stg_ap_6_upd); +RTS_THUNK(stg_ap_7_upd); + +/* standard application routines (see also utils/genapply, + * and GHC.StgToCmm.ArgRep). + */ +RTS_RET(stg_ap_v); +RTS_RET(stg_ap_f); +RTS_RET(stg_ap_d); +RTS_RET(stg_ap_l); +RTS_RET(stg_ap_v16); +RTS_RET(stg_ap_v32); +RTS_RET(stg_ap_v64); +RTS_RET(stg_ap_n); +RTS_RET(stg_ap_p); +RTS_RET(stg_ap_pv); +RTS_RET(stg_ap_pp); +RTS_RET(stg_ap_ppv); +RTS_RET(stg_ap_ppp); +RTS_RET(stg_ap_pppv); +RTS_RET(stg_ap_pppp); +RTS_RET(stg_ap_ppppp); +RTS_RET(stg_ap_pppppp); + +RTS_FUN_DECL(stg_ap_0_fast); +RTS_FUN_DECL(stg_ap_v_fast); +RTS_FUN_DECL(stg_ap_f_fast); +RTS_FUN_DECL(stg_ap_d_fast); +RTS_FUN_DECL(stg_ap_l_fast); +RTS_FUN_DECL(stg_ap_v16_fast); +RTS_FUN_DECL(stg_ap_v32_fast); +RTS_FUN_DECL(stg_ap_v64_fast); +RTS_FUN_DECL(stg_ap_n_fast); +RTS_FUN_DECL(stg_ap_p_fast); +RTS_FUN_DECL(stg_ap_pv_fast); +RTS_FUN_DECL(stg_ap_pp_fast); +RTS_FUN_DECL(stg_ap_ppv_fast); +RTS_FUN_DECL(stg_ap_ppp_fast); +RTS_FUN_DECL(stg_ap_pppv_fast); +RTS_FUN_DECL(stg_ap_pppp_fast); +RTS_FUN_DECL(stg_ap_ppppp_fast); +RTS_FUN_DECL(stg_ap_pppppp_fast); +RTS_FUN_DECL(stg_PAP_apply); + +/* standard GC & stack check entry points, all defined in HeapStackCheck.cmm */ + +RTS_FUN_DECL(stg_gc_noregs); + +RTS_RET(stg_ret_v); +RTS_RET(stg_ret_p); +RTS_RET(stg_ret_n); +RTS_RET(stg_ret_f); +RTS_RET(stg_ret_d); +RTS_RET(stg_ret_l); +RTS_RET(stg_ret_t); + +RTS_FUN_DECL(stg_gc_prim); +RTS_FUN_DECL(stg_gc_prim_p); +RTS_FUN_DECL(stg_gc_prim_pp); +RTS_FUN_DECL(stg_gc_prim_n); + 
+RTS_RET(stg_gc_prim_p_ll_ret); +RTS_FUN_DECL(stg_gc_prim_p_ll); + +RTS_RET(stg_enter); +RTS_FUN_DECL(__stg_gc_enter_1); + +RTS_FUN_DECL(stg_gc_unpt_r1); +RTS_FUN_DECL(stg_gc_unbx_r1); +RTS_FUN_DECL(stg_gc_f1); +RTS_FUN_DECL(stg_gc_d1); +RTS_FUN_DECL(stg_gc_l1); +RTS_FUN_DECL(stg_gc_pp); +RTS_FUN_DECL(stg_gc_ppp); +RTS_FUN_DECL(stg_gc_pppp); + +RTS_RET(stg_gc_fun); +RTS_FUN_DECL(__stg_gc_fun); + +RTS_FUN_DECL(stg_yield_noregs); +RTS_FUN_DECL(stg_yield_to_interpreter); +RTS_FUN_DECL(stg_block_noregs); +RTS_FUN_DECL(stg_block_blackhole); +RTS_FUN_DECL(stg_block_blackhole_finally); +RTS_FUN_DECL(stg_block_takemvar); +RTS_FUN_DECL(stg_block_readmvar); +RTS_RET(stg_block_takemvar); +RTS_RET(stg_block_readmvar); +RTS_FUN_DECL(stg_block_putmvar); +RTS_RET(stg_block_putmvar); +#if defined(mingw32_HOST_OS) +RTS_FUN_DECL(stg_block_async); +RTS_RET(stg_block_async); +RTS_FUN_DECL(stg_block_async_void); +RTS_RET(stg_block_async_void); +#endif +RTS_FUN_DECL(stg_block_stmwait); +RTS_FUN_DECL(stg_block_throwto); +RTS_RET(stg_block_throwto); + +RTS_FUN_DECL(stg_readIOPortzh); +RTS_FUN_DECL(stg_writeIOPortzh); +RTS_FUN_DECL(stg_newIOPortzh); + +/* Entry/exit points from StgStartup.cmm */ + +RTS_RET(stg_stop_thread); + +RTS_FUN_DECL(stg_returnToStackTop); +RTS_FUN_DECL(stg_returnToSched); +RTS_FUN_DECL(stg_returnToSchedNotPaused); +RTS_FUN_DECL(stg_returnToSchedButFirst); +RTS_FUN_DECL(stg_threadFinished); + +RTS_FUN_DECL(StgReturn); + +/* ----------------------------------------------------------------------------- + PrimOps + -------------------------------------------------------------------------- */ + +RTS_FUN_DECL(stg_decodeFloatzuIntzh); +RTS_FUN_DECL(stg_decodeDoublezu2Intzh); +RTS_FUN_DECL(stg_decodeDoublezuInt64zh); + +RTS_FUN_DECL(stg_unsafeThawArrayzh); +RTS_FUN_DECL(stg_casArrayzh); +RTS_FUN_DECL(stg_newByteArrayzh); +RTS_FUN_DECL(stg_newPinnedByteArrayzh); +RTS_FUN_DECL(stg_newAlignedPinnedByteArrayzh); +RTS_FUN_DECL(stg_isByteArrayPinnedzh); 
+RTS_FUN_DECL(stg_isMutableByteArrayPinnedzh); +RTS_FUN_DECL(stg_shrinkMutableByteArrayzh); +RTS_FUN_DECL(stg_resizzeMutableByteArrayzh); +RTS_FUN_DECL(stg_shrinkSmallMutableArrayzh); +RTS_FUN_DECL(stg_casIntArrayzh); +RTS_FUN_DECL(stg_casInt8Arrayzh); +RTS_FUN_DECL(stg_casInt16Arrayzh); +RTS_FUN_DECL(stg_casInt32Arrayzh); +RTS_FUN_DECL(stg_casInt64Arrayzh); +RTS_FUN_DECL(stg_newArrayzh); +RTS_FUN_DECL(stg_newArrayArrayzh); +RTS_FUN_DECL(stg_copyArrayzh); +RTS_FUN_DECL(stg_copyMutableArrayzh); +RTS_FUN_DECL(stg_copyArrayArrayzh); +RTS_FUN_DECL(stg_copyMutableArrayArrayzh); +RTS_FUN_DECL(stg_cloneArrayzh); +RTS_FUN_DECL(stg_cloneMutableArrayzh); +RTS_FUN_DECL(stg_freezzeArrayzh); +RTS_FUN_DECL(stg_thawArrayzh); + +RTS_FUN_DECL(stg_newSmallArrayzh); +RTS_FUN_DECL(stg_unsafeThawSmallArrayzh); +RTS_FUN_DECL(stg_cloneSmallArrayzh); +RTS_FUN_DECL(stg_cloneSmallMutableArrayzh); +RTS_FUN_DECL(stg_freezzeSmallArrayzh); +RTS_FUN_DECL(stg_thawSmallArrayzh); +RTS_FUN_DECL(stg_copySmallArrayzh); +RTS_FUN_DECL(stg_copySmallMutableArrayzh); +RTS_FUN_DECL(stg_casSmallArrayzh); + +RTS_FUN_DECL(stg_newMutVarzh); +RTS_FUN_DECL(stg_atomicModifyMutVar2zh); +RTS_FUN_DECL(stg_atomicModifyMutVarzuzh); +RTS_FUN_DECL(stg_casMutVarzh); + +RTS_FUN_DECL(stg_isEmptyMVarzh); +RTS_FUN_DECL(stg_newMVarzh); +RTS_FUN_DECL(stg_takeMVarzh); +RTS_FUN_DECL(stg_putMVarzh); +RTS_FUN_DECL(stg_readMVarzh); +RTS_FUN_DECL(stg_tryTakeMVarzh); +RTS_FUN_DECL(stg_tryPutMVarzh); +RTS_FUN_DECL(stg_tryReadMVarzh); + +RTS_FUN_DECL(stg_waitReadzh); +RTS_FUN_DECL(stg_waitWritezh); +RTS_FUN_DECL(stg_delayzh); +#if defined(mingw32_HOST_OS) +RTS_FUN_DECL(stg_asyncReadzh); +RTS_FUN_DECL(stg_asyncWritezh); +RTS_FUN_DECL(stg_asyncDoProczh); +#endif + +RTS_FUN_DECL(stg_catchzh); +RTS_FUN_DECL(stg_raisezh); +RTS_FUN_DECL(stg_raiseDivZZerozh); +RTS_FUN_DECL(stg_raiseUnderflowzh); +RTS_FUN_DECL(stg_raiseOverflowzh); +RTS_FUN_DECL(stg_raiseIOzh); +RTS_FUN_DECL(stg_paniczh); +RTS_FUN_DECL(stg_absentErrorzh); + 
+RTS_FUN_DECL(stg_makeStableNamezh); +RTS_FUN_DECL(stg_makeStablePtrzh); +RTS_FUN_DECL(stg_deRefStablePtrzh); + +RTS_FUN_DECL(stg_compactAddzh); +RTS_FUN_DECL(stg_compactAddWithSharingzh); +RTS_FUN_DECL(stg_compactNewzh); +RTS_FUN_DECL(stg_compactAppendzh); +RTS_FUN_DECL(stg_compactResizzezh); +RTS_FUN_DECL(stg_compactGetRootzh); +RTS_FUN_DECL(stg_compactContainszh); +RTS_FUN_DECL(stg_compactContainsAnyzh); +RTS_FUN_DECL(stg_compactGetFirstBlockzh); +RTS_FUN_DECL(stg_compactGetNextBlockzh); +RTS_FUN_DECL(stg_compactAllocateBlockzh); +RTS_FUN_DECL(stg_compactFixupPointerszh); +RTS_FUN_DECL(stg_compactSizzezh); + +RTS_FUN_DECL(stg_forkzh); +RTS_FUN_DECL(stg_forkOnzh); +RTS_FUN_DECL(stg_yieldzh); +RTS_FUN_DECL(stg_killMyself); +RTS_FUN_DECL(stg_killThreadzh); +RTS_FUN_DECL(stg_getMaskingStatezh); +RTS_FUN_DECL(stg_maskAsyncExceptionszh); +RTS_FUN_DECL(stg_maskUninterruptiblezh); +RTS_FUN_DECL(stg_unmaskAsyncExceptionszh); +RTS_FUN_DECL(stg_myThreadIdzh); +RTS_FUN_DECL(stg_labelThreadzh); +RTS_FUN_DECL(stg_isCurrentThreadBoundzh); +RTS_FUN_DECL(stg_threadStatuszh); + +RTS_FUN_DECL(stg_mkWeakzh); +RTS_FUN_DECL(stg_mkWeakNoFinalizzerzh); +RTS_FUN_DECL(stg_mkWeakForeignzh); +RTS_FUN_DECL(stg_addCFinalizzerToWeakzh); +RTS_FUN_DECL(stg_finalizzeWeakzh); +RTS_FUN_DECL(stg_deRefWeakzh); + +RTS_FUN_DECL(stg_runRWzh); + +RTS_FUN_DECL(stg_newBCOzh); +RTS_FUN_DECL(stg_mkApUpd0zh); + +RTS_FUN_DECL(stg_retryzh); +RTS_FUN_DECL(stg_catchRetryzh); +RTS_FUN_DECL(stg_catchSTMzh); +RTS_FUN_DECL(stg_atomicallyzh); +RTS_FUN_DECL(stg_newTVarzh); +RTS_FUN_DECL(stg_readTVarzh); +RTS_FUN_DECL(stg_readTVarIOzh); +RTS_FUN_DECL(stg_writeTVarzh); + +RTS_FUN_DECL(stg_unpackClosurezh); +RTS_FUN_DECL(stg_closureSizzezh); +RTS_FUN_DECL(stg_whereFromzh); +RTS_FUN_DECL(stg_getApStackValzh); +RTS_FUN_DECL(stg_getSparkzh); +RTS_FUN_DECL(stg_numSparkszh); + +RTS_FUN_DECL(stg_noDuplicatezh); + +RTS_FUN_DECL(stg_traceCcszh); +RTS_FUN_DECL(stg_clearCCSzh); +RTS_FUN_DECL(stg_traceEventzh); 
+RTS_FUN_DECL(stg_traceBinaryEventzh); +RTS_FUN_DECL(stg_traceMarkerzh); +RTS_FUN_DECL(stg_getThreadAllocationCounterzh); +RTS_FUN_DECL(stg_setThreadAllocationCounterzh); + + +/* Other misc stuff */ +// See wiki:commentary/compiler/backends/ppr-c#prototypes + +#if IN_STG_CODE && !IN_STGCRUN + +// Interpreter.c +extern StgWord rts_stop_next_breakpoint[]; +extern StgWord rts_stop_on_exception[]; +extern StgWord rts_breakpoint_io_action[]; + +// Schedule.c +extern StgWord RTS_VAR(blocked_queue_hd), RTS_VAR(blocked_queue_tl); +extern StgWord RTS_VAR(sleeping_queue); +extern StgWord RTS_VAR(sched_mutex); + +// Apply.cmm +// canned bitmap for each arg type +extern const StgWord stg_arg_bitmaps[]; +extern const StgWord stg_ap_stack_entries[]; +extern const StgWord stg_stack_save_entries[]; + +// Storage.c +extern unsigned int RTS_VAR(g0); +extern unsigned int RTS_VAR(large_alloc_lim); +extern StgWord RTS_VAR(atomic_modify_mutvar_mutex); + +// RtsFlags +extern StgWord RTS_VAR(RtsFlags); // bogus type + +// StablePtr.c +extern StgWord RTS_VAR(stable_ptr_table); + +// StableName.c +extern StgWord RTS_VAR(stable_name_table); + +// Profiling.c +extern unsigned int RTS_VAR(era); +extern unsigned int RTS_VAR(entering_PAP); +extern StgWord CCS_OVERHEAD[]; +extern StgWord CCS_SYSTEM[]; + +// Calls to these rts functions are generated directly +// by codegen (see GHC.StgToCmm.Prof) +// and don't require (don't emit) forward declarations. +// +// In unregisterised mode (when building via .hc files) +// the calls are ordinary C calls. 
Functions must be in +// scope and must match prototype assumed by +// 'GHC.StgToCmm.Prof' +// as opposed to real prototype declared in +// 'rts/include/rts/prof/CCS.h' +void enterFunCCS (void *reg, void *ccsfn); +void * pushCostCentre (void *ccs, void *cc); + +// Capability.c +extern unsigned int n_capabilities; + +/* ----------------------------------------------------------------------------- + Nonmoving GC write barrier + -------------------------------------------------------------------------- */ + +#include <rts/NonMoving.h> + + +#endif diff --git a/rts/include/stg/Prim.h b/rts/include/stg/Prim.h new file mode 100644 index 0000000000..20b3d45bee --- /dev/null +++ b/rts/include/stg/Prim.h @@ -0,0 +1,139 @@ +/* ---------------------------------------------------------------------------- + * + * (c) The GHC Team, 2014-2014 + * + * Declarations for C fallback primitives implemented by 'ghc-prim' package. + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * -------------------------------------------------------------------------- */ + +#pragma once + +/* libraries/ghc-prim/cbits/atomic.c */ +StgWord hs_atomic_add8(StgWord x, StgWord val); +StgWord hs_atomic_add16(StgWord x, StgWord val); +StgWord hs_atomic_add32(StgWord x, StgWord val); +StgWord64 hs_atomic_add64(StgWord x, StgWord64 val); +StgWord hs_atomic_sub8(StgWord x, StgWord val); +StgWord hs_atomic_sub16(StgWord x, StgWord val); +StgWord hs_atomic_sub32(StgWord x, StgWord val); +StgWord64 hs_atomic_sub64(StgWord x, StgWord64 val); +StgWord hs_atomic_and8(StgWord x, StgWord val); +StgWord hs_atomic_and16(StgWord x, StgWord val); +StgWord hs_atomic_and32(StgWord x, StgWord val); +StgWord64 hs_atomic_and64(StgWord x, StgWord64 val); +StgWord hs_atomic_nand8(StgWord x, StgWord val); +StgWord hs_atomic_nand16(StgWord x, StgWord val); 
+StgWord hs_atomic_nand32(StgWord x, StgWord val); +StgWord64 hs_atomic_nand64(StgWord x, StgWord64 val); +StgWord hs_atomic_or8(StgWord x, StgWord val); +StgWord hs_atomic_or16(StgWord x, StgWord val); +StgWord hs_atomic_or32(StgWord x, StgWord val); +StgWord64 hs_atomic_or64(StgWord x, StgWord64 val); +StgWord hs_atomic_xor8(StgWord x, StgWord val); +StgWord hs_atomic_xor16(StgWord x, StgWord val); +StgWord hs_atomic_xor32(StgWord x, StgWord val); +StgWord64 hs_atomic_xor64(StgWord x, StgWord64 val); +StgWord hs_cmpxchg8(StgWord x, StgWord old, StgWord new_); +StgWord hs_cmpxchg16(StgWord x, StgWord old, StgWord new_); +StgWord hs_cmpxchg32(StgWord x, StgWord old, StgWord new_); +StgWord hs_cmpxchg64(StgWord x, StgWord64 old, StgWord64 new_); +StgWord hs_atomicread8(StgWord x); +StgWord hs_atomicread16(StgWord x); +StgWord hs_atomicread32(StgWord x); +StgWord64 hs_atomicread64(StgWord x); +void hs_atomicwrite8(StgWord x, StgWord val); +void hs_atomicwrite16(StgWord x, StgWord val); +void hs_atomicwrite32(StgWord x, StgWord val); +void hs_atomicwrite64(StgWord x, StgWord64 val); +StgWord hs_xchg8(StgWord x, StgWord val); +StgWord hs_xchg16(StgWord x, StgWord val); +StgWord hs_xchg32(StgWord x, StgWord val); +StgWord hs_xchg64(StgWord x, StgWord val); + +/* libraries/ghc-prim/cbits/bswap.c */ +StgWord16 hs_bswap16(StgWord16 x); +StgWord32 hs_bswap32(StgWord32 x); +StgWord64 hs_bswap64(StgWord64 x); + +/* libraries/ghc-prim/cbits/bitrev.c +This was done as part of issue #16164. 
+See Note [Bit reversal primop] for more details about the implementation.*/ +StgWord hs_bitrev8(StgWord x); +StgWord16 hs_bitrev16(StgWord16 x); +StgWord32 hs_bitrev32(StgWord32 x); +StgWord64 hs_bitrev64(StgWord64 x); + +/* libraries/ghc-prim/cbits/longlong.c */ +#if WORD_SIZE_IN_BITS < 64 +StgInt hs_eq64 (StgWord64 a, StgWord64 b); +StgInt hs_ne64 (StgWord64 a, StgWord64 b); +StgInt hs_gtWord64 (StgWord64 a, StgWord64 b); +StgInt hs_geWord64 (StgWord64 a, StgWord64 b); +StgInt hs_ltWord64 (StgWord64 a, StgWord64 b); +StgInt hs_leWord64 (StgWord64 a, StgWord64 b); +StgInt hs_gtInt64 (StgInt64 a, StgInt64 b); +StgInt hs_geInt64 (StgInt64 a, StgInt64 b); +StgInt hs_ltInt64 (StgInt64 a, StgInt64 b); +StgInt hs_leInt64 (StgInt64 a, StgInt64 b); +StgInt64 hs_neg64 (StgInt64 a); +StgWord64 hs_add64 (StgWord64 a, StgWord64 b); +StgWord64 hs_sub64 (StgWord64 a, StgWord64 b); +StgWord64 hs_mul64 (StgWord64 a, StgWord64 b); +StgWord64 hs_remWord64 (StgWord64 a, StgWord64 b); +StgWord64 hs_quotWord64 (StgWord64 a, StgWord64 b); +StgInt64 hs_remInt64 (StgInt64 a, StgInt64 b); +StgInt64 hs_quotInt64 (StgInt64 a, StgInt64 b); +StgWord64 hs_and64 (StgWord64 a, StgWord64 b); +StgWord64 hs_or64 (StgWord64 a, StgWord64 b); +StgWord64 hs_xor64 (StgWord64 a, StgWord64 b); +StgWord64 hs_not64 (StgWord64 a); +StgWord64 hs_uncheckedShiftL64 (StgWord64 a, StgInt b); +StgWord64 hs_uncheckedShiftRL64 (StgWord64 a, StgInt b); +StgInt64 hs_uncheckedIShiftRA64 (StgInt64 a, StgInt b); +StgInt64 hs_intToInt64 (StgInt i); +StgInt hs_int64ToInt (StgInt64 i); +StgWord64 hs_int64ToWord64 (StgInt64 i); +StgWord64 hs_wordToWord64 (StgWord w); +StgWord hs_word64ToWord (StgWord64 w); +StgInt64 hs_word64ToInt64 (StgWord64 w); +#endif + +/* libraries/ghc-prim/cbits/pdep.c */ +StgWord64 hs_pdep64(StgWord64 src, StgWord64 mask); +StgWord hs_pdep32(StgWord src, StgWord mask); +StgWord hs_pdep16(StgWord src, StgWord mask); +StgWord hs_pdep8(StgWord src, StgWord mask); + +/* libraries/ghc-prim/cbits/pext.c 
*/ +StgWord64 hs_pext64(StgWord64 src, StgWord64 mask); +StgWord hs_pext32(StgWord src, StgWord mask); +StgWord hs_pext16(StgWord src, StgWord mask); +StgWord hs_pext8(StgWord src, StgWord mask); + +/* libraries/ghc-prim/cbits/popcnt.c */ +StgWord hs_popcnt8(StgWord x); +StgWord hs_popcnt16(StgWord x); +StgWord hs_popcnt32(StgWord x); +StgWord hs_popcnt64(StgWord64 x); +StgWord hs_popcnt(StgWord x); + +/* libraries/ghc-prim/cbits/word2float.c */ +StgFloat hs_word2float32(StgWord x); +StgDouble hs_word2float64(StgWord x); + +/* libraries/ghc-prim/cbits/clz.c */ +StgWord hs_clz8(StgWord x); +StgWord hs_clz16(StgWord x); +StgWord hs_clz32(StgWord x); +StgWord hs_clz64(StgWord64 x); + +/* libraries/ghc-prim/cbits/ctz.c */ +StgWord hs_ctz8(StgWord x); +StgWord hs_ctz16(StgWord x); +StgWord hs_ctz32(StgWord x); +StgWord hs_ctz64(StgWord64 x); diff --git a/rts/include/stg/Regs.h b/rts/include/stg/Regs.h new file mode 100644 index 0000000000..e37ce3eba4 --- /dev/null +++ b/rts/include/stg/Regs.h @@ -0,0 +1,528 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2012 + * + * Registers in the STG machine. + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +/* + * The STG machine has a collection of "registers", each one of which + * may or may not correspond to an actual machine register when + * running code. + * + * The register set is backed by a table in memory (struct + * StgRegTable). If a particular STG register is not mapped to a + * machine register, then the appropriate slot in this table is used + * instead. + * + * This table is itself pointed to by another register, BaseReg. 
If + * BaseReg is not in a machine register, then the register table is + * used from an absolute location (MainCapability). + * + */ + +typedef struct { + StgWord stgEagerBlackholeInfo; + StgFunPtr stgGCEnter1; + StgFunPtr stgGCFun; +} StgFunTable; + +/* + * Vanilla registers are given this union type, which is purely so + * that we can cast the vanilla reg to a variety of types with the + * minimum of syntax. eg. R1.w instead of (StgWord)R1. + */ +typedef union { + StgWord w; + StgAddr a; + StgChar c; + StgFloat f; + StgInt i; + StgPtr p; +} StgUnion; + +/* + * This is the table that holds shadow-locations for all the STG + * registers. The shadow locations are used when: + * + * 1) the particular register isn't mapped to a real machine + * register, probably because there's a shortage of real registers. + * 2) caller-saves registers are saved across a CCall + */ +typedef struct { + StgUnion rR1; + StgUnion rR2; + StgUnion rR3; + StgUnion rR4; + StgUnion rR5; + StgUnion rR6; + StgUnion rR7; + StgUnion rR8; + StgUnion rR9; /* used occasionally by heap/stack checks */ + StgUnion rR10; /* used occasionally by heap/stack checks */ + StgFloat rF1; + StgFloat rF2; + StgFloat rF3; + StgFloat rF4; + StgFloat rF5; + StgFloat rF6; + StgDouble rD1; + StgDouble rD2; + StgDouble rD3; + StgDouble rD4; + StgDouble rD5; + StgDouble rD6; + StgWord128 rXMM1; + StgWord128 rXMM2; + StgWord128 rXMM3; + StgWord128 rXMM4; + StgWord128 rXMM5; + StgWord128 rXMM6; + StgWord256 rYMM1; + StgWord256 rYMM2; + StgWord256 rYMM3; + StgWord256 rYMM4; + StgWord256 rYMM5; + StgWord256 rYMM6; + StgWord512 rZMM1; + StgWord512 rZMM2; + StgWord512 rZMM3; + StgWord512 rZMM4; + StgWord512 rZMM5; + StgWord512 rZMM6; + StgWord64 rL1; + StgPtr rSp; + StgPtr rSpLim; + StgPtr rHp; + StgPtr rHpLim; + struct CostCentreStack_ * rCCCS; /* current cost-centre-stack */ + struct StgTSO_ * rCurrentTSO; + struct nursery_ * rNursery; + struct bdescr_ * rCurrentNursery; /* Hp/HpLim point into this block */ + struct 
bdescr_ * rCurrentAlloc; /* for allocation using allocate() */ + StgWord rHpAlloc; /* number of *bytes* being allocated in heap */ + StgWord rRet; /* holds the return code of the thread */ +} StgRegTable; + +#if IN_STG_CODE + +/* + * Registers Hp and HpLim are global across the entire system, and are + * copied into the RegTable or registers before executing a thread. + * + * Registers Sp and SpLim are saved in the TSO for the thread, but are + * copied into the RegTable or registers before executing a thread. + * + * All other registers are "general purpose", and are used for passing + * arguments to functions, and returning values. The code generator + * knows how many of these are in real registers, and avoids + * generating code that uses non-real registers. General purpose + * registers are never saved when returning to the scheduler, instead + * we save whatever is live at the time on the stack, and restore it + * later. This should reduce the context switch time, amongst other + * things. + * + * For argument passing, the stack will be used in preference to + * pseudo-registers if the architecture has too few general purpose + * registers. + * + * Some special RTS functions like newArray and the Integer primitives + * expect their arguments to be in registers R1-Rn, so we use these + * (pseudo-)registers in those cases. + */ + +/* ----------------------------------------------------------------------------- + * Emit the GCC-specific register declarations for each machine + * register being used. If any STG register isn't mapped to a machine + * register, then map it to an offset from BaseReg. + * + * First, the general purpose registers. The idea is, if a particular + * general-purpose STG register can't be mapped to a real machine + * register, it won't be used at all. Instead, we'll use the stack. + */ + +/* define NO_REGS to omit register declarations - used in RTS C code + * that needs all the STG definitions but not the global register + * settings. 
+ */ +#define GLOBAL_REG_DECL(type,name,reg) register type name REG(reg); + +#if defined(REG_R1) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R1,REG_R1) +#else +# define R1 (BaseReg->rR1) +#endif + +#if defined(REG_R2) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R2,REG_R2) +#else +# define R2 (BaseReg->rR2) +#endif + +#if defined(REG_R3) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R3,REG_R3) +#else +# define R3 (BaseReg->rR3) +#endif + +#if defined(REG_R4) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R4,REG_R4) +#else +# define R4 (BaseReg->rR4) +#endif + +#if defined(REG_R5) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R5,REG_R5) +#else +# define R5 (BaseReg->rR5) +#endif + +#if defined(REG_R6) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R6,REG_R6) +#else +# define R6 (BaseReg->rR6) +#endif + +#if defined(REG_R7) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R7,REG_R7) +#else +# define R7 (BaseReg->rR7) +#endif + +#if defined(REG_R8) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R8,REG_R8) +#else +# define R8 (BaseReg->rR8) +#endif + +#if defined(REG_R9) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R9,REG_R9) +#else +# define R9 (BaseReg->rR9) +#endif + +#if defined(REG_R10) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R10,REG_R10) +#else +# define R10 (BaseReg->rR10) +#endif + +#if defined(REG_F1) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgFloat,F1,REG_F1) +#else +#define F1 (BaseReg->rF1) +#endif + +#if defined(REG_F2) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgFloat,F2,REG_F2) +#else +#define F2 (BaseReg->rF2) +#endif + +#if defined(REG_F3) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgFloat,F3,REG_F3) +#else +#define F3 (BaseReg->rF3) +#endif + +#if defined(REG_F4) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgFloat,F4,REG_F4) +#else +#define F4 (BaseReg->rF4) +#endif + +#if 
defined(REG_F5) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgFloat,F5,REG_F5) +#else +#define F5 (BaseReg->rF5) +#endif + +#if defined(REG_F6) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgFloat,F6,REG_F6) +#else +#define F6 (BaseReg->rF6) +#endif + +#if defined(REG_D1) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgDouble,D1,REG_D1) +#else +#define D1 (BaseReg->rD1) +#endif + +#if defined(REG_D2) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgDouble,D2,REG_D2) +#else +#define D2 (BaseReg->rD2) +#endif + +#if defined(REG_D3) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgDouble,D3,REG_D3) +#else +#define D3 (BaseReg->rD3) +#endif + +#if defined(REG_D4) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgDouble,D4,REG_D4) +#else +#define D4 (BaseReg->rD4) +#endif + +#if defined(REG_D5) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgDouble,D5,REG_D5) +#else +#define D5 (BaseReg->rD5) +#endif + +#if defined(REG_D6) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgDouble,D6,REG_D6) +#else +#define D6 (BaseReg->rD6) +#endif + +#if defined(REG_XMM1) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord128,XMM1,REG_XMM1) +#else +#define XMM1 (BaseReg->rXMM1) +#endif + +#if defined(REG_XMM2) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord128,XMM2,REG_XMM2) +#else +#define XMM2 (BaseReg->rXMM2) +#endif + +#if defined(REG_XMM3) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord128,XMM3,REG_XMM3) +#else +#define XMM3 (BaseReg->rXMM3) +#endif + +#if defined(REG_XMM4) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord128,XMM4,REG_XMM4) +#else +#define XMM4 (BaseReg->rXMM4) +#endif + +#if defined(REG_XMM5) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord128,XMM5,REG_XMM5) +#else +#define XMM5 (BaseReg->rXMM5) +#endif + +#if defined(REG_XMM6) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord128,XMM6,REG_XMM6) +#else +#define XMM6 (BaseReg->rXMM6) +#endif + +#if defined(REG_YMM1) && 
!defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord256,YMM1,REG_YMM1) +#else +#define YMM1 (BaseReg->rYMM1) +#endif + +#if defined(REG_YMM2) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord256,YMM2,REG_YMM2) +#else +#define YMM2 (BaseReg->rYMM2) +#endif + +#if defined(REG_YMM3) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord256,YMM3,REG_YMM3) +#else +#define YMM3 (BaseReg->rYMM3) +#endif + +#if defined(REG_YMM4) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord256,YMM4,REG_YMM4) +#else +#define YMM4 (BaseReg->rYMM4) +#endif + +#if defined(REG_YMM5) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord256,YMM5,REG_YMM5) +#else +#define YMM5 (BaseReg->rYMM5) +#endif + +#if defined(REG_YMM6) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord256,YMM6,REG_YMM6) +#else +#define YMM6 (BaseReg->rYMM6) +#endif + +#if defined(REG_ZMM1) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord512,ZMM1,REG_ZMM1) +#else +#define ZMM1 (BaseReg->rZMM1) +#endif + +#if defined(REG_ZMM2) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord512,ZMM2,REG_ZMM2) +#else +#define ZMM2 (BaseReg->rZMM2) +#endif + +#if defined(REG_ZMM3) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord512,ZMM3,REG_ZMM3) +#else +#define ZMM3 (BaseReg->rZMM3) +#endif + +#if defined(REG_ZMM4) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord512,ZMM4,REG_ZMM4) +#else +#define ZMM4 (BaseReg->rZMM4) +#endif + +#if defined(REG_ZMM5) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord512,ZMM5,REG_ZMM5) +#else +#define ZMM5 (BaseReg->rZMM5) +#endif + +#if defined(REG_ZMM6) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord512,ZMM6,REG_ZMM6) +#else +#define ZMM6 (BaseReg->rZMM6) +#endif + +#if defined(REG_L1) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord64,L1,REG_L1) +#else +#define L1 (BaseReg->rL1) +#endif + +/* + * If BaseReg isn't mapped to a machine register, just use the global + * address of the current register table 
(CurrentRegTable in + * concurrent Haskell, MainRegTable otherwise). + */ + +/* A capability is a combination of a FunTable and a RegTable. In STG + * code, BaseReg normally points to the RegTable portion of this + * structure, so that we can index both forwards and backwards to take + * advantage of shorter instruction forms on some archs (eg. x86). + * This is a cut-down version of the Capability structure; the full + * version is defined in Capability.h. + */ +struct PartCapability_ { + StgFunTable f; + StgRegTable r; +}; + +/* No such thing as a MainCapability under THREADED_RTS - each thread must have + * its own Capability. + */ +#if IN_STG_CODE && !(defined(THREADED_RTS) && !defined(NOSMP)) +extern W_ MainCapability[]; +#endif + +/* + * Assigning to BaseReg (the ASSIGN_BaseReg macro): this happens on + * return from a "safe" foreign call, when the thread might be running + * on a new Capability. Obviously if BaseReg is not a register, then + * we are restricted to a single Capability (this invariant is enforced + * in Capability.c:initCapabilities), and assigning to BaseReg can be omitted. 
+ */ + +#if defined(REG_Base) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgRegTable *,BaseReg,REG_Base) +#define ASSIGN_BaseReg(e) (BaseReg = (e)) +#else +#if defined(THREADED_RTS) && !defined(NOSMP) +#error BaseReg must be in a register for THREADED_RTS +#endif +#define BaseReg (&((struct PartCapability_ *)MainCapability)->r) +#define ASSIGN_BaseReg(e) (e) +#endif + +#if defined(REG_Sp) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(P_,Sp,REG_Sp) +#else +#define Sp (BaseReg->rSp) +#endif + +#if defined(REG_SpLim) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(P_,SpLim,REG_SpLim) +#else +#define SpLim (BaseReg->rSpLim) +#endif + +#if defined(REG_Hp) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(P_,Hp,REG_Hp) +#else +#define Hp (BaseReg->rHp) +#endif + +#if defined(REG_HpLim) && !defined(NO_GLOBAL_REG_DECLS) +#error HpLim cannot be in a register +#else +#define HpLim (BaseReg->rHpLim) +#endif + +#if defined(REG_CCCS) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(struct CostCentreStack_ *,CCCS,REG_CCCS) +#else +#define CCCS (BaseReg->rCCCS) +#endif + +#if defined(REG_CurrentTSO) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(struct _StgTSO *,CurrentTSO,REG_CurrentTSO) +#else +#define CurrentTSO (BaseReg->rCurrentTSO) +#endif + +#if defined(REG_CurrentNursery) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(bdescr *,CurrentNursery,REG_CurrentNursery) +#else +#define CurrentNursery (BaseReg->rCurrentNursery) +#endif + +#if defined(REG_HpAlloc) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(bdescr *,HpAlloc,REG_HpAlloc) +#else +#define HpAlloc (BaseReg->rHpAlloc) +#endif + +/* ----------------------------------------------------------------------------- + Get absolute function pointers from the register table, to save + code space. On x86, + + jmp *-12(%ebx) + + is shorter than + + jmp absolute_address + + as long as the offset is within the range of a signed byte + (-128..+127). 
So we pick some common absolute_addresses and put + them in the register table. As a bonus, linking time should also + be reduced. + + Other possible candidates in order of importance: + + stg_upd_frame_info + stg_CAF_BLACKHOLE_info + stg_IND_STATIC_info + + anything else probably isn't worth the effort. + + -------------------------------------------------------------------------- */ + + +#define FunReg ((StgFunTable *)((void *)BaseReg - STG_FIELD_OFFSET(struct PartCapability_, r))) + +#define stg_EAGER_BLACKHOLE_info (FunReg->stgEagerBlackholeInfo) +#define stg_gc_enter_1 (FunReg->stgGCEnter1) +#define stg_gc_fun (FunReg->stgGCFun) + +#endif /* IN_STG_CODE */ diff --git a/rts/include/stg/SMP.h b/rts/include/stg/SMP.h new file mode 100644 index 0000000000..de706da290 --- /dev/null +++ b/rts/include/stg/SMP.h @@ -0,0 +1,579 @@ +/* ---------------------------------------------------------------------------- + * + * (c) The GHC Team, 2005-2011 + * + * Macros for multi-CPU support + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * -------------------------------------------------------------------------- */ + +#pragma once + +#if defined(arm_HOST_ARCH) && defined(arm_HOST_ARCH_PRE_ARMv6) +void arm_atomic_spin_lock(void); +void arm_atomic_spin_unlock(void); +#endif + +#if defined(THREADED_RTS) + +/* ---------------------------------------------------------------------------- + Atomic operations + ------------------------------------------------------------------------- */ + +#if !IN_STG_CODE || IN_STGCRUN +// We only want the barriers, e.g. write_barrier(), declared in .hc +// files. Defining the other inline functions here causes type +// mismatch errors from gcc, because the generated C code is assuming +// that there are no prototypes in scope. 
+ +/* + * The atomic exchange operation: xchg(p,w) exchanges the value + * pointed to by p with the value w, returning the old value. + * + * Used for locking closures during updates (see lockClosure() + * in rts/include/rts/storage/SMPClosureOps.h) and the MVar primops. + */ +EXTERN_INLINE StgWord xchg(StgPtr p, StgWord w); + +/* + * Compare-and-swap. Atomically does this: + * + * cas(p,o,n) { + * r = *p; + * if (r == o) { *p = n }; + * return r; + * } + */ +EXTERN_INLINE StgWord cas(StgVolatilePtr p, StgWord o, StgWord n); +EXTERN_INLINE StgWord8 cas_word8(StgWord8 *volatile p, StgWord8 o, StgWord8 n); + +/* + * Compare-and-swap + * this uses a seq_cst success memory order and a relaxed failure memory order + */ +EXTERN_INLINE StgWord cas_seq_cst_relaxed(StgVolatilePtr p, StgWord o, StgWord n); + + +/* + * Atomic addition by the provided quantity + * + * atomic_inc(p, n) { + * return ((*p) += n); + * } + */ +EXTERN_INLINE StgWord atomic_inc(StgVolatilePtr p, StgWord n); + + +/* + * Atomic decrement + * + * atomic_dec(p) { + * return --(*p); + * } + */ +EXTERN_INLINE StgWord atomic_dec(StgVolatilePtr p); + +/* + * Busy-wait nop: this is a hint to the CPU that we are currently in a + * busy-wait loop waiting for another CPU to change something. On a + * hypertreaded CPU it should yield to another thread, for example. + */ +EXTERN_INLINE void busy_wait_nop(void); + +#endif // !IN_STG_CODE + +/* + * Various kinds of memory barrier. + * write_barrier: prevents future stores occurring before preceding stores. + * store_load_barrier: prevents future loads occurring before preceding stores. + * load_load_barrier: prevents future loads occurring before earlier loads. 
+ * + * Reference for these: "The JSR-133 Cookbook for Compiler Writers" + * http://gee.cs.oswego.edu/dl/jmm/cookbook.html + * + * To check whether you got these right, try the test in + * testsuite/tests/rts/testwsdeque.c + * This tests the work-stealing deque implementation, which relies on + * properly working store_load and load_load memory barriers. + */ +EXTERN_INLINE void write_barrier(void); +EXTERN_INLINE void store_load_barrier(void); +EXTERN_INLINE void load_load_barrier(void); + +/* + * Note [Heap memory barriers] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * Machines with weak memory ordering semantics have consequences for how + * closures are observed and mutated. For example, consider a thunk that needs + * to be updated to an indirection. In order for the indirection to be safe for + * concurrent observers to enter, said observers must read the indirection's + * info table before they read the indirectee. Furthermore, the indirectee must + * be set before the info table pointer. This ensures that if the observer sees + * an IND info table then the indirectee is valid. + * + * When a closure is updated with an indirection, both its info table and its + * indirectee must be written. With weak memory ordering, these two writes can + * be arbitrarily reordered, and perhaps even interleaved with other threads' + * reads and writes (in the absence of memory barrier instructions). Consider + * this example of a bad reordering: + * + * - An updater writes to a closure's info table (INFO_TYPE is now IND). + * - A concurrent observer branches upon reading the closure's INFO_TYPE as IND. + * - A concurrent observer reads the closure's indirectee and enters it. + * - An updater writes the closure's indirectee. + * + * Here the update to the indirectee comes too late and the concurrent observer + * has jumped off into the abyss. Speculative execution can also cause us + * issues, consider: + * + * - an observer is about to case on a value in closure's info table. 
+ * - the observer speculatively reads one or more of closure's fields. + * - an updater writes to closure's info table. + * - the observer takes a branch based on the new info table value, but with the + * old closure fields! + * - the updater writes to the closure's other fields, but its too late. + * + * Because of these effects, reads and writes to a closure's info table must be + * ordered carefully with respect to reads and writes to the closure's other + * fields, and memory barriers must be placed to ensure that reads and writes + * occur in program order. Specifically, updates to an already existing closure + * must follow the following pattern: + * + * - Update the closure's (non-info table) fields. + * - Write barrier. + * - Update the closure's info table. + * + * Observing the fields of an updateable closure (e.g. a THUNK) must follow the + * following pattern: + * + * - Read the closure's info pointer. + * - Read barrier. + * - Read the closure's (non-info table) fields. + * + * We must also take care when we expose a newly-allocated closure to other cores + * by writing a pointer to it to some shared data structure (e.g. an MVar#, a Message, + * or MutVar#). Specifically, we need to ensure that all writes constructing the + * closure are visible *before* the write exposing the new closure is made visible: + * + * - Allocate memory for the closure + * - Write the closure's info pointer and fields (ordering between this doesn't + * matter since the closure isn't yet visible to anyone else). + * - Write barrier + * - Make closure visible to other cores + * + * Note that thread stacks are inherently thread-local and consequently allocating an + * object and introducing a reference to it to our stack needs no barrier. 
+ * + * There are several ways in which the mutator may make a newly-allocated + * closure visible to other cores: + * + * - Eager blackholing a THUNK: + * This is protected by an explicit write barrier in the eager blackholing + * code produced by the codegen. See GHC.StgToCmm.Bind.emitBlackHoleCode. + * + * - Lazy blackholing a THUNK: + * This is is protected by an explicit write barrier in the thread suspension + * code. See ThreadPaused.c:threadPaused. + * + * - Updating a BLACKHOLE: + * This case is protected by explicit write barriers in the update frame + * entry code (see rts/Updates.h). + * + * - Blocking on an MVar# (e.g. takeMVar#): + * In this case the appropriate MVar primops (e.g. stg_takeMVarzh). include + * explicit memory barriers to ensure that the newly-allocated + * MVAR_TSO_QUEUE is visible to other cores. + * + * - Write to an MVar# (e.g. putMVar#): + * This protected by the full barrier implied by the CAS in putMVar#. + * + * - Write to a TVar#: + * This is protected by the full barrier implied by the CAS in STM.c:lock_stm. + * + * - Write to an Array#, ArrayArray#, or SmallArray#: + * This case is protected by an explicit write barrier in the code produced + * for this primop by the codegen. See GHC.StgToCmm.Prim.doWritePtrArrayOp and + * GHC.StgToCmm.Prim.doWriteSmallPtrArrayOp. Relevant issue: #12469. + * + * - Write to MutVar# via writeMutVar#: + * This case is protected by an explicit write barrier in the code produced + * for this primop by the codegen. + * + * - Write to MutVar# via atomicModifyMutVar# or casMutVar#: + * This is protected by the full barrier implied by the cmpxchg operations + * in this primops. + * + * - Sending a Message to another capability: + * This is protected by the acquition and release of the target capability's + * lock in Messages.c:sendMessage. + * + * Finally, we must ensure that we flush all cores store buffers before + * entering and leaving GC, since stacks may be read by other cores. 
This + * happens as a side-effect of taking and release mutexes (which implies + * acquire and release barriers, respectively). + * + * N.B. recordClosureMutated places a reference to the mutated object on + * the capability-local mut_list. Consequently this does not require any memory + * barrier. + * + * During parallel GC we need to be careful during evacuation: before replacing + * a closure with a forwarding pointer we must commit a write barrier to ensure + * that the copy we made in to-space is visible to other cores. + * + * However, we can be a bit lax when *reading* during GC. Specifically, the GC + * can only make a very limited set of changes to existing closures: + * + * - it can replace a closure's info table with stg_WHITEHOLE. + * - it can replace a previously-whitehole'd closure's info table with a + * forwarding pointer + * - it can replace a previously-whitehole'd closure's info table with a + * valid info table pointer (done in eval_thunk_selector) + * - it can update the value of a pointer field after evacuating it + * + * This is quite nice since we don't need to worry about an interleaving + * of writes producing an invalid state: a closure's fields remain valid after + * an update of its info table pointer and vice-versa. + * + * After a round of parallel scavenging we must also ensure that any writes the + * GC thread workers made are visible to the main GC thread. This is ensured by + * the full barrier implied by the atomic decrement in + * GC.c:scavenge_until_all_done. + * + * The work-stealing queue (WSDeque) also requires barriers; these are + * documented in WSDeque.c. + * + */ + +/* ---------------------------------------------------------------------------- + Implementations + ------------------------------------------------------------------------- */ + +#if !IN_STG_CODE || IN_STGCRUN + +/* + * Exchange the value pointed to by p with w and return the former. This + * function is used to acquire a lock. 
An acquire memory barrier is sufficient + * for a lock operation because corresponding unlock operation issues a + * store-store barrier (write_barrier()) immediately before releasing the lock. + */ +EXTERN_INLINE StgWord +xchg(StgPtr p, StgWord w) +{ +#if defined(HAVE_C11_ATOMICS) + return __atomic_exchange_n(p, w, __ATOMIC_SEQ_CST); +#else + // When porting GHC to a new platform check that + // __sync_lock_test_and_set() actually stores w in *p. + // Use test rts/atomicxchg to verify that the correct value is stored. + // From the gcc manual: + // (https://gcc.gnu.org/onlinedocs/gcc-4.4.3/gcc/Atomic-Builtins.html) + // This built-in function, as described by Intel, is not + // a traditional test-and-set operation, but rather an atomic + // exchange operation. + // [...] + // Many targets have only minimal support for such locks, + // and do not support a full exchange operation. In this case, + // a target may support reduced functionality here by which the + // only valid value to store is the immediate constant 1. The + // exact value actually stored in *ptr is implementation defined. + return __sync_lock_test_and_set(p, w); +#endif +} + +/* + * CMPXCHG - the single-word atomic compare-and-exchange instruction. Used + * in the STM implementation. 
+ */ +EXTERN_INLINE StgWord +cas(StgVolatilePtr p, StgWord o, StgWord n) +{ +#if defined(HAVE_C11_ATOMICS) + __atomic_compare_exchange_n(p, &o, n, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + return o; +#else + return __sync_val_compare_and_swap(p, o, n); +#endif +} + +EXTERN_INLINE StgWord8 +cas_word8(StgWord8 *volatile p, StgWord8 o, StgWord8 n) +{ +#if defined(HAVE_C11_ATOMICS) + __atomic_compare_exchange_n(p, &o, n, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + return o; +#else + return __sync_val_compare_and_swap(p, o, n); +#endif +} + +EXTERN_INLINE StgWord +cas_seq_cst_relaxed(StgVolatilePtr p, StgWord o, StgWord n) { +#if defined(HAVE_C11_ATOMICS) + __atomic_compare_exchange_n(p, &o, n, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); + return o; +#else + return __sync_val_compare_and_swap(p, o, n); +#endif + +} + +// RRN: Generalized to arbitrary increments to enable fetch-and-add in +// Haskell code (fetchAddIntArray#). +// PT: add-and-fetch, returns new value +EXTERN_INLINE StgWord +atomic_inc(StgVolatilePtr p, StgWord incr) +{ +#if defined(HAVE_C11_ATOMICS) + return __atomic_add_fetch(p, incr, __ATOMIC_SEQ_CST); +#else + return __sync_add_and_fetch(p, incr); +#endif +} + +EXTERN_INLINE StgWord +atomic_dec(StgVolatilePtr p) +{ +#if defined(HAVE_C11_ATOMICS) + return __atomic_sub_fetch(p, 1, __ATOMIC_SEQ_CST); +#else + return __sync_sub_and_fetch(p, (StgWord) 1); +#endif +} + +/* + * Some architectures have a way to tell the CPU that we're in a + * busy-wait loop, and the processor should look for something else to + * do (such as run another hardware thread). + */ +EXTERN_INLINE void +busy_wait_nop(void) +{ +#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH) + // On Intel, the busy-wait-nop instruction is called "pause", + // which is actually represented as a nop with the rep prefix. + // On processors before the P4 this behaves as a nop; on P4 and + // later it might do something clever like yield to another + // hyperthread. 
In any case, Intel recommends putting one + // of these in a spin lock loop. + __asm__ __volatile__ ("rep; nop"); +#else + // nothing +#endif +} + +#endif // !IN_STG_CODE + +/* + * We need to tell both the compiler AND the CPU about the barriers. + * It's no good preventing the CPU from reordering the operations if + * the compiler has already done so - hence the "memory" restriction + * on each of the barriers below. + */ +EXTERN_INLINE void +write_barrier(void) { +#if defined(NOSMP) + return; +#elif defined(TSAN_ENABLED) + // RELEASE is a bit stronger than the store-store barrier provided by + // write_barrier, consequently we only use this case as a conservative + // approximation when using ThreadSanitizer. See Note [ThreadSanitizer]. + __atomic_thread_fence(__ATOMIC_RELEASE); +#elif defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH) + __asm__ __volatile__ ("" : : : "memory"); +#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \ + || defined(powerpc64le_HOST_ARCH) + __asm__ __volatile__ ("lwsync" : : : "memory"); +#elif defined(s390x_HOST_ARCH) + __asm__ __volatile__ ("" : : : "memory"); +#elif defined(sparc_HOST_ARCH) + /* Sparc in TSO mode does not require store/store barriers. 
*/ + __asm__ __volatile__ ("" : : : "memory"); +#elif defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH) + __asm__ __volatile__ ("dmb st" : : : "memory"); +#elif defined(riscv64_HOST_ARCH) + __asm__ __volatile__ ("fence w,w" : : : "memory"); +#else +#error memory barriers unimplemented on this architecture +#endif +} + +EXTERN_INLINE void +store_load_barrier(void) { +#if defined(NOSMP) + return; +#elif defined(i386_HOST_ARCH) + __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory"); +#elif defined(x86_64_HOST_ARCH) + __asm__ __volatile__ ("lock; addq $0,0(%%rsp)" : : : "memory"); +#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \ + || defined(powerpc64le_HOST_ARCH) + __asm__ __volatile__ ("sync" : : : "memory"); +#elif defined(s390x_HOST_ARCH) + __asm__ __volatile__ ("bcr 14,0" : : : "memory"); +#elif defined(sparc_HOST_ARCH) + __asm__ __volatile__ ("membar #StoreLoad" : : : "memory"); +#elif defined(arm_HOST_ARCH) + __asm__ __volatile__ ("dmb" : : : "memory"); +#elif defined(aarch64_HOST_ARCH) + __asm__ __volatile__ ("dmb sy" : : : "memory"); +#elif defined(riscv64_HOST_ARCH) + __asm__ __volatile__ ("fence w,r" : : : "memory"); +#else +#error memory barriers unimplemented on this architecture +#endif +} + +EXTERN_INLINE void +load_load_barrier(void) { +#if defined(NOSMP) + return; +#elif defined(i386_HOST_ARCH) + __asm__ __volatile__ ("" : : : "memory"); +#elif defined(x86_64_HOST_ARCH) + __asm__ __volatile__ ("" : : : "memory"); +#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \ + || defined(powerpc64le_HOST_ARCH) + __asm__ __volatile__ ("lwsync" : : : "memory"); +#elif defined(s390x_HOST_ARCH) + __asm__ __volatile__ ("" : : : "memory"); +#elif defined(sparc_HOST_ARCH) + /* Sparc in TSO mode does not require load/load barriers. 
*/ + __asm__ __volatile__ ("" : : : "memory"); +#elif defined(arm_HOST_ARCH) + __asm__ __volatile__ ("dmb" : : : "memory"); +#elif defined(aarch64_HOST_ARCH) + __asm__ __volatile__ ("dmb sy" : : : "memory"); +#elif defined(riscv64_HOST_ARCH) + __asm__ __volatile__ ("fence w,r" : : : "memory"); +#else +#error memory barriers unimplemented on this architecture +#endif +} + +// Load a pointer from a memory location that might be being modified +// concurrently. This prevents the compiler from optimising away +// multiple loads of the memory location, as it might otherwise do in +// a busy wait loop for example. +#define VOLATILE_LOAD(p) (*((StgVolatilePtr)(p))) + +// Relaxed atomic operations. +#define RELAXED_LOAD(ptr) __atomic_load_n(ptr, __ATOMIC_RELAXED) +#define RELAXED_STORE(ptr,val) __atomic_store_n(ptr, val, __ATOMIC_RELAXED) +#define RELAXED_ADD(ptr,val) __atomic_add_fetch(ptr, val, __ATOMIC_RELAXED) + +// Acquire/release atomic operations +#define ACQUIRE_LOAD(ptr) __atomic_load_n(ptr, __ATOMIC_ACQUIRE) +#define RELEASE_STORE(ptr,val) __atomic_store_n(ptr, val, __ATOMIC_RELEASE) + +// Sequentially consistent atomic operations +#define SEQ_CST_LOAD(ptr) __atomic_load_n(ptr, __ATOMIC_SEQ_CST) +#define SEQ_CST_STORE(ptr,val) __atomic_store_n(ptr, val, __ATOMIC_SEQ_CST) +#define SEQ_CST_ADD(ptr,val) __atomic_add_fetch(ptr, val, __ATOMIC_SEQ_CST) + +// Non-atomic addition for "approximate" counters that can be lossy +#define NONATOMIC_ADD(ptr,val) RELAXED_STORE(ptr, RELAXED_LOAD(ptr) + val) + +// compare-and-swap atomic operations +#define SEQ_CST_RELAXED_CAS(p,o,n) cas_seq_cst_relaxed(p,o,n) + +// Explicit fences +// +// These are typically necessary only in very specific cases (e.g. WSDeque) +// where the ordered operations aren't expressive enough to capture the desired +// ordering. 
+#define RELEASE_FENCE() __atomic_thread_fence(__ATOMIC_RELEASE) +#define SEQ_CST_FENCE() __atomic_thread_fence(__ATOMIC_SEQ_CST) + +/* ---------------------------------------------------------------------- */ +#else /* !THREADED_RTS */ + +EXTERN_INLINE void write_barrier(void); +EXTERN_INLINE void store_load_barrier(void); +EXTERN_INLINE void load_load_barrier(void); +EXTERN_INLINE void write_barrier () {} /* nothing */ +EXTERN_INLINE void store_load_barrier() {} /* nothing */ +EXTERN_INLINE void load_load_barrier () {} /* nothing */ + +// Relaxed atomic operations +#define RELAXED_LOAD(ptr) *ptr +#define RELAXED_STORE(ptr,val) *ptr = val +#define RELAXED_ADD(ptr,val) *ptr += val + +// Acquire/release atomic operations +#define ACQUIRE_LOAD(ptr) *ptr +#define RELEASE_STORE(ptr,val) *ptr = val + +// Sequentially consistent atomic operations +#define SEQ_CST_LOAD(ptr) *ptr +#define SEQ_CST_STORE(ptr,val) *ptr = val +#define SEQ_CST_ADD(ptr,val) *ptr += val + +// Non-atomic addition for "approximate" counters that can be lossy +#define NONATOMIC_ADD(ptr,val) *ptr += val + +// compare-and-swap atomic operations +#define SEQ_CST_RELAXED_CAS(p,o,n) cas(p,o,n) + +// Fences +#define RELEASE_FENCE() +#define SEQ_CST_FENCE() + +#if !IN_STG_CODE || IN_STGCRUN +INLINE_HEADER StgWord +xchg(StgPtr p, StgWord w) +{ + StgWord old = *p; + *p = w; + return old; +} + +EXTERN_INLINE StgWord cas(StgVolatilePtr p, StgWord o, StgWord n); +EXTERN_INLINE StgWord +cas(StgVolatilePtr p, StgWord o, StgWord n) +{ + StgWord result; + result = *p; + if (result == o) { + *p = n; + } + return result; +} + +EXTERN_INLINE StgWord8 cas_word8(StgWord8 *volatile p, StgWord8 o, StgWord8 n); +EXTERN_INLINE StgWord8 +cas_word8(StgWord8 *volatile p, StgWord8 o, StgWord8 n) +{ + StgWord8 result; + result = *p; + if (result == o) { + *p = n; + } + return result; +} + +EXTERN_INLINE StgWord atomic_inc(StgVolatilePtr p, StgWord incr); +EXTERN_INLINE StgWord +atomic_inc(StgVolatilePtr p, StgWord incr) +{ + 
return ((*p) += incr); +} + + +INLINE_HEADER StgWord +atomic_dec(StgVolatilePtr p) +{ + return --(*p); +} +#endif + +/* An alias for the C11 declspec */ +#define ATOMIC + +#define VOLATILE_LOAD(p) ((StgWord)*((StgWord*)(p))) + +#endif /* !THREADED_RTS */ diff --git a/rts/include/stg/Ticky.h b/rts/include/stg/Ticky.h new file mode 100644 index 0000000000..3d9d4aee4f --- /dev/null +++ b/rts/include/stg/Ticky.h @@ -0,0 +1,221 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2009 + * + * Declarations for counters used by ticky-ticky profiling. + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * -------------------------------------------------------------------------- */ + +#pragma once + +/* These should probably be automatically generated in order to + keep them consistent with the macros that use them (which are + defined in Cmm.h) */ + +/* Here are all the counter declarations: */ +/* If you change this list, make the corresponding change + in RTS_TICKY_SYMBOLS in rts/RtsSymbols.c */ + +/* These two are explicitly declared in rts/Ticky.c, and + hence should not be extern'd except when using this header + file from STG code; hence IN_STG_CODE */ + +#if IN_STG_CODE +extern W_ ticky_entry_ctrs[]; +extern W_ top_ct[]; +#endif + +/* The rest are not explicitly declared in rts/Ticky.c. 
Instead + we use the same trick as in the former StgTicky.h: recycle the + same declarations for both extern decls (which are included everywhere) + and initializations (which only happen once) + TICKY_C is defined only in rts/Ticky.c */ +#if defined(TICKY_C) +#define INIT(ializer) = ializer +#define EXTERN +#else +#define INIT(ializer) +#define EXTERN extern +#endif + +EXTERN StgInt ENT_VIA_NODE_ctr INIT(0); +EXTERN StgInt ENT_STATIC_THK_SINGLE_ctr INIT(0); +EXTERN StgInt ENT_DYN_THK_SINGLE_ctr INIT(0); +EXTERN StgInt ENT_STATIC_THK_MANY_ctr INIT(0); +EXTERN StgInt ENT_DYN_THK_MANY_ctr INIT(0); +EXTERN StgInt ENT_STATIC_FUN_DIRECT_ctr INIT(0); +EXTERN StgInt ENT_DYN_FUN_DIRECT_ctr INIT(0); +EXTERN StgInt ENT_STATIC_CON_ctr INIT(0); +EXTERN StgInt ENT_DYN_CON_ctr INIT(0); +EXTERN StgInt ENT_STATIC_IND_ctr INIT(0); +EXTERN StgInt ENT_DYN_IND_ctr INIT(0); +EXTERN StgInt ENT_PERM_IND_ctr INIT(0); +EXTERN StgInt ENT_PAP_ctr INIT(0); +EXTERN StgInt ENT_AP_ctr INIT(0); +EXTERN StgInt ENT_AP_STACK_ctr INIT(0); +EXTERN StgInt ENT_BH_ctr INIT(0); +EXTERN StgInt ENT_LNE_ctr INIT(0); + +EXTERN StgInt UNKNOWN_CALL_ctr INIT(0); + +EXTERN StgInt SLOW_CALL_fast_v16_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_v_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_f_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_d_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_l_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_n_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_p_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_pv_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_pp_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_ppv_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_ppp_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_pppv_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_pppp_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_ppppp_ctr INIT(0); +EXTERN StgInt SLOW_CALL_fast_pppppp_ctr INIT(0); +EXTERN StgInt VERY_SLOW_CALL_ctr INIT(0); + +EXTERN StgInt ticky_slow_call_unevald; +EXTERN StgInt SLOW_CALL_ctr INIT(0); +EXTERN StgInt MULTI_CHUNK_SLOW_CALL_ctr 
INIT(0); +EXTERN StgInt MULTI_CHUNK_SLOW_CALL_CHUNKS_ctr INIT(0); +EXTERN StgInt KNOWN_CALL_ctr INIT(0); +EXTERN StgInt KNOWN_CALL_TOO_FEW_ARGS_ctr INIT(0); +EXTERN StgInt KNOWN_CALL_EXTRA_ARGS_ctr INIT(0); +EXTERN StgInt SLOW_CALL_FUN_TOO_FEW_ctr INIT(0); +EXTERN StgInt SLOW_CALL_FUN_CORRECT_ctr INIT(0); +EXTERN StgInt SLOW_CALL_FUN_TOO_MANY_ctr INIT(0); +EXTERN StgInt SLOW_CALL_PAP_TOO_FEW_ctr INIT(0); +EXTERN StgInt SLOW_CALL_PAP_CORRECT_ctr INIT(0); +EXTERN StgInt SLOW_CALL_PAP_TOO_MANY_ctr INIT(0); +EXTERN StgInt SLOW_CALL_UNEVALD_ctr INIT(0); + + +EXTERN StgInt UPDF_OMITTED_ctr INIT(0); +EXTERN StgInt UPDF_PUSHED_ctr INIT(0); +EXTERN StgInt CATCHF_PUSHED_ctr INIT(0); +EXTERN StgInt UPDF_RCC_PUSHED_ctr INIT(0); +EXTERN StgInt UPDF_RCC_OMITTED_ctr INIT(0); + +EXTERN StgInt UPD_SQUEEZED_ctr INIT(0); +EXTERN StgInt UPD_CON_IN_NEW_ctr INIT(0); +EXTERN StgInt UPD_CON_IN_PLACE_ctr INIT(0); +EXTERN StgInt UPD_PAP_IN_NEW_ctr INIT(0); +EXTERN StgInt UPD_PAP_IN_PLACE_ctr INIT(0); + +EXTERN StgInt ALLOC_HEAP_ctr INIT(0); +EXTERN StgInt ALLOC_HEAP_tot INIT(0); + +EXTERN StgInt HEAP_CHK_ctr INIT(0); +EXTERN StgInt STK_CHK_ctr INIT(0); + +EXTERN StgInt ALLOC_RTS_ctr INIT(0); +EXTERN StgInt ALLOC_RTS_tot INIT(0); + +EXTERN StgInt ALLOC_FUN_ctr INIT(0); +EXTERN StgInt ALLOC_FUN_adm INIT(0); +EXTERN StgInt ALLOC_FUN_gds INIT(0); +EXTERN StgInt ALLOC_FUN_slp INIT(0); + +EXTERN StgInt UPD_NEW_IND_ctr INIT(0); +EXTERN StgInt UPD_NEW_PERM_IND_ctr INIT(0); +EXTERN StgInt UPD_OLD_IND_ctr INIT(0); +EXTERN StgInt UPD_OLD_PERM_IND_ctr INIT(0); + +EXTERN StgInt UPD_BH_UPDATABLE_ctr INIT(0); +EXTERN StgInt UPD_CAF_BH_UPDATABLE_ctr INIT(0); +EXTERN StgInt UPD_CAF_BH_SINGLE_ENTRY_ctr INIT(0); + +EXTERN StgInt GC_SEL_ABANDONED_ctr INIT(0); +EXTERN StgInt GC_SEL_MINOR_ctr INIT(0); +EXTERN StgInt GC_SEL_MAJOR_ctr INIT(0); + +EXTERN StgInt GC_FAILED_PROMOTION_ctr INIT(0); + +EXTERN StgInt ALLOC_UP_THK_ctr INIT(0); +EXTERN StgInt ALLOC_SE_THK_ctr INIT(0); +EXTERN StgInt ALLOC_THK_adm INIT(0); 
+EXTERN StgInt ALLOC_THK_gds INIT(0); +EXTERN StgInt ALLOC_THK_slp INIT(0); + +EXTERN StgInt ALLOC_CON_ctr INIT(0); +EXTERN StgInt ALLOC_CON_adm INIT(0); +EXTERN StgInt ALLOC_CON_gds INIT(0); +EXTERN StgInt ALLOC_CON_slp INIT(0); + +EXTERN StgInt ALLOC_TUP_ctr INIT(0); +EXTERN StgInt ALLOC_TUP_adm INIT(0); +EXTERN StgInt ALLOC_TUP_gds INIT(0); +EXTERN StgInt ALLOC_TUP_slp INIT(0); + +EXTERN StgInt ALLOC_BH_ctr INIT(0); +EXTERN StgInt ALLOC_BH_adm INIT(0); +EXTERN StgInt ALLOC_BH_gds INIT(0); +EXTERN StgInt ALLOC_BH_slp INIT(0); + +EXTERN StgInt ALLOC_PRIM_ctr INIT(0); +EXTERN StgInt ALLOC_PRIM_adm INIT(0); +EXTERN StgInt ALLOC_PRIM_gds INIT(0); +EXTERN StgInt ALLOC_PRIM_slp INIT(0); + +EXTERN StgInt ALLOC_PAP_ctr INIT(0); +EXTERN StgInt ALLOC_PAP_adm INIT(0); +EXTERN StgInt ALLOC_PAP_gds INIT(0); +EXTERN StgInt ALLOC_PAP_slp INIT(0); + +EXTERN StgInt ALLOC_TSO_ctr INIT(0); +EXTERN StgInt ALLOC_TSO_adm INIT(0); +EXTERN StgInt ALLOC_TSO_gds INIT(0); +EXTERN StgInt ALLOC_TSO_slp INIT(0); + +EXTERN StgInt RET_NEW_ctr INIT(0); +EXTERN StgInt RET_OLD_ctr INIT(0); +EXTERN StgInt RET_UNBOXED_TUP_ctr INIT(0); + +EXTERN StgInt RET_SEMI_loads_avoided INIT(0); +/* End of counter declarations. */ + +/* How many bins in ticky's histograms */ +#define TICKY_BIN_COUNT 9 + +/* Histogram declarations */ +EXTERN StgInt RET_NEW_hst[TICKY_BIN_COUNT] INIT({0}); +EXTERN StgInt RET_OLD_hst[TICKY_BIN_COUNT] INIT({0}); +EXTERN StgInt RET_UNBOXED_TUP_hst[TICKY_BIN_COUNT] INIT({0}); +/* End of histogram declarations */ + +/* This is ugly, but the story is: + We got rid of StgTicky.h, which was previously + defining these macros for the benefit of C code + so, we define them here instead (to be no-ops). + (since those macros are only defined in Cmm.h) + + Note that these macros must be defined whether + TICKY_TICKY is defined or not. 
*/ + +#if !defined(CMINUSMINUS) +#if defined(TICKY_TICKY) +#define TICK_BUMP_BY(ctr,n) ctr = (StgInt) ctr + n +#else +#define TICK_BUMP_BY(ctr,n) /* nothing */ +#endif + +#define TICK_BUMP(ctr) TICK_BUMP_BY(ctr,1) + +#define TICK_ALLOC_PRIM(x,y,z) // FIXME: update counter +#define TICK_UPD_OLD_IND() TICK_BUMP(UPD_OLD_IND_ctr) +#define TICK_UPD_NEW_IND() TICK_BUMP(UPD_NEW_IND_ctr) +#define TICK_UPD_SQUEEZED() TICK_BUMP(UPD_SQUEEZED_ctr) +#define TICK_ALLOC_HEAP_NOCTR(bytes) // FIXME: update counter +#define TICK_GC_FAILED_PROMOTION() // FIXME: update counter +#define TICK_ALLOC_TSO() // FIXME: update counter +#define TICK_ALLOC_STACK(g) // FIXME: update counter +#define TICK_ALLOC_UP_THK(g,s) // FIXME: update counter +#define TICK_ALLOC_SE_THK(g,s) // FIXME: update counter + +#endif diff --git a/rts/include/stg/Types.h b/rts/include/stg/Types.h new file mode 100644 index 0000000000..696df3e206 --- /dev/null +++ b/rts/include/stg/Types.h @@ -0,0 +1,205 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2004 + * + * Various C datatypes used in the run-time system. This is the + * lowest-level include file, after ghcconfig.h and RtsConfig.h. + * + * Do not #include this file directly: #include "Rts.h" instead. + * + * To understand the structure of the RTS headers, see the wiki: + * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes + * + * NOTE: assumes #include "ghcconfig.h" + * + * Works with or without _POSIX_SOURCE. + * + * WARNING: Keep this file, MachDeps.h, and HsFFI.h in synch! + * + * ---------------------------------------------------------------------------*/ + +#pragma once + +#if defined(mingw32_HOST_OS) +# if defined(__USE_MINGW_ANSI_STDIO) +# if __USE_MINGW_ANSI_STDIO != 1 +# warning "Mismatch between __USE_MINGW_ANSI_STDIO definitions. \ +If using Rts.h make sure it is the first header included." 
+# endif +# else +/* Inform mingw we want the ISO rather than Windows printf format specifiers. */ +# define __USE_MINGW_ANSI_STDIO 1 +#endif +#endif + +/* ISO C 99 says: + * "C++ implementations should define these macros only when + * __STDC_LIMIT_MACROS is defined before <stdint.h> is included." + * + * So we need to define it for now to compile with C++ compilers. + * However, C++11 does not require it anymore so we can remove this once we + * upgrade to requiring C++11 or newer. + */ +#define __STDC_LIMIT_MACROS +#include <inttypes.h> + + +/* + * This module should define types *only*, all beginning with "Stg". + * + * Specifically: + + StgInt8, 16, 32, 64 + StgWord8, 16, 32, 64 + StgChar, StgFloat, StgDouble + + ***** All the same size (i.e. sizeof(void *)): ***** + StgPtr Basic pointer type + StgWord Unit of heap allocation + StgInt Signed version of StgWord + StgAddr Generic address type + + StgBool, StgVoid, StgPtr, StgOffset, + StgCode, StgStablePtr, StgFunPtr, + StgUnion. + */ + +/* + * First, platform-dependent definitions of size-specific integers. 
+ */ + +typedef int8_t StgInt8; +typedef uint8_t StgWord8; + +#define STG_INT8_MIN INT8_MIN +#define STG_INT8_MAX INT8_MAX +#define STG_WORD8_MAX UINT8_MAX + +#define FMT_Word8 PRIu8 +#define FMT_HexWord8 PRIx8 + +typedef int16_t StgInt16; +typedef uint16_t StgWord16; + +#define STG_INT16_MIN INT16_MIN +#define STG_INT16_MAX INT16_MAX +#define STG_WORD16_MAX UINT16_MAX + +#define FMT_Word16 PRIu16 +#define FMT_HexWord16 PRIx16 + +typedef int32_t StgInt32; +typedef uint32_t StgWord32; + +#define STG_INT32_MIN INT32_MIN +#define STG_INT32_MAX INT32_MAX +#define STG_WORD32_MAX UINT32_MAX + +#define FMT_Word32 PRIu32 +#define FMT_HexWord32 PRIx32 +#define FMT_Int32 PRId32 + +typedef int64_t StgInt64; +typedef uint64_t StgWord64; + +#define STG_INT64_MIN INT64_MIN +#define STG_INT64_MAX INT64_MAX +#define STG_WORD64_MAX UINT64_MAX + +#define FMT_Word64 PRIu64 +#define FMT_HexWord64 PRIx64 +#define FMT_Int64 PRId64 + +typedef struct { StgWord64 h; StgWord64 l; } StgWord128; + +typedef struct { StgWord128 h; StgWord128 l; } StgWord256; + +typedef struct { StgWord256 h; StgWord256 l; } StgWord512; + +/* + * Stg{Int,Word} are defined such that they have the exact same size as a + * void pointer. 
+ */ + +#if SIZEOF_VOID_P == 8 +typedef int64_t StgInt; +typedef uint64_t StgWord; + +typedef int32_t StgHalfInt; +typedef uint32_t StgHalfWord; + +#define STG_INT_MIN INT64_MIN +#define STG_INT_MAX INT64_MAX +#define STG_WORD_MAX UINT64_MAX + +#define FMT_Word FMT_Word64 +#define FMT_HexWord FMT_HexWord64 +#define FMT_Int FMT_Int64 + +#define strToStgWord strtoull + +#elif SIZEOF_VOID_P == 4 +typedef int32_t StgInt; +typedef uint32_t StgWord; + +typedef int16_t StgHalfInt; +typedef uint16_t StgHalfWord; + +#define STG_INT_MIN INT32_MIN +#define STG_INT_MAX INT32_MAX +#define STG_WORD_MAX UINT32_MAX + +#define FMT_Word FMT_Word32 +#define FMT_HexWord FMT_HexWord32 +#define FMT_Int FMT_Int32 + +#define strToStgWord strtoul + +#else +#error GHC untested on this architecture: sizeof(void *) != 4 or 8 +#endif + +#define W_MASK (sizeof(W_)-1) + +/* + * Other commonly-used STG datatypes. + */ + +typedef void* StgAddr; +typedef StgWord32 StgChar; +typedef int StgBool; +typedef float StgFloat; +typedef double StgDouble; +typedef StgWord* StgPtr; /* heap or stack pointer */ +typedef StgWord volatile* StgVolatilePtr; /* pointer to volatile word */ +typedef StgWord StgOffset; /* byte offset within closure */ +typedef StgWord8 StgCode; /* close enough */ +typedef void* StgStablePtr; +typedef StgWord8* StgByteArray; + +/* + Types for generated C functions when compiling via C. + + The C functions take no arguments, and return a pointer to the next + function to be called use: Ptr to Fun that returns a Ptr to Fun + which returns Ptr to void + + Note: Neither StgFunPtr nor StgFun is quite right (that is, + StgFunPtr != StgFun*). So, the functions we define all have type + StgFun but we always have to cast them to StgFunPtr when we assign + them to something. + The only way round this would be to write a recursive type but + C only allows that if you're defining a struct or union. 
+*/ + +typedef void *(*(*StgFunPtr)(void))(void); +typedef StgFunPtr StgFun(void); + +/* + * Forward declarations for the unregisterised backend, which + * only depends upon Stg.h and not the entirety of Rts.h, which + * is where these are defined. + */ +struct StgClosure_; +struct StgThunk_; +struct Capability_; diff --git a/rts/package.conf.in b/rts/package.conf.in index b0796595ff..50a372b2f0 100644 --- a/rts/package.conf.in +++ b/rts/package.conf.in @@ -68,11 +68,11 @@ extra-libraries: include-dirs: INCLUDE_DIR FFI_INCLUDE_DIR #else /* !INSTALLING */ include-dirs: TOP"/rts/dist/build" - TOP"/includes" - TOP"/includes/dist-derivedconstants/header" + TOP"/rts/include" + TOP"/rts/include/dist-derivedconstants/header" FFI_INCLUDE_DIR LIBDW_INCLUDE_DIR - TOP"/includes/dist-install/build" + TOP"/rts/include/dist-install/build" #endif includes: Stg.h diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in index a08e007c2a..6e535a777c 100644 --- a/rts/rts.cabal.in +++ b/rts/rts.cabal.in @@ -51,7 +51,7 @@ flag thread-sanitizer description: Enable checking for data races using the ThreadSanitizer (TSAN) mechanism supported by GCC and Clang. See Note [ThreadSanitizer] - in @includes/rts/TSANUtils.h@. + in @rts/include/rts/TSANUtils.h@. 
default: False library @@ -141,13 +141,13 @@ library if !flag(smp) cpp-options: -DNOSMP - include-dirs: build ../includes includes - includes/dist-derivedconstants/header @FFIIncludeDir@ + include-dirs: build include + include/dist-derivedconstants/header @FFIIncludeDir@ @LibdwIncludeDir@ includes: Stg.h install-includes: Cmm.h HsFFI.h MachDeps.h Rts.h RtsAPI.h Stg.h ghcautoconf.h ghcconfig.h ghcplatform.h ghcversion.h - -- ^ from ../includes + -- ^ from include DerivedConstants.h ffi.h ffitarget.h -- ^ generated rts/Adjustor.h @@ -176,6 +176,7 @@ library rts/PrimFloat.h rts/Profiling.h rts/IPE.h + rts/PosixSource.h rts/Signals.h rts/SpinLock.h rts/StableName.h diff --git a/rts/sm/Compact.h b/rts/sm/Compact.h index be9a09d4ab..9f4b396005 100644 --- a/rts/sm/Compact.h +++ b/rts/sm/Compact.h @@ -48,7 +48,7 @@ where we allocate single-word heap objects (e.g. a non-top-level FUN with empty payload) we add one non-pointer field to the payload so that the object will have two words. The minimum amount of words in the payload is defined in - includes/rts/Constants.h as MIN_PAYLOAD_SIZE. + rts/include/rts/Constants.h as MIN_PAYLOAD_SIZE. (See also !1701 where we discussed lifting this restriction and allocating two bits per object) |