summary | refs | log | tree | commit | diff
path: root/rts
diff options
context:
space:
mode:
Diffstat (limited to 'rts')
-rw-r--r--rts/.tsan-suppressions2
-rw-r--r--rts/AdjustorAsm.S2
-rw-r--r--rts/Capability.h4
-rw-r--r--rts/LdvProfile.c2
-rw-r--r--rts/Printer.c2
-rw-r--r--rts/RetainerProfile.c2
-rw-r--r--rts/RtsSymbols.c2
-rw-r--r--rts/Schedule.c2
-rw-r--r--rts/Schedule.h2
-rw-r--r--rts/StgCRun.c2
-rw-r--r--rts/StgPrimFloat.h2
-rw-r--r--rts/include/.dir-locals.el13
-rw-r--r--rts/include/Cmm.h924
-rw-r--r--rts/include/HsFFI.h148
-rw-r--r--rts/include/MachDeps.h119
-rw-r--r--rts/include/Makefile15
-rw-r--r--rts/include/Rts.h372
-rw-r--r--rts/include/RtsAPI.h607
-rw-r--r--rts/include/Stg.h600
-rw-r--r--rts/include/ghc.mk311
-rw-r--r--rts/include/ghcconfig.h4
-rw-r--r--rts/include/rts/Adjustor.h22
-rw-r--r--rts/include/rts/BlockSignals.h34
-rw-r--r--rts/include/rts/Bytecodes.h109
-rw-r--r--rts/include/rts/Config.h52
-rw-r--r--rts/include/rts/Constants.h340
-rw-r--r--rts/include/rts/EventLogFormat.h246
-rw-r--r--rts/include/rts/EventLogWriter.h75
-rw-r--r--rts/include/rts/ExecPage.h18
-rw-r--r--rts/include/rts/FileLock.h37
-rw-r--r--rts/include/rts/Flags.h330
-rw-r--r--rts/include/rts/ForeignExports.h38
-rw-r--r--rts/include/rts/GetTime.h16
-rw-r--r--rts/include/rts/Globals.h36
-rw-r--r--rts/include/rts/Hpc.h34
-rw-r--r--rts/include/rts/IOInterface.h38
-rw-r--r--rts/include/rts/IPE.h35
-rw-r--r--rts/include/rts/Libdw.h97
-rw-r--r--rts/include/rts/LibdwPool.h19
-rw-r--r--rts/include/rts/Linker.h114
-rw-r--r--rts/include/rts/Main.h18
-rw-r--r--rts/include/rts/Messages.h104
-rw-r--r--rts/include/rts/NonMoving.h43
-rw-r--r--rts/include/rts/OSThreads.h267
-rw-r--r--rts/include/rts/Parallel.h16
-rw-r--r--rts/include/rts/PosixSource.h38
-rw-r--r--rts/include/rts/PrimFloat.h17
-rw-r--r--rts/include/rts/Profiling.h17
-rw-r--r--rts/include/rts/Signals.h23
-rw-r--r--rts/include/rts/SpinLock.h75
-rw-r--r--rts/include/rts/StableName.h32
-rw-r--r--rts/include/rts/StablePtr.h39
-rw-r--r--rts/include/rts/StaticPtrTable.h44
-rw-r--r--rts/include/rts/TSANUtils.h67
-rw-r--r--rts/include/rts/TTY.h17
-rw-r--r--rts/include/rts/Threads.h79
-rw-r--r--rts/include/rts/Ticky.h32
-rw-r--r--rts/include/rts/Time.h45
-rw-r--r--rts/include/rts/Timer.h18
-rw-r--r--rts/include/rts/Types.h31
-rw-r--r--rts/include/rts/Utils.h16
-rw-r--r--rts/include/rts/prof/CCS.h226
-rw-r--r--rts/include/rts/prof/Heap.h24
-rw-r--r--rts/include/rts/prof/LDV.h44
-rw-r--r--rts/include/rts/storage/Block.h368
-rw-r--r--rts/include/rts/storage/ClosureMacros.h645
-rw-r--r--rts/include/rts/storage/ClosureTypes.h86
-rw-r--r--rts/include/rts/storage/Closures.h488
-rw-r--r--rts/include/rts/storage/FunTypes.h54
-rw-r--r--rts/include/rts/storage/GC.h261
-rw-r--r--rts/include/rts/storage/Heap.h31
-rw-r--r--rts/include/rts/storage/InfoTables.h405
-rw-r--r--rts/include/rts/storage/MBlock.h32
-rw-r--r--rts/include/rts/storage/TSO.h330
-rw-r--r--rts/include/stg/DLL.h72
-rw-r--r--rts/include/stg/MachRegs.h854
-rw-r--r--rts/include/stg/MachRegsForHost.h80
-rw-r--r--rts/include/stg/MiscClosures.h639
-rw-r--r--rts/include/stg/Prim.h139
-rw-r--r--rts/include/stg/Regs.h528
-rw-r--r--rts/include/stg/SMP.h579
-rw-r--r--rts/include/stg/Ticky.h221
-rw-r--r--rts/include/stg/Types.h205
-rw-r--r--rts/package.conf.in6
-rw-r--r--rts/rts.cabal.in9
-rw-r--r--rts/sm/Compact.h2
86 files changed, 12173 insertions, 20 deletions
diff --git a/rts/.tsan-suppressions b/rts/.tsan-suppressions
index 734faff5a6..dab647227e 100644
--- a/rts/.tsan-suppressions
+++ b/rts/.tsan-suppressions
@@ -1,5 +1,5 @@
# ThreadSanitizer suppressions.
-# See Note [ThreadSanitizer] in includes/rts/TSANUtils.h.
+# See Note [ThreadSanitizer] in rts/include/rts/TSANUtils.h.
# This is a known race but is benign
race:capability_is_busy
diff --git a/rts/AdjustorAsm.S b/rts/AdjustorAsm.S
index 2795b83b63..59ac2b49cb 100644
--- a/rts/AdjustorAsm.S
+++ b/rts/AdjustorAsm.S
@@ -1,4 +1,4 @@
-#include "../includes/ghcconfig.h"
+#include "include/ghcconfig.h"
/* ******************************** PowerPC ******************************** */
diff --git a/rts/Capability.h b/rts/Capability.h
index 14ba9ef2d7..4a27e618fe 100644
--- a/rts/Capability.h
+++ b/rts/Capability.h
@@ -253,10 +253,10 @@ INLINE_HEADER void releaseCapability_ (Capability* cap STG_UNUSED,
bool always_wakeup STG_UNUSED) {};
#endif
-// declared in includes/rts/Threads.h:
+// declared in rts/include/rts/Threads.h:
// extern Capability MainCapability;
-// declared in includes/rts/Threads.h:
+// declared in rts/include/rts/Threads.h:
// extern uint32_t n_capabilities;
// extern uint32_t enabled_capabilities;
diff --git a/rts/LdvProfile.c b/rts/LdvProfile.c
index aef1703c4a..b56c4d07bb 100644
--- a/rts/LdvProfile.c
+++ b/rts/LdvProfile.c
@@ -70,7 +70,7 @@ processHeapClosureForDead( const StgClosure *c )
if (IS_FORWARDING_PTR(info)) {
// The size of the evacuated closure is currently stored in
// the LDV field. See SET_EVACUAEE_FOR_LDV() in
- // includes/StgLdvProf.h.
+ // rts/include/StgLdvProf.h.
return LDVW(c);
}
info = INFO_PTR_TO_STRUCT(info);
diff --git a/rts/Printer.c b/rts/Printer.c
index 90a40f2626..d2a5c67ca4 100644
--- a/rts/Printer.c
+++ b/rts/Printer.c
@@ -996,7 +996,7 @@ void printObj( StgClosure *obj )
Closure types
NOTE: must be kept in sync with the closure types in
- includes/rts/storage/ClosureTypes.h
+ rts/include/rts/storage/ClosureTypes.h
-------------------------------------------------------------------------- */
const char *closure_type_names[] = {
diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c
index 68e412c9c7..964cb99e9f 100644
--- a/rts/RetainerProfile.c
+++ b/rts/RetainerProfile.c
@@ -49,7 +49,7 @@ information about the retainers is still applicable.
/*
Note: what to change in order to plug-in a new retainer profiling scheme?
- (1) type retainer in ../includes/StgRetainerProf.h
+ (1) type retainer in include/StgRetainerProf.h
(2) retainer function R(), i.e., getRetainerFrom()
(3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(),
in RetainerSet.h, if needed.
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index 38e1b8071c..0632dfa6df 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -397,7 +397,7 @@
#endif
/* Modules compiled with -ticky may mention ticky counters */
-/* This list should marry up with the one in $(TOP)/includes/stg/Ticky.h */
+/* This list should marry up with the one in $(TOP)/rts/include/stg/Ticky.h */
#define RTS_TICKY_SYMBOLS \
SymI_NeedsDataProto(ticky_entry_ctrs) \
SymI_NeedsDataProto(top_ct) \
diff --git a/rts/Schedule.c b/rts/Schedule.c
index 4a4b2d494a..3306f00ff3 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -2621,7 +2621,7 @@ scheduleThreadOn(Capability *cap, StgWord cpu USED_IF_THREADS, StgTSO *tso)
#endif
}
-// See includes/rts/Threads.h
+// See rts/include/rts/Threads.h
void
scheduleWaitThread (StgTSO* tso, /*[out]*/HaskellObj* ret, Capability **pcap)
{
diff --git a/rts/Schedule.h b/rts/Schedule.h
index 4c692842e7..5aaafd9d88 100644
--- a/rts/Schedule.h
+++ b/rts/Schedule.h
@@ -130,7 +130,7 @@ void resurrectThreads (StgTSO *);
#if !IN_STG_CODE
-/* END_TSO_QUEUE and friends now defined in includes/stg/MiscClosures.h */
+/* END_TSO_QUEUE and friends now defined in rts/include/stg/MiscClosures.h */
/* Add a thread to the end of the run queue.
* NOTE: tso->link should be END_TSO_QUEUE before calling this macro.
diff --git a/rts/StgCRun.c b/rts/StgCRun.c
index b439b7e36a..25a931994d 100644
--- a/rts/StgCRun.c
+++ b/rts/StgCRun.c
@@ -880,7 +880,7 @@ StgRun(StgFunPtr f, StgRegTable *basereg) {
those regs not used in Thumb mode. Hard to judge if this is
needed, but certainly Haskell code is using them for
placing GHC's virtual registers there. See
- includes/stg/MachRegs.h Please note that Haskell code is
+ rts/include/stg/MachRegs.h Please note that Haskell code is
compiled by GHC/LLVM into ARM code (not Thumb!), at least
as of February 2012 */
: "%r4", "%r5", "%r6", "%r8", "%r9", "%r10", "%11", "%ip", "%lr"
diff --git a/rts/StgPrimFloat.h b/rts/StgPrimFloat.h
index 70ece8f074..4c77b3d995 100644
--- a/rts/StgPrimFloat.h
+++ b/rts/StgPrimFloat.h
@@ -16,6 +16,6 @@ void __decodeDouble_2Int (I_ *man_sign, W_ *man_high, W_ *man_low, I_ *exp,
void __decodeFloat_Int (I_ *man, I_ *exp, StgFloat flt);
// __{int,word}_encode{Float,Double} are public, declared in
-// includes/rts/PrimFloat.h.
+// rts/include/rts/PrimFloat.h.
#include "EndPrivate.h"
diff --git a/rts/include/.dir-locals.el b/rts/include/.dir-locals.el
new file mode 100644
index 0000000000..9e13ffa6ba
--- /dev/null
+++ b/rts/include/.dir-locals.el
@@ -0,0 +1,13 @@
+;;; Directory Local Variables
+;;; See Info node `(emacs) Directory Variables' for more information.
+
+;; Default mode settings: no tabs, 80 column, UTF8
+((nil
+ (indent-tabs-mode . nil)
+ (fill-column . 80)
+ (buffer-file-coding-system . utf-8-unix))
+
+ ;; c-mode settings: 'Allman' BSD style, 4 space indents
+ (c-mode
+ (c-file-style . "BSD")
+ (c-basic-offset . 4)))
diff --git a/rts/include/Cmm.h b/rts/include/Cmm.h
new file mode 100644
index 0000000000..94951bc9f8
--- /dev/null
+++ b/rts/include/Cmm.h
@@ -0,0 +1,924 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The University of Glasgow 2004-2013
+ *
+ * This file is included at the top of all .cmm source files (and
+ * *only* .cmm files). It defines a collection of useful macros for
+ * making .cmm code a bit less error-prone to write, and a bit easier
+ * on the eye for the reader.
+ *
+ * For the syntax of .cmm files, see the parser in ghc/compiler/GHC/Cmm/Parser.y.
+ *
+ * Accessing fields of structures defined in the RTS header files is
+ * done via automatically-generated macros in DerivedConstants.h. For
+ * example, where previously we used
+ *
+ * CurrentTSO->what_next = x
+ *
+ * in C-- we now use
+ *
+ * StgTSO_what_next(CurrentTSO) = x
+ *
+ * where the StgTSO_what_next() macro is automatically generated by
+ * utils/deriveConstants. If you need to access a field that doesn't
+ * already have a macro, edit that program (it's pretty self-explanatory).
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/*
+ * In files that are included into both C and C-- (and perhaps
+ * Haskell) sources, we sometimes need to conditionally compile bits
+ * depending on the language. CMINUSMINUS==1 in .cmm sources:
+ */
+#define CMINUSMINUS 1
+
+#include "ghcconfig.h"
+
+/* -----------------------------------------------------------------------------
+ Types
+
+ The following synonyms for C-- types are declared here:
+
+ I8, I16, I32, I64 MachRep-style names for convenience
+
+ W_ is shorthand for the word type (== StgWord)
+ F_ shorthand for float (F_ == StgFloat == C's float)
+ D_ shorthand for double (D_ == StgDouble == C's double)
+
+ CInt has the same size as an int in C on this platform
+ CLong has the same size as a long in C on this platform
+ CBool has the same size as a bool in C on this platform
+
+ --------------------------------------------------------------------------- */
+
+#define I8 bits8
+#define I16 bits16
+#define I32 bits32
+#define I64 bits64
+#define P_ gcptr
+
+#if SIZEOF_VOID_P == 4
+#define W_ bits32
+/* Maybe it's better to include MachDeps.h */
+#define TAG_BITS 2
+#elif SIZEOF_VOID_P == 8
+#define W_ bits64
+/* Maybe it's better to include MachDeps.h */
+#define TAG_BITS 3
+#else
+#error Unknown word size
+#endif
+
+/*
+ * The RTS must sometimes UNTAG a pointer before dereferencing it.
+ * See the wiki page commentary/rts/haskell-execution/pointer-tagging
+ */
+#define TAG_MASK ((1 << TAG_BITS) - 1)
+#define UNTAG(p) (p & ~TAG_MASK)
+#define GETTAG(p) (p & TAG_MASK)
+
+#if SIZEOF_INT == 4
+#define CInt bits32
+#elif SIZEOF_INT == 8
+#define CInt bits64
+#else
+#error Unknown int size
+#endif
+
+#if SIZEOF_LONG == 4
+#define CLong bits32
+#elif SIZEOF_LONG == 8
+#define CLong bits64
+#else
+#error Unknown long size
+#endif
+
+#define CBool bits8
+
+#define F_ float32
+#define D_ float64
+#define L_ bits64
+#define V16_ bits128
+#define V32_ bits256
+#define V64_ bits512
+
+#define SIZEOF_StgDouble 8
+#define SIZEOF_StgWord64 8
+
+/* -----------------------------------------------------------------------------
+ Misc useful stuff
+ -------------------------------------------------------------------------- */
+
+#define ccall foreign "C"
+
+#define NULL (0::W_)
+
+#define STRING(name,str) \
+ section "rodata" { \
+ name : bits8[] str; \
+ } \
+
+#if defined(TABLES_NEXT_TO_CODE)
+#define RET_LBL(f) f##_info
+#else
+#define RET_LBL(f) f##_ret
+#endif
+
+#if defined(TABLES_NEXT_TO_CODE)
+#define ENTRY_LBL(f) f##_info
+#else
+#define ENTRY_LBL(f) f##_entry
+#endif
+
+/* -----------------------------------------------------------------------------
+ Byte/word macros
+
+ Everything in C-- is in byte offsets (well, most things). We use
+ some macros to allow us to express offsets in words and to try to
+ avoid byte/word confusion.
+ -------------------------------------------------------------------------- */
+
+#define SIZEOF_W SIZEOF_VOID_P
+#define W_MASK (SIZEOF_W-1)
+
+#if SIZEOF_W == 4
+#define W_SHIFT 2
+#elif SIZEOF_W == 8
+#define W_SHIFT 3
+#endif
+
+/* Converting quantities of words to bytes */
+#define WDS(n) ((n)*SIZEOF_W)
+
+/*
+ * Converting quantities of bytes to words
+ * NB. these work on *unsigned* values only
+ */
+#define BYTES_TO_WDS(n) ((n) / SIZEOF_W)
+#define ROUNDUP_BYTES_TO_WDS(n) (((n) + SIZEOF_W - 1) / SIZEOF_W)
+
+/*
+ * TO_W_(n) and TO_ZXW_(n) convert n to W_ type from a smaller type,
+ * with and without sign extension respectively
+ */
+#if SIZEOF_W == 4
+#define TO_I64(x) %sx64(x)
+#define TO_W_(x) %sx32(x)
+#define TO_ZXW_(x) %zx32(x)
+#define HALF_W_(x) %lobits16(x)
+#elif SIZEOF_W == 8
+#define TO_I64(x) (x)
+#define TO_W_(x) %sx64(x)
+#define TO_ZXW_(x) %zx64(x)
+#define HALF_W_(x) %lobits32(x)
+#endif
+
+#if SIZEOF_INT == 4 && SIZEOF_W == 8
+#define W_TO_INT(x) %lobits32(x)
+#elif SIZEOF_INT == SIZEOF_W
+#define W_TO_INT(x) (x)
+#endif
+
+#if SIZEOF_LONG == 4 && SIZEOF_W == 8
+#define W_TO_LONG(x) %lobits32(x)
+#elif SIZEOF_LONG == SIZEOF_W
+#define W_TO_LONG(x) (x)
+#endif
+
+/* -----------------------------------------------------------------------------
+ Atomic memory operations.
+ -------------------------------------------------------------------------- */
+
+#if SIZEOF_W == 4
+#define cmpxchgW cmpxchg32
+#elif SIZEOF_W == 8
+#define cmpxchgW cmpxchg64
+#endif
+
+/* -----------------------------------------------------------------------------
+ Heap/stack access, and adjusting the heap/stack pointers.
+ -------------------------------------------------------------------------- */
+
+#define Sp(n) W_[Sp + WDS(n)]
+#define Hp(n) W_[Hp + WDS(n)]
+
+#define Sp_adj(n) Sp = Sp + WDS(n) /* pronounced "spadge" */
+#define Hp_adj(n) Hp = Hp + WDS(n)
+
+/* -----------------------------------------------------------------------------
+ Assertions and Debuggery
+ -------------------------------------------------------------------------- */
+
+#if defined(DEBUG) || defined(USE_ASSERTS_ALL_WAYS)
+#define ASSERTS_ENABLED 1
+#else
+#undef ASSERTS_ENABLED
+#endif
+
+#if defined(ASSERTS_ENABLED)
+#define ASSERT(predicate) \
+ if (predicate) { \
+ /*null*/; \
+ } else { \
+ foreign "C" _assertFail(__FILE__, __LINE__) never returns; \
+ }
+#else
+#define ASSERT(p) /* nothing */
+#endif
+
+#if defined(DEBUG)
+#define DEBUG_ONLY(s) s
+#else
+#define DEBUG_ONLY(s) /* nothing */
+#endif
+
+/*
+ * The IF_DEBUG macro is useful for debug messages that depend on one
+ * of the RTS debug options. For example:
+ *
+ * IF_DEBUG(RtsFlags_DebugFlags_apply,
+ * foreign "C" fprintf(stderr, stg_ap_0_ret_str));
+ *
+ * Note the syntax is slightly different to the C version of this macro.
+ */
+#if defined(DEBUG)
+#define IF_DEBUG(c,s) if (RtsFlags_DebugFlags_##c(RtsFlags) != 0::CBool) { s; }
+#else
+#define IF_DEBUG(c,s) /* nothing */
+#endif
+
+/* -----------------------------------------------------------------------------
+ Entering
+
+ It isn't safe to "enter" every closure. Functions in particular
+ have no entry code as such; their entry point contains the code to
+ apply the function.
+
+ ToDo: range should end in N_CLOSURE_TYPES-1, not N_CLOSURE_TYPES,
+ but switch doesn't allow us to use exprs there yet.
+
+ If R1 points to a tagged object it points either to
+ * A constructor.
+ * A function with arity <= TAG_MASK.
+ In both cases the right thing to do is to return.
+ Note: it is rather lucky that we can use the tag bits to do this
+ for both objects. Maybe it points to a brittle design?
+
+ Indirections can contain tagged pointers, so their tag is checked.
+ -------------------------------------------------------------------------- */
+
+#if defined(PROFILING)
+
+// When profiling, we cannot shortcut ENTER() by checking the tag,
+// because LDV profiling relies on entering closures to mark them as
+// "used".
+
+#define LOAD_INFO(ret,x) \
+ info = %INFO_PTR(UNTAG(x));
+
+#define UNTAG_IF_PROF(x) UNTAG(x)
+
+#else
+
+#define LOAD_INFO(ret,x) \
+ if (GETTAG(x) != 0) { \
+ ret(x); \
+ } \
+ info = %INFO_PTR(x);
+
+#define UNTAG_IF_PROF(x) (x) /* already untagged */
+
+#endif
+
+// We need two versions of ENTER():
+// - ENTER(x) takes the closure as an argument and uses return(),
+// for use in civilized code where the stack is handled by GHC
+//
+// - ENTER_NOSTACK() where the closure is in R1, and returns are
+// explicit jumps, for use when we are doing the stack management
+// ourselves.
+
+#if defined(PROFILING)
+// See Note [Evaluating functions with profiling] in rts/Apply.cmm
+#define ENTER(x) jump stg_ap_0_fast(x);
+#else
+#define ENTER(x) ENTER_(return,x)
+#endif
+
+#define ENTER_R1() ENTER_(RET_R1,R1)
+
+#define RET_R1(x) jump %ENTRY_CODE(Sp(0)) [R1]
+
+#define ENTER_(ret,x) \
+ again: \
+ W_ info; \
+ LOAD_INFO(ret,x) \
+ /* See Note [Heap memory barriers] in SMP.h */ \
+ prim_read_barrier; \
+ switch [INVALID_OBJECT .. N_CLOSURE_TYPES] \
+ (TO_W_( %INFO_TYPE(%STD_INFO(info)) )) { \
+ case \
+ IND, \
+ IND_STATIC: \
+ { \
+ x = StgInd_indirectee(x); \
+ goto again; \
+ } \
+ case \
+ FUN, \
+ FUN_1_0, \
+ FUN_0_1, \
+ FUN_2_0, \
+ FUN_1_1, \
+ FUN_0_2, \
+ FUN_STATIC, \
+ BCO, \
+ PAP: \
+ { \
+ ret(x); \
+ } \
+ default: \
+ { \
+ x = UNTAG_IF_PROF(x); \
+ jump %ENTRY_CODE(info) (x); \
+ } \
+ }
+
+// The FUN cases almost never happen: a pointer to a non-static FUN
+// should always be tagged. This unfortunately isn't true for the
+// interpreter right now, which leaves untagged FUNs on the stack.
+
+/* -----------------------------------------------------------------------------
+ Constants.
+ -------------------------------------------------------------------------- */
+
+#include "rts/Constants.h"
+#include "DerivedConstants.h"
+#include "rts/storage/ClosureTypes.h"
+#include "rts/storage/FunTypes.h"
+#include "rts/OSThreads.h"
+
+/*
+ * Need MachRegs, because some of the RTS code is conditionally
+ * compiled based on REG_R1, REG_R2, etc.
+ */
+#include "stg/MachRegsForHost.h"
+
+#include "rts/prof/LDV.h"
+
+#undef BLOCK_SIZE
+#undef MBLOCK_SIZE
+#include "rts/storage/Block.h" /* For Bdescr() */
+
+
+#define MyCapability() (BaseReg - OFFSET_Capability_r)
+
+/* -------------------------------------------------------------------------
+ Info tables
+ ------------------------------------------------------------------------- */
+
+#if defined(PROFILING)
+#define PROF_HDR_FIELDS(w_,hdr1,hdr2) \
+ w_ hdr1, \
+ w_ hdr2,
+#else
+#define PROF_HDR_FIELDS(w_,hdr1,hdr2) /* nothing */
+#endif
+
+/* -------------------------------------------------------------------------
+ Allocation and garbage collection
+ ------------------------------------------------------------------------- */
+
+/*
+ * ALLOC_PRIM is for allocating memory on the heap for a primitive
+ * object. It is used all over PrimOps.cmm.
+ *
+ * We make the simplifying assumption that the "admin" part of a
+ * primitive closure is just the header when calculating sizes for
+ * ticky-ticky. It's not clear whether eg. the size field of an array
+ * should be counted as "admin", or the various fields of a BCO.
+ */
+#define ALLOC_PRIM(bytes) \
+ HP_CHK_GEN_TICKY(bytes); \
+ TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \
+ CCCS_ALLOC(bytes);
+
+#define HEAP_CHECK(bytes,failure) \
+ TICK_BUMP(HEAP_CHK_ctr); \
+ Hp = Hp + (bytes); \
+ if (Hp > HpLim) { HpAlloc = (bytes); failure; } \
+ TICK_ALLOC_HEAP_NOCTR(bytes);
+
+#define ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,failure) \
+ HEAP_CHECK(bytes,failure) \
+ TICK_ALLOC_PRIM(SIZEOF_StgHeader,bytes-SIZEOF_StgHeader,0); \
+ CCCS_ALLOC(bytes);
+
+#define ALLOC_PRIM_(bytes,fun) \
+ ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM(fun));
+
+#define ALLOC_PRIM_P(bytes,fun,arg) \
+ ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_P(fun,arg));
+
+#define ALLOC_PRIM_N(bytes,fun,arg) \
+ ALLOC_PRIM_WITH_CUSTOM_FAILURE(bytes,GC_PRIM_N(fun,arg));
+
+/* CCS_ALLOC wants the size in words, because ccs->mem_alloc is in words */
+#define CCCS_ALLOC(__alloc) CCS_ALLOC(BYTES_TO_WDS(__alloc), CCCS)
+
+#define HP_CHK_GEN_TICKY(bytes) \
+ HP_CHK_GEN(bytes); \
+ TICK_ALLOC_HEAP_NOCTR(bytes);
+
+#define HP_CHK_P(bytes, fun, arg) \
+ HEAP_CHECK(bytes, GC_PRIM_P(fun,arg))
+
+// TODO I'm not seeing where ALLOC_P_TICKY is used; can it be removed?
+// -NSF March 2013
+#define ALLOC_P_TICKY(bytes, fun, arg) \
+ HP_CHK_P(bytes); \
+ TICK_ALLOC_HEAP_NOCTR(bytes);
+
+#define CHECK_GC() \
+ (bdescr_link(CurrentNursery) == NULL || \
+ generation_n_new_large_words(W_[g0]) >= TO_W_(CLong[large_alloc_lim]))
+
+// allocate() allocates from the nursery, so we check to see
+// whether the nursery is nearly empty in any function that uses
+// allocate() - this includes many of the primops.
+//
+// HACK alert: the __L__ stuff is here to coax the common-block
+// eliminator into commoning up the call stg_gc_noregs() with the same
+// code that gets generated by a STK_CHK_GEN() in the same proc. We
+// also need an if (0) { goto __L__; } so that the __L__ label isn't
+// optimised away by the control-flow optimiser prior to common-block
+// elimination (it will be optimised away later).
+//
+// This saves some code in gmp-wrappers.cmm where we have lots of
+// MAYBE_GC() in the same proc as STK_CHK_GEN().
+//
+#define MAYBE_GC(retry) \
+ if (CHECK_GC()) { \
+ HpAlloc = 0; \
+ goto __L__; \
+ __L__: \
+ call stg_gc_noregs(); \
+ goto retry; \
+ } \
+ if (0) { goto __L__; }
+
+#define GC_PRIM(fun) \
+ jump stg_gc_prim(fun);
+
+// Version of GC_PRIM for use in low-level Cmm. We can call
+// stg_gc_prim, because it takes one argument and therefore has a
+// platform-independent calling convention (Note [Syntax of .cmm
+// files] in GHC.Cmm.Parser).
+#define GC_PRIM_LL(fun) \
+ R1 = fun; \
+ jump stg_gc_prim [R1];
+
+// We pass the fun as the second argument, because the arg is
+// usually already in the first argument position (R1), so this
+// avoids moving it to a different register / stack slot.
+#define GC_PRIM_N(fun,arg) \
+ jump stg_gc_prim_n(arg,fun);
+
+#define GC_PRIM_P(fun,arg) \
+ jump stg_gc_prim_p(arg,fun);
+
+#define GC_PRIM_P_LL(fun,arg) \
+ R1 = arg; \
+ R2 = fun; \
+ jump stg_gc_prim_p_ll [R1,R2];
+
+#define GC_PRIM_PP(fun,arg1,arg2) \
+ jump stg_gc_prim_pp(arg1,arg2,fun);
+
+#define MAYBE_GC_(fun) \
+ if (CHECK_GC()) { \
+ HpAlloc = 0; \
+ GC_PRIM(fun) \
+ }
+
+#define MAYBE_GC_N(fun,arg) \
+ if (CHECK_GC()) { \
+ HpAlloc = 0; \
+ GC_PRIM_N(fun,arg) \
+ }
+
+#define MAYBE_GC_P(fun,arg) \
+ if (CHECK_GC()) { \
+ HpAlloc = 0; \
+ GC_PRIM_P(fun,arg) \
+ }
+
+#define MAYBE_GC_PP(fun,arg1,arg2) \
+ if (CHECK_GC()) { \
+ HpAlloc = 0; \
+ GC_PRIM_PP(fun,arg1,arg2) \
+ }
+
+#define STK_CHK_LL(n, fun) \
+ TICK_BUMP(STK_CHK_ctr); \
+ if (Sp - (n) < SpLim) { \
+ GC_PRIM_LL(fun) \
+ }
+
+#define STK_CHK_P_LL(n, fun, arg) \
+ TICK_BUMP(STK_CHK_ctr); \
+ if (Sp - (n) < SpLim) { \
+ GC_PRIM_P_LL(fun,arg) \
+ }
+
+#define STK_CHK_PP(n, fun, arg1, arg2) \
+ TICK_BUMP(STK_CHK_ctr); \
+ if (Sp - (n) < SpLim) { \
+ GC_PRIM_PP(fun,arg1,arg2) \
+ }
+
+#define STK_CHK_ENTER(n, closure) \
+ TICK_BUMP(STK_CHK_ctr); \
+ if (Sp - (n) < SpLim) { \
+ jump __stg_gc_enter_1(closure); \
+ }
+
+// A funky heap check used by AutoApply.cmm
+
+#define HP_CHK_NP_ASSIGN_SP0(size,f) \
+ HEAP_CHECK(size, Sp(0) = f; jump __stg_gc_enter_1 [R1];)
+
+/* -----------------------------------------------------------------------------
+ Closure headers
+ -------------------------------------------------------------------------- */
+
+/*
+ * This is really ugly, since we don't do the rest of StgHeader this
+ * way. The problem is that values from DerivedConstants.h cannot be
+ * dependent on the way (SMP, PROF etc.). For SIZEOF_StgHeader we get
+ * the value from GHC, but it seems like too much trouble to do that
+ * for StgThunkHeader.
+ */
+#define SIZEOF_StgThunkHeader SIZEOF_StgHeader+SIZEOF_StgSMPThunkHeader
+
+#define StgThunk_payload(__ptr__,__ix__) \
+ W_[__ptr__+SIZEOF_StgThunkHeader+ WDS(__ix__)]
+
+/* -----------------------------------------------------------------------------
+ Closures
+ -------------------------------------------------------------------------- */
+
+/* The offset of the payload of an array */
+#define BYTE_ARR_CTS(arr) ((arr) + SIZEOF_StgArrBytes)
+
+/* The number of words allocated in an array payload */
+#define BYTE_ARR_WDS(arr) ROUNDUP_BYTES_TO_WDS(StgArrBytes_bytes(arr))
+
+/* Getting/setting the info pointer of a closure */
+#define SET_INFO(p,info) StgHeader_info(p) = info
+#define GET_INFO(p) StgHeader_info(p)
+
+/* Determine the size of an ordinary closure from its info table */
+#define sizeW_fromITBL(itbl) \
+ SIZEOF_StgHeader + WDS(%INFO_PTRS(itbl)) + WDS(%INFO_NPTRS(itbl))
+
+/* NB. duplicated from InfoTables.h! */
+#define BITMAP_SIZE(bitmap) ((bitmap) & BITMAP_SIZE_MASK)
+#define BITMAP_BITS(bitmap) ((bitmap) >> BITMAP_BITS_SHIFT)
+
+/* Debugging macros */
+#define LOOKS_LIKE_INFO_PTR(p) \
+ ((p) != NULL && \
+ LOOKS_LIKE_INFO_PTR_NOT_NULL(p))
+
+#define LOOKS_LIKE_INFO_PTR_NOT_NULL(p) \
+ ( (TO_W_(%INFO_TYPE(%STD_INFO(p))) != INVALID_OBJECT) && \
+ (TO_W_(%INFO_TYPE(%STD_INFO(p))) < N_CLOSURE_TYPES))
+
+#define LOOKS_LIKE_CLOSURE_PTR(p) (LOOKS_LIKE_INFO_PTR(GET_INFO(UNTAG(p))))
+
+/*
+ * The layout of the StgFunInfoExtra part of an info table changes
+ * depending on TABLES_NEXT_TO_CODE. So we define field access
+ * macros which use the appropriate version here:
+ */
+#if defined(TABLES_NEXT_TO_CODE)
+/*
+ * when TABLES_NEXT_TO_CODE, slow_apply is stored as an offset
+ * instead of the normal pointer.
+ */
+
+#define StgFunInfoExtra_slow_apply(fun_info) \
+ (TO_W_(StgFunInfoExtraRev_slow_apply_offset(fun_info)) \
+ + (fun_info) + SIZEOF_StgFunInfoExtraRev + SIZEOF_StgInfoTable)
+
+#define StgFunInfoExtra_fun_type(i) StgFunInfoExtraRev_fun_type(i)
+#define StgFunInfoExtra_arity(i) StgFunInfoExtraRev_arity(i)
+#define StgFunInfoExtra_bitmap(i) StgFunInfoExtraRev_bitmap(i)
+#else
+#define StgFunInfoExtra_slow_apply(i) StgFunInfoExtraFwd_slow_apply(i)
+#define StgFunInfoExtra_fun_type(i) StgFunInfoExtraFwd_fun_type(i)
+#define StgFunInfoExtra_arity(i) StgFunInfoExtraFwd_arity(i)
+#define StgFunInfoExtra_bitmap(i) StgFunInfoExtraFwd_bitmap(i)
+#endif
+
+#define mutArrCardMask ((1 << MUT_ARR_PTRS_CARD_BITS) - 1)
+#define mutArrPtrCardDown(i) ((i) >> MUT_ARR_PTRS_CARD_BITS)
+#define mutArrPtrCardUp(i) (((i) + mutArrCardMask) >> MUT_ARR_PTRS_CARD_BITS)
+#define mutArrPtrsCardWords(n) ROUNDUP_BYTES_TO_WDS(mutArrPtrCardUp(n))
+
+#if defined(PROFILING) || defined(DEBUG)
+#define OVERWRITING_CLOSURE_SIZE(c, size) foreign "C" overwritingClosureSize(c "ptr", size)
+#define OVERWRITING_CLOSURE(c) foreign "C" overwritingClosure(c "ptr")
+#define OVERWRITING_CLOSURE_MUTABLE(c, off) foreign "C" overwritingMutableClosureOfs(c "ptr", off)
+#else
+#define OVERWRITING_CLOSURE_SIZE(c, size) /* nothing */
+#define OVERWRITING_CLOSURE(c) /* nothing */
+/* This is used to zero slop after shrunk arrays. It is important that we do
+ * this whenever profiling is enabled as described in Note [slop on the heap]
+ * in Storage.c. */
+#define OVERWRITING_CLOSURE_MUTABLE(c, off) \
+ if (TO_W_(RtsFlags_ProfFlags_doHeapProfile(RtsFlags)) != 0) { foreign "C" overwritingMutableClosureOfs(c "ptr", off); }
+#endif
+
+// Memory barriers.
+// For discussion of how these are used to fence heap object
+// accesses see Note [Heap memory barriers] in SMP.h.
+#if defined(THREADED_RTS)
+#define prim_read_barrier prim %read_barrier()
+#else
+#define prim_read_barrier /* nothing */
+#endif
+#if defined(THREADED_RTS)
+#define prim_write_barrier prim %write_barrier()
+#else
+#define prim_write_barrier /* nothing */
+#endif
+
+/* -----------------------------------------------------------------------------
+ Ticky macros
+ -------------------------------------------------------------------------- */
+
+#if defined(TICKY_TICKY)
+#define TICK_BUMP_BY(ctr,n) W_[ctr] = W_[ctr] + n
+#else
+#define TICK_BUMP_BY(ctr,n) /* nothing */
+#endif
+
+#define TICK_BUMP(ctr) TICK_BUMP_BY(ctr,1)
+
+#define TICK_ENT_DYN_IND() TICK_BUMP(ENT_DYN_IND_ctr)
+// ENT_DYN_THK_ctr doesn't exist anymore. Could be ENT_DYN_THK_SINGLE_ctr or
+// ENT_DYN_THK_MANY_ctr
+// #define TICK_ENT_DYN_THK() TICK_BUMP(ENT_DYN_THK_ctr)
+#define TICK_ENT_DYN_THK()
+
+#define TICK_ENT_VIA_NODE() TICK_BUMP(ENT_VIA_NODE_ctr)
+#define TICK_ENT_STATIC_IND() TICK_BUMP(ENT_STATIC_IND_ctr)
+#define TICK_ENT_PERM_IND() TICK_BUMP(ENT_PERM_IND_ctr)
+#define TICK_ENT_PAP() TICK_BUMP(ENT_PAP_ctr)
+#define TICK_ENT_AP() TICK_BUMP(ENT_AP_ctr)
+#define TICK_ENT_AP_STACK() TICK_BUMP(ENT_AP_STACK_ctr)
+#define TICK_ENT_BH() TICK_BUMP(ENT_BH_ctr)
+#define TICK_ENT_LNE() TICK_BUMP(ENT_LNE_ctr)
+#define TICK_UNKNOWN_CALL() TICK_BUMP(UNKNOWN_CALL_ctr)
+#define TICK_UPDF_PUSHED() TICK_BUMP(UPDF_PUSHED_ctr)
+#define TICK_CATCHF_PUSHED() TICK_BUMP(CATCHF_PUSHED_ctr)
+#define TICK_UPDF_OMITTED() TICK_BUMP(UPDF_OMITTED_ctr)
+#define TICK_UPD_NEW_IND() TICK_BUMP(UPD_NEW_IND_ctr)
+#define TICK_UPD_NEW_PERM_IND() TICK_BUMP(UPD_NEW_PERM_IND_ctr)
+#define TICK_UPD_OLD_IND() TICK_BUMP(UPD_OLD_IND_ctr)
+#define TICK_UPD_OLD_PERM_IND() TICK_BUMP(UPD_OLD_PERM_IND_ctr)
+
+#define TICK_SLOW_CALL_FUN_TOO_FEW() TICK_BUMP(SLOW_CALL_FUN_TOO_FEW_ctr)
+#define TICK_SLOW_CALL_FUN_CORRECT() TICK_BUMP(SLOW_CALL_FUN_CORRECT_ctr)
+#define TICK_SLOW_CALL_FUN_TOO_MANY() TICK_BUMP(SLOW_CALL_FUN_TOO_MANY_ctr)
+#define TICK_SLOW_CALL_PAP_TOO_FEW() TICK_BUMP(SLOW_CALL_PAP_TOO_FEW_ctr)
+#define TICK_SLOW_CALL_PAP_CORRECT() TICK_BUMP(SLOW_CALL_PAP_CORRECT_ctr)
+#define TICK_SLOW_CALL_PAP_TOO_MANY() TICK_BUMP(SLOW_CALL_PAP_TOO_MANY_ctr)
+
+#define TICK_SLOW_CALL_fast_v16() TICK_BUMP(SLOW_CALL_fast_v16_ctr)
+#define TICK_SLOW_CALL_fast_v() TICK_BUMP(SLOW_CALL_fast_v_ctr)
+#define TICK_SLOW_CALL_fast_p() TICK_BUMP(SLOW_CALL_fast_p_ctr)
+#define TICK_SLOW_CALL_fast_pv() TICK_BUMP(SLOW_CALL_fast_pv_ctr)
+#define TICK_SLOW_CALL_fast_pp() TICK_BUMP(SLOW_CALL_fast_pp_ctr)
+#define TICK_SLOW_CALL_fast_ppv() TICK_BUMP(SLOW_CALL_fast_ppv_ctr)
+#define TICK_SLOW_CALL_fast_ppp() TICK_BUMP(SLOW_CALL_fast_ppp_ctr)
+#define TICK_SLOW_CALL_fast_pppv() TICK_BUMP(SLOW_CALL_fast_pppv_ctr)
+#define TICK_SLOW_CALL_fast_pppp() TICK_BUMP(SLOW_CALL_fast_pppp_ctr)
+#define TICK_SLOW_CALL_fast_ppppp() TICK_BUMP(SLOW_CALL_fast_ppppp_ctr)
+#define TICK_SLOW_CALL_fast_pppppp() TICK_BUMP(SLOW_CALL_fast_pppppp_ctr)
+#define TICK_VERY_SLOW_CALL() TICK_BUMP(VERY_SLOW_CALL_ctr)
+
+/* NOTE: TICK_HISTO_BY and TICK_HISTO
+ currently have no effect.
+ The old code for it didn't typecheck and I
+ just commented it out to get ticky to work.
+ - krc 1/2007 */
+
+#define TICK_HISTO_BY(histo,n,i) /* nothing */
+
+#define TICK_HISTO(histo,n) TICK_HISTO_BY(histo,n,1)
+
+/* An unboxed tuple with n components. */
+#define TICK_RET_UNBOXED_TUP(n) \
+ TICK_BUMP(RET_UNBOXED_TUP_ctr++); \
+ TICK_HISTO(RET_UNBOXED_TUP,n)
+
+/*
+ * A slow call with n arguments. In the unevald case, this call has
+ * already been counted once, so don't count it again.
+ */
+#define TICK_SLOW_CALL(n) \
+ TICK_BUMP(SLOW_CALL_ctr); \
+ TICK_HISTO(SLOW_CALL,n)
+
+/*
+ * This slow call was found to be to an unevaluated function; undo the
+ * ticks we did in TICK_SLOW_CALL.
+ */
+#define TICK_SLOW_CALL_UNEVALD(n) \
+ TICK_BUMP(SLOW_CALL_UNEVALD_ctr); \
+ TICK_BUMP_BY(SLOW_CALL_ctr,-1); \
+ TICK_HISTO_BY(SLOW_CALL,n,-1);
+
+/* Updating a closure with a new CON */
+#define TICK_UPD_CON_IN_NEW(n) \
+ TICK_BUMP(UPD_CON_IN_NEW_ctr); \
+ TICK_HISTO(UPD_CON_IN_NEW,n)
+
+#define TICK_ALLOC_HEAP_NOCTR(bytes) \
+ TICK_BUMP(ALLOC_RTS_ctr); \
+ TICK_BUMP_BY(ALLOC_RTS_tot,bytes)
+
+/* -----------------------------------------------------------------------------
+ Misc junk
+ -------------------------------------------------------------------------- */
+
+#define NO_TREC stg_NO_TREC_closure
+#define END_TSO_QUEUE stg_END_TSO_QUEUE_closure
+#define STM_AWOKEN stg_STM_AWOKEN_closure
+
+/* recordMutableCap(p, gen): append pointer p to this capability's
+   mutable list ("remembered set") for generation gen.  Capability_mut_lists
+   is an array of block-descriptor pointers indexed by generation; if the
+   head block is full, a fresh block is allocated (allocBlock_lock takes
+   the storage-manager lock) and linked in front. */
+#define recordMutableCap(p, gen) \
+ W_ __bd; \
+ W_ mut_list; \
+ mut_list = Capability_mut_lists(MyCapability()) + WDS(gen); \
+ __bd = W_[mut_list]; \
+ if (bdescr_free(__bd) >= bdescr_start(__bd) + BLOCK_SIZE) { /* head block full */ \
+ W_ __new_bd; \
+ ("ptr" __new_bd) = foreign "C" allocBlock_lock(); \
+ bdescr_link(__new_bd) = __bd; \
+ __bd = __new_bd; \
+ W_[mut_list] = __bd; \
+ } \
+ W_ free; \
+ free = bdescr_free(__bd); \
+ W_[free] = p; /* store p, then advance the block's free pointer */ \
+ bdescr_free(__bd) = free + WDS(1);
+
+/* recordMutable(p): record p in the remembered set of its own
+   generation, found via p's block descriptor.  Only objects in
+   generations > 0 are recorded; generation 0 is skipped (presumably
+   because gen-0 objects are always scanned by the next minor GC --
+   see the storage manager for the authoritative rationale). */
+#define recordMutable(p) \
+ P_ __p; \
+ W_ __bd; \
+ W_ __gen; \
+ __p = p; \
+ __bd = Bdescr(__p); \
+ __gen = TO_W_(bdescr_gen_no(__bd)); \
+ if (__gen > 0) { recordMutableCap(__p, __gen); }
+
+/* -----------------------------------------------------------------------------
+ Update remembered set write barrier
+ -------------------------------------------------------------------------- */
+
+/* -----------------------------------------------------------------------------
+ Arrays
+ -------------------------------------------------------------------------- */
+
+/* Complete function body for the clone family of (mutable) array ops.
+ Defined as a macro to avoid function call overhead or code
+ duplication. */
+#define cloneArray(info, src, offset, n) \
+ W_ words, size; \
+ gcptr dst, dst_p, src_p; \
+ \
+ again: MAYBE_GC(again); \
+ \
+ size = n + mutArrPtrsCardWords(n); \
+ words = BYTES_TO_WDS(SIZEOF_StgMutArrPtrs) + size; \
+ ("ptr" dst) = ccall allocate(MyCapability() "ptr", words); \
+ TICK_ALLOC_PRIM(SIZEOF_StgMutArrPtrs, WDS(size), 0); \
+ \
+ SET_HDR(dst, info, CCCS); \
+ StgMutArrPtrs_ptrs(dst) = n; \
+ StgMutArrPtrs_size(dst) = size; \
+ \
+ dst_p = dst + SIZEOF_StgMutArrPtrs; \
+ src_p = src + SIZEOF_StgMutArrPtrs + WDS(offset); \
+ prim %memcpy(dst_p, src_p, n * SIZEOF_W, SIZEOF_W); \
+ \
+ return (dst);
+
+#define copyArray(src, src_off, dst, dst_off, n) \
+ W_ dst_elems_p, dst_p, src_p, bytes; \
+ \
+ if ((n) != 0) { \
+ SET_HDR(dst, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); \
+ \
+ dst_elems_p = (dst) + SIZEOF_StgMutArrPtrs; \
+ dst_p = dst_elems_p + WDS(dst_off); \
+ src_p = (src) + SIZEOF_StgMutArrPtrs + WDS(src_off); \
+ bytes = WDS(n); \
+ \
+ prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); \
+ \
+ setCards(dst, dst_off, n); \
+ } \
+ \
+ return ();
+
+#define copyMutableArray(src, src_off, dst, dst_off, n) \
+ W_ dst_elems_p, dst_p, src_p, bytes; \
+ \
+ if ((n) != 0) { \
+ SET_HDR(dst, stg_MUT_ARR_PTRS_DIRTY_info, CCCS); \
+ \
+ dst_elems_p = (dst) + SIZEOF_StgMutArrPtrs; \
+ dst_p = dst_elems_p + WDS(dst_off); \
+ src_p = (src) + SIZEOF_StgMutArrPtrs + WDS(src_off); \
+ bytes = WDS(n); \
+ \
+ if ((src) == (dst)) { \
+ prim %memmove(dst_p, src_p, bytes, SIZEOF_W); \
+ } else { \
+ prim %memcpy(dst_p, src_p, bytes, SIZEOF_W); \
+ } \
+ \
+ setCards(dst, dst_off, n); \
+ } \
+ \
+ return ();
+
+/*
+ * Set the cards in the array pointed to by arr for an
+ * update to n elements, starting at element dst_off.
+ */
+#define setCards(arr, dst_off, n) \
+ setCardsValue(arr, dst_off, n, 1)
+
+/*
+ * Set the cards in the array pointed to by arr for an
+ * update to n elements, starting at element dst_off to value (0 to indicate
+ * clean, 1 to indicate dirty).
+ */
+#define setCardsValue(arr, dst_off, n, value) \
+ W_ __start_card, __end_card, __cards, __dst_cards_p; \
+ __dst_cards_p = (arr) + SIZEOF_StgMutArrPtrs + WDS(StgMutArrPtrs_ptrs(arr)); \
+ __start_card = mutArrPtrCardDown(dst_off); \
+ __end_card = mutArrPtrCardDown((dst_off) + (n) - 1); \
+ __cards = __end_card - __start_card + 1; \
+ prim %memset(__dst_cards_p + __start_card, (value), __cards, 1)
+
+/* Complete function body for the clone family of small (mutable)
+ array ops. Defined as a macro to avoid function call overhead or
+ code duplication. */
+#define cloneSmallArray(info, src, offset, n) \
+ W_ words, size; \
+ gcptr dst, dst_p, src_p; \
+ \
+ again: MAYBE_GC(again); \
+ \
+ words = BYTES_TO_WDS(SIZEOF_StgSmallMutArrPtrs) + n; \
+ ("ptr" dst) = ccall allocate(MyCapability() "ptr", words); \
+ TICK_ALLOC_PRIM(SIZEOF_StgSmallMutArrPtrs, WDS(n), 0); \
+ \
+ SET_HDR(dst, info, CCCS); \
+ StgSmallMutArrPtrs_ptrs(dst) = n; \
+ \
+ dst_p = dst + SIZEOF_StgSmallMutArrPtrs; \
+ src_p = src + SIZEOF_StgSmallMutArrPtrs + WDS(offset); \
+ prim %memcpy(dst_p, src_p, n * SIZEOF_W, SIZEOF_W); \
+ \
+ return (dst);
+
+
+//
+// Nonmoving write barrier helpers
+//
+// See Note [Update remembered set] in NonMovingMark.c.
+
+#if defined(THREADED_RTS)
+#define IF_NONMOVING_WRITE_BARRIER_ENABLED \
+ if (W_[nonmoving_write_barrier_enabled] != 0) (likely: False)
+#else
+// A similar measure is also taken in rts/NonMoving.h, but that isn't visible from C--
+#define IF_NONMOVING_WRITE_BARRIER_ENABLED \
+ if (0)
+#define nonmoving_write_barrier_enabled 0
+#endif
+
+// A useful helper for pushing a pointer to the update remembered set.
+#define updateRemembSetPushPtr(p) \
+ IF_NONMOVING_WRITE_BARRIER_ENABLED { \
+ ccall updateRemembSetPushClosure_(BaseReg "ptr", p "ptr"); \
+ }
diff --git a/rts/include/HsFFI.h b/rts/include/HsFFI.h
new file mode 100644
index 0000000000..7c25599f83
--- /dev/null
+++ b/rts/include/HsFFI.h
@@ -0,0 +1,148 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2000
+ *
+ * A mapping for Haskell types to C types, including the corresponding bounds.
+ * Intended to be used in conjunction with the FFI.
+ *
+ * WARNING: Keep this file and StgTypes.h in synch!
+ *
+ * ---------------------------------------------------------------------------*/
+
+/* N.B. Only use C-style multi-line comments in this file to avoid upsetting
+ * dtrace on SmartOS, which doesn't support C++-style single-line comments.
+ */
+
+#pragma once
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* get types from GHC's runtime system */
+#include "ghcconfig.h"
+#include "stg/Types.h"
+
+/* get limits for floating point types */
+#include <float.h>
+
+typedef StgChar HsChar;
+typedef StgInt HsInt;
+typedef StgInt8 HsInt8;
+typedef StgInt16 HsInt16;
+typedef StgInt32 HsInt32;
+typedef StgInt64 HsInt64;
+typedef StgWord HsWord;
+typedef StgWord8 HsWord8;
+typedef StgWord16 HsWord16;
+typedef StgWord32 HsWord32;
+typedef StgWord64 HsWord64;
+typedef StgFloat HsFloat;
+typedef StgDouble HsDouble;
+typedef StgInt HsBool;
+typedef void* HsPtr; /* this should better match StgAddr */
+typedef void (*HsFunPtr)(void); /* this should better match StgAddr */
+typedef void* HsStablePtr;
+
+/* this should correspond to the type of StgChar in StgTypes.h */
+#define HS_CHAR_MIN 0
+#define HS_CHAR_MAX 0x10FFFF
+
+/* is it true or not? */
+#define HS_BOOL_FALSE 0
+#define HS_BOOL_TRUE 1
+
+#define HS_BOOL_MIN HS_BOOL_FALSE
+#define HS_BOOL_MAX HS_BOOL_TRUE
+
+
+#define HS_INT_MIN STG_INT_MIN
+#define HS_INT_MAX STG_INT_MAX
+#define HS_WORD_MAX STG_WORD_MAX
+
+#define HS_INT8_MIN STG_INT8_MIN
+#define HS_INT8_MAX STG_INT8_MAX
+#define HS_INT16_MIN STG_INT16_MIN
+#define HS_INT16_MAX STG_INT16_MAX
+#define HS_INT32_MIN STG_INT32_MIN
+#define HS_INT32_MAX STG_INT32_MAX
+#define HS_INT64_MIN STG_INT64_MIN
+#define HS_INT64_MAX STG_INT64_MAX
+#define HS_WORD8_MAX STG_WORD8_MAX
+#define HS_WORD16_MAX STG_WORD16_MAX
+#define HS_WORD32_MAX STG_WORD32_MAX
+#define HS_WORD64_MAX STG_WORD64_MAX
+
+#define HS_FLOAT_RADIX FLT_RADIX
+#define HS_FLOAT_ROUNDS FLT_ROUNDS
+#define HS_FLOAT_EPSILON FLT_EPSILON
+#define HS_FLOAT_DIG FLT_DIG
+#define HS_FLOAT_MANT_DIG FLT_MANT_DIG
+#define HS_FLOAT_MIN FLT_MIN
+#define HS_FLOAT_MIN_EXP FLT_MIN_EXP
+#define HS_FLOAT_MIN_10_EXP FLT_MIN_10_EXP
+#define HS_FLOAT_MAX FLT_MAX
+#define HS_FLOAT_MAX_EXP FLT_MAX_EXP
+#define HS_FLOAT_MAX_10_EXP FLT_MAX_10_EXP
+
+/* NOTE(review): DBL_RADIX and DBL_ROUNDS are not standard <float.h>
+   macros -- ISO C defines only FLT_RADIX and FLT_ROUNDS, which apply
+   to all floating types.  HS_DOUBLE_RADIX / HS_DOUBLE_ROUNDS will
+   therefore fail to compile if ever expanded; confirm whether any
+   client uses them. */
+#define HS_DOUBLE_RADIX DBL_RADIX
+#define HS_DOUBLE_ROUNDS DBL_ROUNDS
+#define HS_DOUBLE_EPSILON DBL_EPSILON
+#define HS_DOUBLE_DIG DBL_DIG
+#define HS_DOUBLE_MANT_DIG DBL_MANT_DIG
+#define HS_DOUBLE_MIN DBL_MIN
+#define HS_DOUBLE_MIN_EXP DBL_MIN_EXP
+#define HS_DOUBLE_MIN_10_EXP DBL_MIN_10_EXP
+#define HS_DOUBLE_MAX DBL_MAX
+#define HS_DOUBLE_MAX_EXP DBL_MAX_EXP
+#define HS_DOUBLE_MAX_10_EXP DBL_MAX_10_EXP
+
+extern void hs_init (int *argc, char **argv[]);
+extern void hs_exit (void);
+extern void hs_exit_nowait(void);
+extern void hs_set_argv (int argc, char *argv[]);
+extern void hs_thread_done (void);
+extern void hs_restoreConsoleCP (void);
+
+extern void hs_perform_gc (void);
+
+/* Lock the stable pointer table. The table must be unlocked
+ * again before calling any Haskell functions, even if those
+ * functions do not manipulate stable pointers. The Haskell
+ * garbage collector will not be able to run until this lock
+ * is released! It is also forbidden to call hs_free_fun_ptr
+ * or any stable pointer-related FFI functions other than
+ * hs_free_stable_ptr_unsafe while the table is locked.
+ */
+extern void hs_lock_stable_ptr_table (void);
+
+/* A deprecated synonym. */
+extern void hs_lock_stable_tables (void);
+
+/* Unlock the stable pointer table. */
+extern void hs_unlock_stable_ptr_table (void);
+
+/* A deprecated synonym. */
+extern void hs_unlock_stable_tables (void);
+
+/* Free a stable pointer assuming that the stable pointer
+ * table is already locked.
+ */
+extern void hs_free_stable_ptr_unsafe (HsStablePtr sp);
+
+extern void hs_free_stable_ptr (HsStablePtr sp);
+extern void hs_free_fun_ptr (HsFunPtr fp);
+
+extern StgPtr hs_spt_lookup(StgWord64 key[2]);
+extern int hs_spt_keys(StgPtr keys[], int szKeys);
+extern int hs_spt_key_count (void);
+
+extern void hs_try_putmvar (int capability, HsStablePtr sp);
+
+/* -------------------------------------------------------------------------- */
+
+
+
+#if defined(__cplusplus)
+}
+#endif
diff --git a/rts/include/MachDeps.h b/rts/include/MachDeps.h
new file mode 100644
index 0000000000..98a90814d9
--- /dev/null
+++ b/rts/include/MachDeps.h
@@ -0,0 +1,119 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The University of Glasgow 2002
+ *
+ * Definitions that characterise machine specific properties of basic
+ * types (C & Haskell) of a target platform.
+ *
+ * NB: Keep in sync with HsFFI.h and StgTypes.h.
+ * NB: THIS FILE IS INCLUDED IN HASKELL SOURCE!
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* Don't allow the stage1 (cross-)compiler to embed assumptions about the
+ * target platform. When ghc-stage1 is being built by ghc-stage0, it should
+ * not refer to target defines. A few past examples:
+ * - https://gitlab.haskell.org/ghc/ghc/issues/13491
+ * - https://phabricator.haskell.org/D3122
+ * - https://phabricator.haskell.org/D3405
+ *
+ * In those cases the code change assumed that target defines like
+ * SIZEOF_HSINT apply to the host platform, not the target platform.
+ *
+ * So what should be used instead in GHC_STAGE=1?
+ *
+ * To get host's equivalent of SIZEOF_HSINT you can use Bits instances:
+ * Data.Bits.finiteBitSize (0 :: Int)
+ *
+ * To get target's values it is preferred to use runtime target
+ * configuration from 'targetPlatform :: DynFlags -> Platform'
+ * record.
+ *
+ * Hence we hide these macros from GHC_STAGE=1
+ */
+
+/* Sizes of C types come from here... */
+#include "ghcautoconf.h"
+
+/* Sizes of Haskell types follow. These sizes correspond to:
+ * - the number of bytes in the primitive type (eg. Int#)
+ * - the number of bytes in the external representation (eg. HsInt)
+ * - the scale offset used by writeFooOffAddr#
+ *
+ * In the heap, the type may take up more space: eg. SIZEOF_INT8 == 1,
+ * but it takes up SIZEOF_HSWORD (4 or 8) bytes in the heap.
+ */
+
+#define SIZEOF_HSCHAR SIZEOF_WORD32
+#define ALIGNMENT_HSCHAR ALIGNMENT_WORD32
+
+#define SIZEOF_HSINT SIZEOF_VOID_P
+#define ALIGNMENT_HSINT ALIGNMENT_VOID_P
+
+#define SIZEOF_HSWORD SIZEOF_VOID_P
+#define ALIGNMENT_HSWORD ALIGNMENT_VOID_P
+
+#define SIZEOF_HSDOUBLE SIZEOF_DOUBLE
+#define ALIGNMENT_HSDOUBLE ALIGNMENT_DOUBLE
+
+#define SIZEOF_HSFLOAT SIZEOF_FLOAT
+#define ALIGNMENT_HSFLOAT ALIGNMENT_FLOAT
+
+#define SIZEOF_HSPTR SIZEOF_VOID_P
+#define ALIGNMENT_HSPTR ALIGNMENT_VOID_P
+
+#define SIZEOF_HSFUNPTR SIZEOF_VOID_P
+#define ALIGNMENT_HSFUNPTR ALIGNMENT_VOID_P
+
+#define SIZEOF_HSSTABLEPTR SIZEOF_VOID_P
+#define ALIGNMENT_HSSTABLEPTR ALIGNMENT_VOID_P
+
+#define SIZEOF_INT8 SIZEOF_INT8_T
+#define ALIGNMENT_INT8 ALIGNMENT_INT8_T
+
+#define SIZEOF_WORD8 SIZEOF_UINT8_T
+#define ALIGNMENT_WORD8 ALIGNMENT_UINT8_T
+
+#define SIZEOF_INT16 SIZEOF_INT16_T
+#define ALIGNMENT_INT16 ALIGNMENT_INT16_T
+
+#define SIZEOF_WORD16 SIZEOF_UINT16_T
+#define ALIGNMENT_WORD16 ALIGNMENT_UINT16_T
+
+#define SIZEOF_INT32 SIZEOF_INT32_T
+#define ALIGNMENT_INT32 ALIGNMENT_INT32_T
+
+#define SIZEOF_WORD32 SIZEOF_UINT32_T
+#define ALIGNMENT_WORD32 ALIGNMENT_UINT32_T
+
+#define SIZEOF_INT64 SIZEOF_INT64_T
+#define ALIGNMENT_INT64 ALIGNMENT_INT64_T
+
+#define SIZEOF_WORD64 SIZEOF_UINT64_T
+#define ALIGNMENT_WORD64 ALIGNMENT_UINT64_T
+
+#if !defined(WORD_SIZE_IN_BITS)
+#if SIZEOF_HSWORD == 4
+#define WORD_SIZE_IN_BITS 32
+#define WORD_SIZE_IN_BITS_FLOAT 32.0
+#else
+#define WORD_SIZE_IN_BITS 64
+#define WORD_SIZE_IN_BITS_FLOAT 64.0
+#endif
+#endif
+
+#if !defined(TAG_BITS)
+#if SIZEOF_HSWORD == 4
+#define TAG_BITS 2
+#else
+#define TAG_BITS 3
+#endif
+#endif
+
+#define TAG_MASK ((1 << TAG_BITS) - 1)
+
diff --git a/rts/include/Makefile b/rts/include/Makefile
new file mode 100644
index 0000000000..d0dc985cbe
--- /dev/null
+++ b/rts/include/Makefile
@@ -0,0 +1,15 @@
+# -----------------------------------------------------------------------------
+#
+# (c) 2009 The University of Glasgow
+#
+# This file is part of the GHC build system.
+#
+# To understand how the build system works and how to modify it, see
+# https://gitlab.haskell.org/ghc/ghc/wikis/building/architecture
+# https://gitlab.haskell.org/ghc/ghc/wikis/building/modifying
+#
+# -----------------------------------------------------------------------------
+
+dir = rts/include
+TOP = ../..
+include $(TOP)/mk/sub-makefile.mk
diff --git a/rts/include/Rts.h b/rts/include/Rts.h
new file mode 100644
index 0000000000..e3471cf333
--- /dev/null
+++ b/rts/include/Rts.h
@@ -0,0 +1,372 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * RTS external APIs. This file declares everything that the GHC RTS
+ * exposes externally.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* get types from GHC's runtime system */
+#include "ghcconfig.h"
+/* We have to include Types.h before everything else as this defines some
+ macros that will change the behaviour of system headers. */
+#include "stg/Types.h"
+
+/* We include windows.h very early, as on Win64 the CONTEXT type has
+ fields "R8", "R9" and "R10", which goes bad if we've already
+ #define'd those names for our own purposes (in stg/Regs.h) */
+#if defined(HAVE_WINDOWS_H)
+#include <windows.h>
+#endif
+
+#if !defined(IN_STG_CODE)
+#define IN_STG_CODE 0
+#endif
+#include "Stg.h"
+
+#include "HsFFI.h"
+#include "RtsAPI.h"
+
+// Discourage gcc from inlining when debugging - it obfuscates things
+#if defined(DEBUG)
+# undef STATIC_INLINE
+# define STATIC_INLINE static
+#endif
+
+// Fine grained inlining control helpers.
+#define ATTR_ALWAYS_INLINE __attribute__((always_inline))
+#define ATTR_NOINLINE __attribute__((noinline))
+
+
+#include "rts/Types.h"
+#include "rts/Time.h"
+
+#if __GNUC__ >= 3
+#define ATTRIBUTE_ALIGNED(n) __attribute__((aligned(n)))
+#else
+#define ATTRIBUTE_ALIGNED(n) /*nothing*/
+#endif
+
+// Symbols that are extern, but private to the RTS, are declared
+// with visibility "hidden" to hide them outside the RTS shared
+// library.
+#if defined(HAS_VISIBILITY_HIDDEN)
+#define RTS_PRIVATE GNUC3_ATTRIBUTE(visibility("hidden"))
+#else
+#define RTS_PRIVATE /* disabled: RTS_PRIVATE */
+#endif
+
+#if __GNUC__ >= 4
+#define RTS_UNLIKELY(p) __builtin_expect((p),0)
+#else
+#define RTS_UNLIKELY(p) (p)
+#endif
+
+#if __GNUC__ >= 4
+#define RTS_LIKELY(p) __builtin_expect(!!(p), 1)
+#else
+#define RTS_LIKELY(p) (p)
+#endif
+
+/* __builtin_unreachable is supported since GNU C 4.5 */
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
+#define RTS_UNREACHABLE __builtin_unreachable()
+#else
+#define RTS_UNREACHABLE abort()
+#endif
+
+/* Prefetch primitives */
+#define prefetchForRead(ptr) __builtin_prefetch(ptr, 0)
+#define prefetchForWrite(ptr) __builtin_prefetch(ptr, 1)
+
+/* Fix for mingw stat problem (done here so it's early enough) */
+#if defined(mingw32_HOST_OS)
+#define __MSVCRT__ 1
+#endif
+
+/* Needed to get the macro version of errno on some OSs, and also to
+ get prototypes for the _r versions of C library functions. */
+#if !defined(_REENTRANT)
+#define _REENTRANT 1
+#endif
+
+/*
+ * We often want to know the size of something in units of an
+ * StgWord... (rounded up, of course!)
+ */
+#define ROUNDUP_BYTES_TO_WDS(n) (((n) + sizeof(W_) - 1) / sizeof(W_))
+
+#define sizeofW(t) ROUNDUP_BYTES_TO_WDS(sizeof(t))
+
+/* -----------------------------------------------------------------------------
+ Assertions and Debuggery
+
+ CHECK(p) evaluates p and terminates with an error if p is false
+ ASSERT(p) like CHECK(p), but a no-op unless ASSERTS_ENABLED is on --
+ either because we're building in the DEBUG way, or because
+ USE_ASSERTS_ALL_WAYS (aka --enable-asserts-all-ways) was
+ enabled at ./configure time.
+ -------------------------------------------------------------------------- */
+
+void _assertFail(const char *filename, unsigned int linenum)
+ GNUC3_ATTRIBUTE(__noreturn__);
+
+#define CHECK(predicate) \
+ if (RTS_LIKELY(predicate)) \
+ /*null*/; \
+ else \
+ _assertFail(__FILE__, __LINE__)
+
+#define CHECKM(predicate, msg, ...) \
+ if (RTS_LIKELY(predicate)) \
+ /*null*/; \
+ else \
+ barf(msg, ##__VA_ARGS__)
+
+#if defined(DEBUG) || defined(USE_ASSERTS_ALL_WAYS)
+#define ASSERTS_ENABLED 1
+#else
+#undef ASSERTS_ENABLED
+#endif
+
+#if defined(ASSERTS_ENABLED)
+#define ASSERT(predicate) \
+ do { CHECK(predicate); } while(0)
+#define ASSERTM(predicate,msg,...) \
+ do { CHECKM(predicate, msg, ##__VA_ARGS__); } while(0)
+#else
+#define ASSERT(predicate) \
+ do { (void) sizeof(predicate); } while(0)
+#define ASSERTM(predicate,msg,...) \
+ do { (void) sizeof(predicate); (void) sizeof(msg); } while(0)
+#endif /* ASSERTS_ENABLED */
+
+/*
+ * Use this on the RHS of macros which expand to nothing
+ * to make sure that the macro can be used in a context which
+ * demands a non-empty statement.
+ */
+
+#define doNothing() do { } while (0)
+
+#if defined(DEBUG)
+#define USED_IF_DEBUG
+#define USED_IF_NOT_DEBUG STG_UNUSED
+#else
+#define USED_IF_DEBUG STG_UNUSED
+#define USED_IF_NOT_DEBUG
+#endif
+
+#if defined(THREADED_RTS)
+#define USED_IF_THREADS
+#define USED_IF_NOT_THREADS STG_UNUSED
+#else
+#define USED_IF_THREADS STG_UNUSED
+#define USED_IF_NOT_THREADS
+#endif
+
+#if defined(PROFILING)
+#define USED_IF_PROFILING
+#define USED_IF_NOT_PROFILING STG_UNUSED
+#else
+#define USED_IF_PROFILING STG_UNUSED
+#define USED_IF_NOT_PROFILING
+#endif
+
+#define FMT_SizeT "zu"
+#define FMT_HexSizeT "zx"
+
+/* -----------------------------------------------------------------------------
+ Include everything STG-ish
+ -------------------------------------------------------------------------- */
+
+/* System headers: stdlib.h is needed so that we can use NULL. It must
+ * come after MachRegs.h, because stdlib.h might define some inline
+ * functions which may only be defined after register variables have
+ * been declared.
+ */
+#include <stdlib.h>
+
+#include "rts/Config.h"
+
+/* Global constants */
+#include "rts/Constants.h"
+
+/* Runtime flags */
+#include "rts/Flags.h"
+
+/* Profiling information */
+#include "rts/prof/CCS.h"
+#include "rts/prof/Heap.h"
+#include "rts/prof/LDV.h"
+
+/* Parallel information */
+#include "rts/OSThreads.h"
+#include "rts/TSANUtils.h"
+#include "rts/SpinLock.h"
+
+#include "rts/Messages.h"
+#include "rts/Threads.h"
+
+/* Storage format definitions */
+#include "rts/storage/FunTypes.h"
+#include "rts/storage/InfoTables.h"
+#include "rts/storage/Closures.h"
+#include "rts/storage/Heap.h"
+#include "rts/storage/ClosureTypes.h"
+#include "rts/storage/TSO.h"
+#include "stg/MiscClosures.h" /* InfoTables, closures etc. defined in the RTS */
+#include "rts/storage/Block.h"
+#include "rts/storage/ClosureMacros.h"
+#include "rts/storage/MBlock.h"
+#include "rts/storage/GC.h"
+#include "rts/NonMoving.h"
+
+/* Foreign exports */
+#include "rts/ForeignExports.h"
+
+/* Other RTS external APIs */
+#include "rts/ExecPage.h"
+#include "rts/Parallel.h"
+#include "rts/Signals.h"
+#include "rts/BlockSignals.h"
+#include "rts/Hpc.h"
+#include "rts/Adjustor.h"
+#include "rts/FileLock.h"
+#include "rts/GetTime.h"
+#include "rts/Globals.h"
+#include "rts/IOInterface.h"
+#include "rts/Linker.h"
+#include "rts/Ticky.h"
+#include "rts/Timer.h"
+#include "rts/StablePtr.h"
+#include "rts/StableName.h"
+#include "rts/TTY.h"
+#include "rts/Utils.h"
+#include "rts/PrimFloat.h"
+#include "rts/Main.h"
+#include "rts/Profiling.h"
+#include "rts/IPE.h"
+#include "rts/StaticPtrTable.h"
+#include "rts/Libdw.h"
+#include "rts/LibdwPool.h"
+
+/* Misc stuff without a home */
+DLL_IMPORT_RTS extern char **prog_argv; /* so we can get at these from Haskell */
+DLL_IMPORT_RTS extern int prog_argc;
+DLL_IMPORT_RTS extern char *prog_name;
+
+void reportStackOverflow(StgTSO* tso);
+void reportHeapOverflow(void);
+
+void stg_exit(int n) GNU_ATTRIBUTE(__noreturn__);
+
+#if !defined(mingw32_HOST_OS)
+int stg_sig_install (int, int, void *);
+#endif
+
+/* -----------------------------------------------------------------------------
+ Ways
+ -------------------------------------------------------------------------- */
+
+// Returns non-zero if the RTS is a profiling version
+int rts_isProfiled(void);
+
+// Returns non-zero if the RTS is a dynamically-linked version
+int rts_isDynamic(void);
+
+// Returns non-zero if the RTS is a threaded version
+int rts_isThreaded(void);
+
+// Returns non-zero if the RTS is a debugged version
+int rts_isDebugged(void);
+
+// Returns non-zero if the RTS is a tracing version (event log)
+int rts_isTracing(void);
+
+/* -----------------------------------------------------------------------------
+ RTS Exit codes
+ -------------------------------------------------------------------------- */
+
+/* 255 is allegedly used by dynamic linkers to report linking failure */
+#define EXIT_INTERNAL_ERROR 254
+#define EXIT_DEADLOCK 253
+#define EXIT_INTERRUPTED 252
+#define EXIT_HEAPOVERFLOW 251
+#define EXIT_KILLED 250
+
+/* -----------------------------------------------------------------------------
+ Miscellaneous garbage
+ -------------------------------------------------------------------------- */
+
+#if defined(DEBUG)
+#define TICK_VAR(arity) \
+ extern StgInt SLOW_CALLS_##arity; \
+ extern StgInt RIGHT_ARITY_##arity; \
+ extern StgInt TAGGED_PTR_##arity;
+
+extern StgInt TOTAL_CALLS;
+
+TICK_VAR(1)
+TICK_VAR(2)
+#endif
+
+/* -----------------------------------------------------------------------------
+ Assertions and Debuggery
+ -------------------------------------------------------------------------- */
+
+#define IF_RTSFLAGS(c,s) if (RtsFlags.c) { s; } doNothing()
+
+#if defined(DEBUG)
+/* See Note [RtsFlags is a pointer in STG code] */
+#if IN_STG_CODE
+#define IF_DEBUG(c,s) if (RtsFlags[0].DebugFlags.c) { s; } doNothing()
+#else
+#define IF_DEBUG(c,s) if (RtsFlags.DebugFlags.c) { s; } doNothing()
+#endif /* IN_STG_CODE */
+#else
+#define IF_DEBUG(c,s) doNothing()
+#endif /* DEBUG */
+
+#if defined(DEBUG)
+#define DEBUG_ONLY(s) s
+#else
+#define DEBUG_ONLY(s) doNothing()
+#endif /* DEBUG */
+
+#if defined(DEBUG)
+#define DEBUG_IS_ON 1
+#else
+#define DEBUG_IS_ON 0
+#endif /* DEBUG */
+
+/* -----------------------------------------------------------------------------
+ Useful macros and inline functions
+ -------------------------------------------------------------------------- */
+
+#if defined(__GNUC__)
+#define SUPPORTS_TYPEOF
+#endif
+
+#if defined(SUPPORTS_TYPEOF)
+#define stg_min(a,b) ({typeof(a) _a = (a), _b = (b); _a <= _b ? _a : _b; })
+#define stg_max(a,b) ({typeof(a) _a = (a), _b = (b); _a <= _b ? _b : _a; })
+#else
+#define stg_min(a,b) ((a) <= (b) ? (a) : (b))
+#define stg_max(a,b) ((a) <= (b) ? (b) : (a))
+#endif
+
+/* -------------------------------------------------------------------------- */
+
+#if defined(__cplusplus)
+}
+#endif
diff --git a/rts/include/RtsAPI.h b/rts/include/RtsAPI.h
new file mode 100644
index 0000000000..e2d1845819
--- /dev/null
+++ b/rts/include/RtsAPI.h
@@ -0,0 +1,607 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * API for invoking Haskell functions via the RTS
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * --------------------------------------------------------------------------*/
+
+#pragma once
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include "HsFFI.h"
+#include "rts/Time.h"
+#include "rts/Types.h"
+
+/*
+ * Running the scheduler
+ */
+typedef enum {
+ NoStatus, /* not finished yet */
+ Success, /* completed successfully */
+ Killed, /* uncaught exception */
+ Interrupted, /* stopped in response to a call to interruptStgRts */
+ HeapExhausted /* out of memory */
+} SchedulerStatus;
+
+typedef struct StgClosure_ *HaskellObj;
+
+/*
+ * An abstract type representing the token returned by rts_lock() and
+ * used when allocating objects and threads in the RTS.
+ */
+typedef struct Capability_ Capability;
+
+/*
+ * An abstract type representing the token returned by rts_pause().
+ */
+typedef struct PauseToken_ PauseToken;
+
+/*
+ * From a PauseToken, get a Capability token used when allocating objects and
+ * threads in the RTS.
+ */
+Capability *pauseTokenCapability(PauseToken *pauseToken);
+
+/*
+ * The public view of a Capability: we can be sure it starts with
+ * these two components (but it may have more private fields).
+ */
+typedef struct CapabilityPublic_ {
+ StgFunTable f;
+ StgRegTable r;
+} CapabilityPublic;
+
+/* N.B. this needs the Capability declaration above. */
+#include "rts/EventLogWriter.h"
+
+/* ----------------------------------------------------------------------------
+ RTS configuration settings, for passing to hs_init_ghc()
+ ------------------------------------------------------------------------- */
+
+typedef enum {
+ RtsOptsNone, // +RTS causes an error
+ RtsOptsIgnore, // Ignore command line arguments
+ RtsOptsIgnoreAll, // Ignore command line and Environment arguments
+ RtsOptsSafeOnly, // safe RTS options allowed; others cause an error
+ RtsOptsAll // all RTS options allowed
+ } RtsOptsEnabledEnum;
+
+struct GCDetails_;
+
+// The RtsConfig struct is passed (by value) to hs_init_ghc(). The
+// reason for using a struct is extensibility: we can add more
+// fields to this later without breaking existing client code.
+typedef struct {
+
+ // Whether to interpret +RTS options on the command line
+ RtsOptsEnabledEnum rts_opts_enabled;
+
+ // Whether to give RTS flag suggestions
+ HsBool rts_opts_suggestions;
+
+ // additional RTS options
+ const char *rts_opts;
+
+ // True if GHC was not passed -no-hs-main
+ HsBool rts_hs_main;
+
+ // Whether to retain CAFs (default: false)
+ HsBool keep_cafs;
+
+    // A writer for the eventlog.
+ const EventLogWriter *eventlog_writer;
+
+ // Called before processing command-line flags, so that default
+ // settings for RtsFlags can be provided.
+ void (* defaultsHook) (void);
+
+ // Called just before exiting
+ void (* onExitHook) (void);
+
+ // Called on a stack overflow, before exiting
+ void (* stackOverflowHook) (W_ stack_size);
+
+ // Called on heap overflow, before exiting
+ void (* outOfHeapHook) (W_ request_size, W_ heap_size);
+
+ // Called when malloc() fails, before exiting
+ void (* mallocFailHook) (W_ request_size /* in bytes */, const char *msg);
+
+ // Called for every GC
+ void (* gcDoneHook) (const struct GCDetails_ *stats);
+
+ // Called when GC sync takes too long (+RTS --long-gc-sync=<time>)
+ void (* longGCSync) (uint32_t this_cap, Time time_ns);
+ void (* longGCSyncEnd) (Time time_ns);
+} RtsConfig;
+
+// Clients should start with defaultRtsConfig and then customise it.
+// Bah, I really wanted this to be a const struct value, but it seems
+// you can't do that in C (it generates code).
+extern const RtsConfig defaultRtsConfig;
+
+/* -----------------------------------------------------------------------------
+ Statistics
+ -------------------------------------------------------------------------- */
+
+//
+// Stats about a single GC
+//
+typedef struct GCDetails_ {
+ // The generation number of this GC
+ uint32_t gen;
+ // Number of threads used in this GC
+ uint32_t threads;
+ // Number of bytes allocated since the previous GC
+ uint64_t allocated_bytes;
+    // Total amount of live data in the heap (includes large + compact data).
+ // Updated after every GC. Data in uncollected generations (in minor GCs)
+ // are considered live.
+ uint64_t live_bytes;
+ // Total amount of live data in large objects
+ uint64_t large_objects_bytes;
+ // Total amount of live data in compact regions
+ uint64_t compact_bytes;
+ // Total amount of slop (wasted memory)
+ uint64_t slop_bytes;
+ // Total amount of memory in use by the RTS
+ uint64_t mem_in_use_bytes;
+ // Total amount of data copied during this GC
+ uint64_t copied_bytes;
+ // In parallel GC, the max amount of data copied by any one thread
+ uint64_t par_max_copied_bytes;
+ // In parallel GC, the amount of balanced data copied by all threads
+ uint64_t par_balanced_copied_bytes;
+ // The time elapsed during synchronisation before GC
+ Time sync_elapsed_ns;
+ // The CPU time used during GC itself
+ Time cpu_ns;
+ // The time elapsed during GC itself
+ Time elapsed_ns;
+
+ //
+ // Concurrent garbage collector
+ //
+
+ // The CPU time used during the post-mark pause phase of the concurrent
+ // nonmoving GC.
+ Time nonmoving_gc_sync_cpu_ns;
+ // The time elapsed during the post-mark pause phase of the concurrent
+ // nonmoving GC.
+ Time nonmoving_gc_sync_elapsed_ns;
+ // The CPU time used during the post-mark pause phase of the concurrent
+ // nonmoving GC.
+ Time nonmoving_gc_cpu_ns;
+ // The time elapsed during the post-mark pause phase of the concurrent
+ // nonmoving GC.
+ Time nonmoving_gc_elapsed_ns;
+} GCDetails;
+
+//
+// Stats about the RTS currently, and since the start of execution
+//
+typedef struct _RTSStats {
+
+ // -----------------------------------
+ // Cumulative stats about memory use
+
+ // Total number of GCs
+ uint32_t gcs;
+ // Total number of major (oldest generation) GCs
+ uint32_t major_gcs;
+ // Total bytes allocated
+ uint64_t allocated_bytes;
+ // Maximum live data (including large objects + compact regions) in the
+ // heap. Updated after a major GC.
+ uint64_t max_live_bytes;
+ // Maximum live data in large objects
+ uint64_t max_large_objects_bytes;
+ // Maximum live data in compact regions
+ uint64_t max_compact_bytes;
+ // Maximum slop
+ uint64_t max_slop_bytes;
+ // Maximum memory in use by the RTS
+ uint64_t max_mem_in_use_bytes;
+ // Sum of live bytes across all major GCs. Divided by major_gcs
+ // gives the average live data over the lifetime of the program.
+ uint64_t cumulative_live_bytes;
+ // Sum of copied_bytes across all GCs
+ uint64_t copied_bytes;
+ // Sum of copied_bytes across all parallel GCs
+ uint64_t par_copied_bytes;
+ // Sum of par_max_copied_bytes across all parallel GCs
+ uint64_t cumulative_par_max_copied_bytes;
+    // Sum of par_balanced_copied_bytes across all parallel GCs.
+ uint64_t cumulative_par_balanced_copied_bytes;
+
+ // -----------------------------------
+ // Cumulative stats about time use
+ // (we use signed values here because due to inaccuracies in timers
+ // the values can occasionally go slightly negative)
+
+ // Total CPU time used by the init phase
+ Time init_cpu_ns;
+ // Total elapsed time used by the init phase
+ Time init_elapsed_ns;
+ // Total CPU time used by the mutator
+ Time mutator_cpu_ns;
+ // Total elapsed time used by the mutator
+ Time mutator_elapsed_ns;
+ // Total CPU time used by the GC
+ Time gc_cpu_ns;
+ // Total elapsed time used by the GC
+ Time gc_elapsed_ns;
+ // Total CPU time (at the previous GC)
+ Time cpu_ns;
+ // Total elapsed time (at the previous GC)
+ Time elapsed_ns;
+
+ // -----------------------------------
+ // Stats about the most recent GC
+
+ GCDetails gc;
+
+ // -----------------------------------
+ // Internal Counters
+
+ uint64_t any_work;
+    // The number of times a GC thread has iterated its outer loop across all
+ // parallel GCs
+ uint64_t scav_find_work;
+
+ uint64_t max_n_todo_overflow;
+
+ // ----------------------------------
+ // Concurrent garbage collector
+
+ // The CPU time used during the post-mark pause phase of the concurrent
+ // nonmoving GC.
+ Time nonmoving_gc_sync_cpu_ns;
+ // The time elapsed during the post-mark pause phase of the concurrent
+ // nonmoving GC.
+ Time nonmoving_gc_sync_elapsed_ns;
+ // The maximum time elapsed during the post-mark pause phase of the
+ // concurrent nonmoving GC.
+ Time nonmoving_gc_sync_max_elapsed_ns;
+ // The CPU time used during the post-mark pause phase of the concurrent
+ // nonmoving GC.
+ Time nonmoving_gc_cpu_ns;
+ // The time elapsed during the post-mark pause phase of the concurrent
+ // nonmoving GC.
+ Time nonmoving_gc_elapsed_ns;
+ // The maximum time elapsed during the post-mark pause phase of the
+ // concurrent nonmoving GC.
+ Time nonmoving_gc_max_elapsed_ns;
+} RTSStats;
+
+void getRTSStats (RTSStats *s);
+int getRTSStatsEnabled (void);
+
+// Returns the total number of bytes allocated since the start of the program.
+// TODO: can we remove this?
+uint64_t getAllocations (void);
+
+/* ----------------------------------------------------------------------------
+ Starting up and shutting down the Haskell RTS.
+ ------------------------------------------------------------------------- */
+
+/* DEPRECATED, use hs_init() or hs_init_ghc() instead */
+extern void startupHaskell ( int argc, char *argv[],
+ void (*init_root)(void) );
+
+/* DEPRECATED, use hs_exit() instead */
+extern void shutdownHaskell ( void );
+
+/* Like hs_init(), but allows rtsopts. For more complicated usage,
+ * use hs_init_ghc. */
+extern void hs_init_with_rtsopts (int *argc, char **argv[]);
+
+/*
+ * GHC-specific version of hs_init() that allows specifying whether
+ * +RTS ... -RTS options are allowed or not (default: only "safe"
+ * options are allowed), and allows passing an option string that is
+ * to be interpreted by the RTS only, not passed to the program.
+ */
+extern void hs_init_ghc (int *argc, char **argv[], // program arguments
+ RtsConfig rts_config); // RTS configuration
+
+extern void shutdownHaskellAndExit (int exitCode, int fastExit)
+ GNUC3_ATTRIBUTE(__noreturn__);
+
+#if !defined(mingw32_HOST_OS)
+extern void shutdownHaskellAndSignal (int sig, int fastExit)
+ GNUC3_ATTRIBUTE(__noreturn__);
+#endif
+
+extern void getProgArgv ( int *argc, char **argv[] );
+extern void setProgArgv ( int argc, char *argv[] );
+extern void getFullProgArgv ( int *argc, char **argv[] );
+extern void setFullProgArgv ( int argc, char *argv[] );
+extern void freeFullProgArgv ( void ) ;
+
+/* exit() override */
+extern void (*exitFn)(int);
+
+/* Note [Locking and Pausing the RTS]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You have to surround all access to the RtsAPI with rts_lock/rts_unlock or
+with rts_pause/rts_resume.
+
+
+# rts_lock / rts_unlock
+
+Use `rts_lock` to acquire a token which may be used to call other RtsAPI
+functions and call `rts_unlock` to return the token. When locked, garbage
+collection will not occur. As long as 1 or more capabilities are not locked,
+haskell threads will continue to execute. If you want to pause execution of
+all haskell threads then use rts_pause/rts_resume instead.
+
+The implementation of `rts_lock` acquires a capability for this thread. Hence,
+at most n locks can be held simultaneously, where n is the number of
+capabilities. It is an error to call `rts_lock` when the rts is already
+paused by the current OS thread (see rts_pause/rts_resume below).
+
+
+# rts_pause / rts_resume
+
+Use `rts_pause` to pause execution of all Haskell threads and `rts_resume` to
+resume them. The implementation acquires all capabilities. `rts_resume`
+must be called on the same thread as `rts_pause`. `rts_pause`, much like
+rts_lock, returns a token. A `Capability` can be extracted from that token using
+`pauseTokenCapability()`. The `Capability` can then be used to call other RtsAPI
+functions.
+
+* With the RTS paused, garbage collections will not occur and haskell threads
+ will not execute, allocate, nor mutate their stacks.
+* Non-Haskell (i.e. non-worker) threads such as those running safe FFI calls
+ will NOT be paused and can still mutate pinned mutable data such as pinned
+ `MutableByteArray#`s.
+* You may call `rts_pause` from within a non-worker OS thread.
+* You may call `rts_pause` from within a *safe* FFI call. In this case, make
+ sure to call `rts_resume` within the same FFI call or the RTS will deadlock.
+* Calling `rts_pause` from an *unsafe* FFI call will cause an error.
+* On return, the rts will be fully paused: all haskell threads are stopped
+ and all capabilities are acquired by the current OS thread.
+* Calling `rts_pause` in between rts_lock/rts_unlock on the same thread will
+ cause an error.
+* Calling `rts_pause` results in an error if the RTS is already paused by the
+ current OS thread.
+* Only one OS thread at a time can keep the rts paused.
+* `rts_pause` will block while another thread is pausing the RTS, and
+ continue when the current thread is given exclusive permission to pause the
+ RTS.
+
+## Note on implementation.
+
+Thread safety is achieved almost entirely by the mechanism of acquiring and
+releasing Capabilities, resulting in a sort of mutex / critical section pattern.
+This has the following consequences:
+
+* There are at most `n_capabilities` threads currently in a
+ rts_lock/rts_unlock section.
+* There is at most 1 thread in a rts_pause/rts_resume section. In that case
+ there will be no threads in a rts_lock/rts_unlock section.
+* rts_pause and rts_lock may block in order to enforce the above 2
+ invariants.
+
+*/
+
+// Acquires a token which may be used to create new objects and evaluate them.
+// See Note [Locking and Pausing the RTS] for correct usage.
+Capability *rts_lock (void);
+
+// releases the token acquired with rts_lock().
+// See Note [Locking and Pausing the RTS] for correct usage.
+void rts_unlock (Capability *token);
+
+// If you are in a context where you know you have a current capability but
+// do not know what it is, then use this to get it. Basically this only
+// applies to "unsafe" foreign calls (as unsafe foreign calls are made with
+// the capability held).
+//
+// WARNING: There is *no* guarantee this returns anything sensible (eg NULL)
+// when there is no current capability.
+Capability *rts_unsafeGetMyCapability (void);
+
+/* ----------------------------------------------------------------------------
+ Which cpu should the OS thread and Haskell thread run on?
+
+ 1. Run the current thread on the given capability:
+ rts_setInCallCapability(cap, 0);
+
+ 2. Run the current thread on the given capability and set the cpu affinity
+ for this thread:
+ rts_setInCallCapability(cap, 1);
+
+ 3. Run the current thread on the given numa node:
+ rts_pinThreadToNumaNode(node);
+
+ 4. Run the current thread on the given capability and on the given numa node:
+ rts_setInCallCapability(cap, 0);
+ rts_pinThreadToNumaNode(cap);
+ ------------------------------------------------------------------------- */
+
+// Specify the Capability that the current OS thread should run on when it calls
+// into Haskell. The actual capability will be calculated as the supplied
+// value modulo the number of enabled Capabilities.
+//
+// If affinity is non-zero, the current thread will be bound to
+// specific CPUs according to the prevailing affinity policy for the
+// specified capability, set by either +RTS -qa or +RTS --numa.
+void rts_setInCallCapability (int preferred_capability, int affinity);
+
+// Specify the CPU Node that the current OS thread should run on when it calls
+// into Haskell. The argument can be either a node number or capability number.
+// The actual node will be calculated as the supplied value modulo the number
+// of numa nodes.
+void rts_pinThreadToNumaNode (int node);
+
+/* ----------------------------------------------------------------------------
+ Building Haskell objects from C datatypes.
+ ------------------------------------------------------------------------- */
+HaskellObj rts_mkChar ( Capability *, HsChar c );
+HaskellObj rts_mkInt ( Capability *, HsInt i );
+HaskellObj rts_mkInt8 ( Capability *, HsInt8 i );
+HaskellObj rts_mkInt16 ( Capability *, HsInt16 i );
+HaskellObj rts_mkInt32 ( Capability *, HsInt32 i );
+HaskellObj rts_mkInt64 ( Capability *, HsInt64 i );
+HaskellObj rts_mkWord ( Capability *, HsWord w );
+HaskellObj rts_mkWord8 ( Capability *, HsWord8 w );
+HaskellObj rts_mkWord16 ( Capability *, HsWord16 w );
+HaskellObj rts_mkWord32 ( Capability *, HsWord32 w );
+HaskellObj rts_mkWord64 ( Capability *, HsWord64 w );
+HaskellObj rts_mkPtr ( Capability *, HsPtr a );
+HaskellObj rts_mkFunPtr ( Capability *, HsFunPtr a );
+HaskellObj rts_mkFloat ( Capability *, HsFloat f );
+HaskellObj rts_mkDouble ( Capability *, HsDouble f );
+HaskellObj rts_mkStablePtr ( Capability *, HsStablePtr s );
+HaskellObj rts_mkBool ( Capability *, HsBool b );
+HaskellObj rts_mkString ( Capability *, char *s );
+
+HaskellObj rts_apply ( Capability *, HaskellObj, HaskellObj );
+
+/* ----------------------------------------------------------------------------
+ Deconstructing Haskell objects
+ ------------------------------------------------------------------------- */
+HsChar rts_getChar ( HaskellObj );
+HsInt rts_getInt ( HaskellObj );
+HsInt8 rts_getInt8 ( HaskellObj );
+HsInt16 rts_getInt16 ( HaskellObj );
+HsInt32 rts_getInt32 ( HaskellObj );
+HsInt64 rts_getInt64 ( HaskellObj );
+HsWord rts_getWord ( HaskellObj );
+HsWord8 rts_getWord8 ( HaskellObj );
+HsWord16 rts_getWord16 ( HaskellObj );
+HsWord32 rts_getWord32 ( HaskellObj );
+HsWord64 rts_getWord64 ( HaskellObj );
+HsPtr rts_getPtr ( HaskellObj );
+HsFunPtr rts_getFunPtr ( HaskellObj );
+HsFloat rts_getFloat ( HaskellObj );
+HsDouble rts_getDouble ( HaskellObj );
+HsStablePtr rts_getStablePtr ( HaskellObj );
+HsBool rts_getBool ( HaskellObj );
+
+/* ----------------------------------------------------------------------------
+ Evaluating Haskell expressions
+
+ The versions ending in '_' allow you to specify an initial stack size.
+ Note that these calls may cause Garbage Collection, so all HaskellObj
+ references are rendered invalid by these calls.
+
+ All of these functions take a (Capability **) - there is a
+ Capability pointer both input and output. We use an inout
+ parameter because this is less error-prone for the client than a
+ return value - the client could easily forget to use the return
+ value, whereas incorrectly using an inout parameter will usually
+ result in a type error.
+ ------------------------------------------------------------------------- */
+
+void rts_eval (/* inout */ Capability **,
+ /* in */ HaskellObj p,
+ /* out */ HaskellObj *ret);
+
+void rts_eval_ (/* inout */ Capability **,
+ /* in */ HaskellObj p,
+ /* in */ unsigned int stack_size,
+ /* out */ HaskellObj *ret);
+
+void rts_evalIO (/* inout */ Capability **,
+ /* in */ HaskellObj p,
+ /* out */ HaskellObj *ret);
+
+void rts_evalStableIOMain (/* inout */ Capability **,
+ /* in */ HsStablePtr s,
+ /* out */ HsStablePtr *ret);
+
+void rts_evalStableIO (/* inout */ Capability **,
+ /* in */ HsStablePtr s,
+ /* out */ HsStablePtr *ret);
+
+void rts_evalLazyIO (/* inout */ Capability **,
+ /* in */ HaskellObj p,
+ /* out */ HaskellObj *ret);
+
+void rts_evalLazyIO_ (/* inout */ Capability **,
+ /* in */ HaskellObj p,
+ /* in */ unsigned int stack_size,
+ /* out */ HaskellObj *ret);
+
+void rts_inCall (/* inout */ Capability **,
+ /* in */ HaskellObj p,
+ /* out */ HaskellObj *ret);
+
+void rts_checkSchedStatus (char* site, Capability *);
+
+SchedulerStatus rts_getSchedStatus (Capability *cap);
+
+// Halt execution of all Haskell threads.
+// See Note [Locking and Pausing the RTS] for correct usage.
+PauseToken *rts_pause (void);
+
+// Counterpart of rts_pause: Continue from a pause.
+// See Note [Locking and Pausing the RTS] for correct usage.
+// [in] pauseToken: the token returned by rts_pause.
+void rts_resume (PauseToken *pauseToken);
+
+// Returns true if the rts is paused. See rts_pause() and rts_resume().
+bool rts_isPaused(void);
+
+// List all live threads. The RTS must be paused and this must be called on the
+// same thread that called rts_pause().
+typedef void (*ListThreadsCb)(void *user, StgTSO *);
+void rts_listThreads(ListThreadsCb cb, void *user);
+
+// List all non-thread GC roots. The RTS must be paused and this must be called
+// on the same thread that called rts_pause().
+typedef void (*ListRootsCb)(void *user, StgClosure *);
+void rts_listMiscRoots(ListRootsCb cb, void *user);
+
+/*
+ * The RTS allocates some thread-local data when you make a call into
+ * Haskell using one of the rts_eval() functions. This data is not
+ * normally freed until hs_exit(). If you want to free it earlier
+ * than this, perhaps because the thread is about to exit, then call
+ * rts_done() from the thread.
+ *
+ * It is safe to make more rts_eval() calls after calling rts_done(),
+ * but the next one will cause allocation of the thread-local memory
+ * again.
+ */
+void rts_done (void);
+
+/* --------------------------------------------------------------------------
+ Wrapper closures
+
+ These are used by foreign export and foreign import "wrapper" stubs.
+ ----------------------------------------------------------------------- */
+
+// When producing Windows DLLs we need to know which symbols are in the
+// local package/DLL vs external ones.
+//
+// Note that RtsAPI.h is also included by foreign export stubs in
+// the base package itself.
+//
+#if defined(COMPILING_WINDOWS_DLL) && !defined(COMPILING_BASE_PACKAGE)
+__declspec(dllimport) extern StgWord base_GHCziTopHandler_runIO_closure[];
+__declspec(dllimport) extern StgWord base_GHCziTopHandler_runNonIO_closure[];
+#else
+extern StgWord base_GHCziTopHandler_runIO_closure[];
+extern StgWord base_GHCziTopHandler_runNonIO_closure[];
+#endif
+
+#define runIO_closure base_GHCziTopHandler_runIO_closure
+#define runNonIO_closure base_GHCziTopHandler_runNonIO_closure
+
+/* ------------------------------------------------------------------------ */
+
+#if defined(__cplusplus)
+}
+#endif
diff --git a/rts/include/Stg.h b/rts/include/Stg.h
new file mode 100644
index 0000000000..46f71c0241
--- /dev/null
+++ b/rts/include/Stg.h
@@ -0,0 +1,600 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Top-level include file for everything required when compiling .hc
+ * code. NOTE: in .hc files, Stg.h must be included *before* any
+ * other headers, because we define some register variables which must
+ * be done before any inline functions are defined (some system
+ * headers have been known to define the odd inline function).
+ *
+ * We generally try to keep as little visible as possible when
+ * compiling .hc files. So for example the definitions of the
+ * InfoTable structs, closure structs and other RTS types are not
+ * visible here. The compiler knows enough about the representations
+ * of these types to generate code which manipulates them directly
+ * with pointer arithmetic.
+ *
+ * In ordinary C code, do not #include this file directly: #include
+ * "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#if !(__STDC_VERSION__ >= 199901L) && !(__cplusplus >= 201103L)
+# error __STDC_VERSION__ does not advertise C99, C++11 or later
+#endif
+
+/*
+ * If we are compiling a .hc file, then we want all the register
+ * variables. This is what happens if you #include "Stg.h" first:
+ * we assume this is a .hc file, and set IN_STG_CODE==1, which later
+ * causes the register variables to be enabled in stg/Regs.h.
+ *
+ * If instead "Rts.h" is included first, then we are compiling a
+ * vanilla C file. Everything from Stg.h is provided, except that
+ * IN_STG_CODE is not defined, and the register variables will not be
+ * active.
+ */
+#if !defined(IN_STG_CODE)
+# define IN_STG_CODE 1
+
+// Turn on C99 for .hc code. This gives us the INFINITY and NAN
+// constants from math.h, which we occasionally need to use in .hc (#1861)
+# define _ISOC99_SOURCE
+
+// We need _BSD_SOURCE so that math.h defines things like gamma
+// on Linux
+# define _BSD_SOURCE
+
+// On AIX we need _BSD defined, otherwise <math.h> includes <stdlib.h>
+# if defined(_AIX)
+# define _BSD 1
+# endif
+
+// '_BSD_SOURCE' is deprecated since glibc-2.20
+// in favour of '_DEFAULT_SOURCE'
+# define _DEFAULT_SOURCE
+#endif
+
+#if IN_STG_CODE == 0 || defined(CC_LLVM_BACKEND)
+// C compilers that use an LLVM back end (clang or llvm-gcc) do not
+// correctly support global register variables so we make sure that
+// we do not declare them for these compilers.
+# define NO_GLOBAL_REG_DECLS /* don't define fixed registers */
+#endif
+
+/* Configuration */
+#include "ghcconfig.h"
+
+/* The code generator calls the math functions directly in .hc code.
+ NB. after configuration stuff above, because this sets #defines
+ that depend on config info, such as __USE_FILE_OFFSET64 */
+#include <math.h>
+
+// On Solaris, we don't get the INFINITY and NAN constants unless we
+// #define _STDC_C99, and we can't do that unless we also use -std=c99,
+// because _STDC_C99 causes the headers to use C99 syntax (e.g. restrict).
+// We aren't ready for -std=c99 yet, so define INFINITY/NAN by hand using
+// the gcc builtins.
+#if !defined(INFINITY)
+#if defined(__GNUC__)
+#define INFINITY __builtin_inf()
+#else
+#error No definition for INFINITY
+#endif
+#endif
+
+#if !defined(NAN)
+#if defined(__GNUC__)
+#define NAN __builtin_nan("")
+#else
+#error No definition for NAN
+#endif
+#endif
+
+/* -----------------------------------------------------------------------------
+ Useful definitions
+ -------------------------------------------------------------------------- */
+
+/*
+ * The C backend likes to refer to labels by just mentioning their
+ * names. However, when a symbol is declared as a variable in C, the
+ * C compiler will implicitly dereference it when it occurs in source.
+ * So we must subvert this behaviour for .hc files by declaring
+ * variables as arrays, which eliminates the implicit dereference.
+ */
+#if IN_STG_CODE
+#define RTS_VAR(x) (x)[]
+#define RTS_DEREF(x) (*(x))
+#else
+#define RTS_VAR(x) x
+#define RTS_DEREF(x) x
+#endif
+
+/* bit macros
+ */
+#define BITS_PER_BYTE 8
+#define BITS_IN(x) (BITS_PER_BYTE * sizeof(x))
+
+/* Compute offsets of struct fields
+ */
+#define STG_FIELD_OFFSET(s_type, field) ((StgWord)&(((s_type*)0)->field))
+
+/*
+ * 'Portable' inlining:
+ * INLINE_HEADER is for inline functions in header files (macros)
+ * STATIC_INLINE is for inline functions in source files
+ * EXTERN_INLINE is for functions that we want to inline sometimes
+ * (we also compile a static version of the function; see Inlines.c)
+ */
+
+// We generally assume C99 semantics albeit these two definitions work fine even
+// when gnu90 semantics are active (i.e. when __GNUC_GNU_INLINE__ is defined or
+// when a GCC older than 4.2 is used)
+//
+// The problem, however, is with 'extern inline' whose semantics significantly
+// differs between gnu90 and C99
+#define INLINE_HEADER static inline
+#define STATIC_INLINE static inline
+
+// Figure out whether `__attribute__((gnu_inline))` is needed
+// to force gnu90-style 'external inline' semantics.
+#if defined(FORCE_GNU_INLINE)
+// disable auto-detection since HAVE_GNU_INLINE has been defined externally
+#elif defined(__GNUC_GNU_INLINE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2
+// GCC 4.2.x didn't properly support C99 inline semantics (GCC 4.3 was the first
+// release to properly support C99 inline semantics), and therefore warned when
+// using 'extern inline' while in C99 mode unless `__attribute__((gnu_inline))`
+// was explicitly set.
+# define FORCE_GNU_INLINE 1
+#endif
+
+#if defined(FORCE_GNU_INLINE)
+// Force compiler into gnu90 semantics
+# if defined(KEEP_INLINES)
+# define EXTERN_INLINE inline __attribute__((gnu_inline))
+# else
+# define EXTERN_INLINE extern inline __attribute__((gnu_inline))
+# endif
+#elif defined(__GNUC_GNU_INLINE__)
+// we're currently in gnu90 inline mode by default and
+// __attribute__((gnu_inline)) may not be supported, so better leave it off
+# if defined(KEEP_INLINES)
+# define EXTERN_INLINE inline
+# else
+# define EXTERN_INLINE extern inline
+# endif
+#else
+// Assume C99 semantics (yes, this curiously results in swapped definitions!)
+// This is the preferred branch, and at some point we may drop support for
+// compilers not supporting C99 semantics altogether.
+# if defined(KEEP_INLINES)
+# define EXTERN_INLINE extern inline
+# else
+# define EXTERN_INLINE inline
+# endif
+#endif
+
+
+/*
+ * GCC attributes
+ */
+#if defined(__GNUC__)
+#define GNU_ATTRIBUTE(at) __attribute__((at))
+#else
+#define GNU_ATTRIBUTE(at)
+#endif
+
+#if __GNUC__ >= 3
+#define GNUC3_ATTRIBUTE(at) __attribute__((at))
+#else
+#define GNUC3_ATTRIBUTE(at)
+#endif
+
+/* Used to mark a switch case that falls-through */
+#if (defined(__GNUC__) && __GNUC__ >= 7)
+// N.B. Don't enable fallthrough annotations when compiling with Clang.
+// Apparently clang doesn't enable implicitly fallthrough warnings by default
+// http://llvm.org/viewvc/llvm-project?revision=167655&view=revision
+// when compiling C and the attribute cause warnings of their own (#16019).
+#define FALLTHROUGH GNU_ATTRIBUTE(fallthrough)
+#else
+#define FALLTHROUGH ((void)0)
+#endif /* __GNUC__ >= 7 */
+
+#if !defined(DEBUG) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+#define GNUC_ATTR_HOT __attribute__((hot))
+#else
+#define GNUC_ATTR_HOT /* nothing */
+#endif
+
+#define STG_UNUSED GNUC3_ATTRIBUTE(__unused__)
+
+/* Prevent functions from being optimized.
+ See Note [Windows Stack allocations] */
+#if defined(__clang__)
+#define STG_NO_OPTIMIZE __attribute__((optnone))
+#elif defined(__GNUC__) || defined(__GNUG__)
+#define STG_NO_OPTIMIZE __attribute__((optimize("O0")))
+#else
+#define STG_NO_OPTIMIZE /* nothing */
+#endif
+
+/* -----------------------------------------------------------------------------
+ Global type definitions
+ -------------------------------------------------------------------------- */
+
+#include "MachDeps.h"
+#include "stg/Types.h"
+
+/* -----------------------------------------------------------------------------
+ Shorthand forms
+ -------------------------------------------------------------------------- */
+
+typedef StgChar C_;
+typedef StgWord W_;
+typedef StgWord* P_;
+typedef StgInt I_;
+typedef StgWord StgWordArray[];
+typedef StgFunPtr F_;
+
+/* byte arrays (and strings): */
+#define EB_(X) extern const char X[]
+#define IB_(X) static const char X[]
+/* static (non-heap) closures (requires alignment for pointer tagging): */
+#define EC_(X) extern StgWordArray (X) GNU_ATTRIBUTE(aligned (8))
+#define IC_(X) static StgWordArray (X) GNU_ATTRIBUTE(aligned (8))
+/* writable data (does not require alignment): */
+#define ERW_(X) extern StgWordArray (X)
+#define IRW_(X) static StgWordArray (X)
+/* read-only data (does not require alignment): */
+#define ERO_(X) extern const StgWordArray (X)
+#define IRO_(X) static const StgWordArray (X)
+/* stg-native functions: */
+#define IF_(f) static StgFunPtr GNUC3_ATTRIBUTE(used) f(void)
+#define FN_(f) StgFunPtr f(void)
+#define EF_(f) StgFunPtr f(void) /* External Cmm functions */
+/* foreign functions: */
+#define EFF_(f) void f() /* See Note [External function prototypes] */
+
+/* Note [External function prototypes] See #8965, #11395
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In generated C code we need to distinguish between two types
+of external symbols:
+1. Cmm functions declared by 'EF_' macro (External Functions)
+2. C functions declared by 'EFF_' macro (External Foreign Functions)
+
+Cmm functions are simple as they are internal to GHC.
+
+C functions are trickier:
+
+The external-function macro EFF_(F) used to be defined as
+ extern StgFunPtr f(void)
+i.e. a function of zero arguments. On most platforms this doesn't
+matter very much: calls to these functions put the parameters in the
+usual places anyway, and (with the exception of varargs) things just
+work.
+
+However, the ELFv2 ABI on ppc64 optimises stack allocation
+(http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01149.html): a call to a
+function that has a prototype, is not varargs, and receives all parameters
+in registers rather than on the stack does not require the caller to
+allocate an argument save area. The incorrect prototypes cause GCC to
+believe that all functions declared this way can be called without an
+argument save area, but if the callee has sufficiently many arguments then
+it will expect that area to be present, and will thus corrupt the caller's
+stack. This happens in particular with calls to runInteractiveProcess in
+libraries/process/cbits/runProcess.c, and led to #8965.
+
+The simplest fix appears to be to declare these external functions with an
+unspecified argument list rather than a void argument list. This is no
+worse for platforms that don't care either way, and allows a successful
+bootstrap of GHC 7.8 on little-endian Linux ppc64 (which uses the ELFv2
+ABI).
+
+Another case is m68k ABI where 'void*' return type is returned by 'a0'
+register while 'long' return type is returned by 'd0'. Thus we make the
+external prototype return neither of these types to work around #11395.
+*/
+
+
+/* -----------------------------------------------------------------------------
+ Tail calls
+ -------------------------------------------------------------------------- */
+
+#define JMP_(cont) return((StgFunPtr)(cont))
+
+/* -----------------------------------------------------------------------------
+ Other Stg stuff...
+ -------------------------------------------------------------------------- */
+
+#include "stg/DLL.h"
+#include "stg/MachRegsForHost.h"
+#include "stg/Regs.h"
+#include "stg/Ticky.h"
+
+#if IN_STG_CODE
+/*
+ * This is included later for RTS sources, after definitions of
+ * StgInfoTable, StgClosure and so on.
+ */
+#include "stg/MiscClosures.h"
+#endif
+
+#include "stg/Prim.h" /* ghc-prim fallbacks */
+#include "stg/SMP.h" // write_barrier() inline is required
+
+/* -----------------------------------------------------------------------------
+ Moving Floats and Doubles
+
+ ASSIGN_FLT is for assigning a float to memory (usually the
+ stack/heap). The memory address is guaranteed to be
+ StgWord aligned (currently == sizeof(void *)).
+
+ PK_FLT is for pulling a float out of memory. The memory is
+ guaranteed to be StgWord aligned.
+ -------------------------------------------------------------------------- */
+
+INLINE_HEADER void ASSIGN_FLT (W_ [], StgFloat);
+INLINE_HEADER StgFloat PK_FLT (W_ []);
+
+#if ALIGNMENT_FLOAT <= ALIGNMENT_VOID_P
+
+INLINE_HEADER void ASSIGN_FLT(W_ p_dest[], StgFloat src) { *(StgFloat *)p_dest = src; }
+INLINE_HEADER StgFloat PK_FLT (W_ p_src[]) { return *(StgFloat *)p_src; }
+
+#else /* ALIGNMENT_FLOAT > ALIGNMENT_VOID_P */
+
+INLINE_HEADER void ASSIGN_FLT(W_ p_dest[], StgFloat src)
+{
+ float_thing y;
+ y.f = src;
+ *p_dest = y.fu;
+}
+
+INLINE_HEADER StgFloat PK_FLT(W_ p_src[])
+{
+ float_thing y;
+ y.fu = *p_src;
+ return(y.f);
+}
+
+#endif /* ALIGNMENT_FLOAT > ALIGNMENT_VOID_P */
+
+#if ALIGNMENT_DOUBLE <= ALIGNMENT_VOID_P
+
+INLINE_HEADER void ASSIGN_DBL (W_ [], StgDouble);
+INLINE_HEADER StgDouble PK_DBL (W_ []);
+
+INLINE_HEADER void ASSIGN_DBL(W_ p_dest[], StgDouble src) { *(StgDouble *)p_dest = src; }
+INLINE_HEADER StgDouble PK_DBL (W_ p_src[]) { return *(StgDouble *)p_src; }
+
+#else /* ALIGNMENT_DOUBLE > ALIGNMENT_VOID_P */
+
+/* Sparc uses two floating point registers to hold a double. We can
+ * write ASSIGN_DBL and PK_DBL by directly accessing the registers
+ * independently - unfortunately this code isn't writable in C, we
+ * have to use inline assembler.
+ */
+#if defined(sparc_HOST_ARCH)
+
+#define ASSIGN_DBL(dst0,src) \
+ { StgPtr dst = (StgPtr)(dst0); \
+ __asm__("st %2,%0\n\tst %R2,%1" : "=m" (((P_)(dst))[0]), \
+ "=m" (((P_)(dst))[1]) : "f" (src)); \
+ }
+
+#define PK_DBL(src0) \
+ ( { StgPtr src = (StgPtr)(src0); \
+ register double d; \
+ __asm__("ld %1,%0\n\tld %2,%R0" : "=f" (d) : \
+ "m" (((P_)(src))[0]), "m" (((P_)(src))[1])); d; \
+ } )
+
+#else /* ! sparc_HOST_ARCH */
+
+INLINE_HEADER void ASSIGN_DBL (W_ [], StgDouble);
+INLINE_HEADER StgDouble PK_DBL (W_ []);
+
+typedef struct
+ { StgWord dhi;
+ StgWord dlo;
+ } unpacked_double;
+
+typedef union
+ { StgDouble d;
+ unpacked_double du;
+ } double_thing;
+
+INLINE_HEADER void ASSIGN_DBL(W_ p_dest[], StgDouble src)
+{
+ double_thing y;
+ y.d = src;
+ p_dest[0] = y.du.dhi;
+ p_dest[1] = y.du.dlo;
+}
+
+/* GCC also works with this version, but it generates
+ the same code as the previous one, and is not ANSI
+
+#define ASSIGN_DBL( p_dest, src ) \
+ *p_dest = ((double_thing) src).du.dhi; \
+ *(p_dest+1) = ((double_thing) src).du.dlo \
+*/
+
+INLINE_HEADER StgDouble PK_DBL(W_ p_src[])
+{
+ double_thing y;
+ y.du.dhi = p_src[0];
+ y.du.dlo = p_src[1];
+ return(y.d);
+}
+
+#endif /* ! sparc_HOST_ARCH */
+
+#endif /* ALIGNMENT_DOUBLE > ALIGNMENT_VOID_P */
+
+
+/* -----------------------------------------------------------------------------
+ Moving 64-bit quantities around
+
+ ASSIGN_Word64 assign an StgWord64/StgInt64 to a memory location
+   PK_Word64     load an StgWord64/StgInt64 from a memory location
+
+ In both cases the memory location might not be 64-bit aligned.
+ -------------------------------------------------------------------------- */
+
+#if SIZEOF_HSWORD == 4
+
+typedef struct
+ { StgWord dhi;
+ StgWord dlo;
+ } unpacked_double_word;
+
+typedef union
+ { StgInt64 i;
+ unpacked_double_word iu;
+ } int64_thing;
+
+typedef union
+ { StgWord64 w;
+ unpacked_double_word wu;
+ } word64_thing;
+
+INLINE_HEADER void ASSIGN_Word64(W_ p_dest[], StgWord64 src)
+{
+ word64_thing y;
+ y.w = src;
+ p_dest[0] = y.wu.dhi;
+ p_dest[1] = y.wu.dlo;
+}
+
+INLINE_HEADER StgWord64 PK_Word64(W_ p_src[])
+{
+ word64_thing y;
+ y.wu.dhi = p_src[0];
+ y.wu.dlo = p_src[1];
+ return(y.w);
+}
+
+INLINE_HEADER void ASSIGN_Int64(W_ p_dest[], StgInt64 src)
+{
+ int64_thing y;
+ y.i = src;
+ p_dest[0] = y.iu.dhi;
+ p_dest[1] = y.iu.dlo;
+}
+
+INLINE_HEADER StgInt64 PK_Int64(W_ p_src[])
+{
+ int64_thing y;
+ y.iu.dhi = p_src[0];
+ y.iu.dlo = p_src[1];
+ return(y.i);
+}
+
+#elif SIZEOF_VOID_P == 8
+
+INLINE_HEADER void ASSIGN_Word64(W_ p_dest[], StgWord64 src)
+{
+ p_dest[0] = src;
+}
+
+INLINE_HEADER StgWord64 PK_Word64(W_ p_src[])
+{
+ return p_src[0];
+}
+
+INLINE_HEADER void ASSIGN_Int64(W_ p_dest[], StgInt64 src)
+{
+ p_dest[0] = src;
+}
+
+INLINE_HEADER StgInt64 PK_Int64(W_ p_src[])
+{
+ return p_src[0];
+}
+
+#endif /* SIZEOF_HSWORD == 4 */
+
+/* -----------------------------------------------------------------------------
+ Integer multiply with overflow
+ -------------------------------------------------------------------------- */
+
+/* Multiply with overflow checking.
+ *
+ * This is tricky - the usual sign rules for add/subtract don't apply.
+ *
+ * On 32-bit machines we use gcc's 'long long' types, finding
+ * overflow with some careful bit-twiddling.
+ *
+ * On 64-bit machines where gcc's 'long long' type is also 64-bits,
+ * we use a crude approximation, testing whether either operand is
+ * larger than 32-bits; if neither is, then we go ahead with the
+ * multiplication.
+ *
+ * Return non-zero if there is any possibility that the signed multiply
+ * of a and b might overflow. Return zero only if you are absolutely sure
+ * that it won't overflow. If in doubt, return non-zero.
+ */
+
+#if SIZEOF_VOID_P == 4
+
+#if defined(WORDS_BIGENDIAN)
+#define RTS_CARRY_IDX__ 0
+#define RTS_REM_IDX__ 1
+#else
+#define RTS_CARRY_IDX__ 1
+#define RTS_REM_IDX__ 0
+#endif
+
+typedef union {
+ StgInt64 l;
+ StgInt32 i[2];
+} long_long_u ;
+
+#define mulIntMayOflo(a,b) \
+({ \
+ StgInt32 r, c; \
+ long_long_u z; \
+ z.l = (StgInt64)a * (StgInt64)b; \
+ r = z.i[RTS_REM_IDX__]; \
+ c = z.i[RTS_CARRY_IDX__]; \
+ if (c == 0 || c == -1) { \
+ c = ((StgWord)((a^b) ^ r)) \
+ >> (BITS_IN (I_) - 1); \
+ } \
+ c; \
+})
+
+/* Careful: the carry calculation above is extremely delicate. Make sure
+ * you test it thoroughly after changing it.
+ */
+
+#else
+
+/* Approximate version when we don't have long arithmetic (on 64-bit archs) */
+
+/* If we have n-bit words then we have n-1 bits after accounting for the
+ * sign bit, so we can fit the result of multiplying 2 (n-1)/2-bit numbers */
+#define HALF_POS_INT (((I_)1) << ((BITS_IN (I_) - 1) / 2))
+#define HALF_NEG_INT (-HALF_POS_INT)
+
+#define mulIntMayOflo(a,b) \
+({ \
+ I_ c; \
+ if ((I_)a <= HALF_NEG_INT || a >= HALF_POS_INT \
+ || (I_)b <= HALF_NEG_INT || b >= HALF_POS_INT) {\
+ c = 1; \
+ } else { \
+ c = 0; \
+ } \
+ c; \
+})
+#endif
+
diff --git a/rts/include/ghc.mk b/rts/include/ghc.mk
new file mode 100644
index 0000000000..88cb12caff
--- /dev/null
+++ b/rts/include/ghc.mk
@@ -0,0 +1,311 @@
+# -----------------------------------------------------------------------------
+#
+# (c) 2009 The University of Glasgow
+#
+# This file is part of the GHC build system.
+#
+# To understand how the build system works and how to modify it, see
+# https://gitlab.haskell.org/ghc/ghc/wikis/building/architecture
+# https://gitlab.haskell.org/ghc/ghc/wikis/building/modifying
+#
+# -----------------------------------------------------------------------------
+
+#
+# Header files built from the configure script's findings
+#
+includes_0_H_CONFIG = rts/include/dist/build/ghcautoconf.h
+includes_1_H_CONFIG = rts/include/dist-install/build/ghcautoconf.h
+includes_2_H_CONFIG = $(includes_1_H_CONFIG)
+
+includes_0_H_PLATFORM = rts/include/dist/build/ghcplatform.h
+includes_1_H_PLATFORM = rts/include/dist-install/build/ghcplatform.h
+includes_2_H_PLATFORM = $(includes_1_H_PLATFORM)
+
+includes_0_H_VERSION = rts/include/dist/build/ghcversion.h
+includes_1_H_VERSION = rts/include/dist-install/build/ghcversion.h
+includes_2_H_VERSION = $(includes_1_H_VERSION)
+
+BUILD_0_INCLUDE_DIR = rts/include/dist/build
+BUILD_1_INCLUDE_DIR = rts/include/dist-install/build
+BUILD_2_INCLUDE_DIR = $(BUILD_1_INCLUDE_DIR)
+
+#
+# All header files are in rts/include/{one of these subdirectories}
+#
+includes_H_SUBDIRS += .
+includes_H_SUBDIRS += rts
+includes_H_SUBDIRS += rts/prof
+includes_H_SUBDIRS += rts/storage
+includes_H_SUBDIRS += stg
+
+includes_H_FILES := $(wildcard $(patsubst %,rts/include/%/*.h,$(includes_H_SUBDIRS)))
+# This isn't necessary, but it makes the paths look a little prettier
+includes_H_FILES := $(subst /./,/,$(includes_H_FILES))
+
+#
+# Options
+#
+
+includes_CC_OPTS += $(SRC_CC_OPTS)
+includes_CC_OPTS += $(SRC_CC_WARNING_OPTS)
+includes_CC_OPTS += $(CONF_CC_OPTS_STAGE1)
+
+ifeq "$(GhcUnregisterised)" "YES"
+includes_CC_OPTS += -DUSE_MINIINTERPRETER
+endif
+
+includes_CC_OPTS += $(addprefix -I,$(GHC_INCLUDE_DIRS))
+includes_CC_OPTS += -I$(BUILD_1_INCLUDE_DIR)
+includes_CC_OPTS += -Irts
+
+ifneq "$(GhcWithSMP)" "YES"
+includes_CC_OPTS += -DNOSMP
+endif
+
+define includesHeaderVersion
+# $1 = stage
+$$(includes_$1_H_VERSION) : mk/project.mk | $$$$(dir $$$$@)/.
+ $$(call removeFiles,$$@)
+ @echo "Creating $$@..."
+ @echo "#if !defined(__GHCVERSION_H__)" > $$@
+ @echo "#define __GHCVERSION_H__" >> $$@
+ @echo >> $$@
+ @echo "#define __GLASGOW_HASKELL__ $$(ProjectVersionInt)" >> $$@
+ @echo "#define __GLASGOW_HASKELL_FULL_VERSION__ \"$$(ProjectVersion)\"" >> $$@
+ @echo >> $$@
+ @if [ -n "$$(ProjectPatchLevel1)" ]; then \
+ echo "#define __GLASGOW_HASKELL_PATCHLEVEL1__ $$(ProjectPatchLevel1)" >> $$@; \
+ fi
+ @if [ -n "$$(ProjectPatchLevel2)" ]; then \
+ echo "#define __GLASGOW_HASKELL_PATCHLEVEL2__ $$(ProjectPatchLevel2)" >> $$@; \
+ fi
+ @echo >> $$@
+ @echo '#define MIN_VERSION_GLASGOW_HASKELL(ma,mi,pl1,pl2) (\' >> $$@
+ @echo ' ((ma)*100+(mi)) < __GLASGOW_HASKELL__ || \' >> $$@
+ @echo ' ((ma)*100+(mi)) == __GLASGOW_HASKELL__ \' >> $$@
+ @echo ' && (pl1) < __GLASGOW_HASKELL_PATCHLEVEL1__ || \' >> $$@
+ @echo ' ((ma)*100+(mi)) == __GLASGOW_HASKELL__ \' >> $$@
+ @echo ' && (pl1) == __GLASGOW_HASKELL_PATCHLEVEL1__ \' >> $$@
+ @echo ' && (pl2) <= __GLASGOW_HASKELL_PATCHLEVEL2__ )' >> $$@
+ @echo >> $$@
+ @echo "#endif /* __GHCVERSION_H__ */" >> $$@
+ @echo "Done."
+
+endef
+
+$(eval $(call includesHeaderVersion,0))
+$(eval $(call includesHeaderVersion,1))
+
+ifneq "$(BINDIST)" "YES"
+
+define includesHeaderConfig
+# $1 = stage
+$$(includes_$1_H_CONFIG) : mk/config.h mk/config.mk rts/include/ghc.mk | $$$$(dir $$$$@)/.
+ $$(call removeFiles,$$@)
+ @echo "Creating $$@..."
+ @echo "#if !defined(__GHCAUTOCONF_H__)" > $$@
+ @echo "#define __GHCAUTOCONF_H__" >> $$@
+#
+# Copy the contents of mk/config.h, turning '#define PACKAGE_FOO
+# "blah"' into '/* #undef PACKAGE_FOO */' to avoid clashes.
+#
+ @sed 's,^\([ ]*\)#[ ]*define[ ][ ]*\(PACKAGE_[A-Z]*\)[ ][ ]*".*".*$$$$,\1/* #undef \2 */,' mk/config.h >> $$@
+#
+ @echo "#endif /* __GHCAUTOCONF_H__ */" >> $$@
+ @echo "Done."
+
+endef
+
+$(eval $(call includesHeaderConfig,0))
+$(eval $(call includesHeaderConfig,1))
+
+BuildPlatform_0_CPP = $(BuildPlatform_CPP)
+BuildPlatform_1_CPP = $(HostPlatform_CPP)
+BuildPlatform_2_CPP = $(TargetPlatform_CPP)
+
+HostPlatform_0_CPP = $(HostPlatform_CPP)
+HostPlatform_1_CPP = $(TargetPlatform_CPP)
+HostPlatform_2_CPP = $(TargetPlatform_CPP)
+
+BuildArch_0_CPP = $(BuildArch_CPP)
+BuildArch_1_CPP = $(HostArch_CPP)
+BuildArch_2_CPP = $(TargetArch_CPP)
+
+HostArch_0_CPP = $(HostArch_CPP)
+HostArch_1_CPP = $(TargetArch_CPP)
+HostArch_2_CPP = $(TargetArch_CPP)
+
+BuildOS_0_CPP = $(BuildOS_CPP)
+BuildOS_1_CPP = $(HostOS_CPP)
+BuildOS_2_CPP = $(TargetOS_CPP)
+
+HostOS_0_CPP = $(HostOS_CPP)
+HostOS_1_CPP = $(TargetOS_CPP)
+HostOS_2_CPP = $(TargetOS_CPP)
+
+BuildVendor_0_CPP = $(BuildVendor_CPP)
+BuildVendor_1_CPP = $(HostVendor_CPP)
+BuildVendor_2_CPP = $(TargetVendor_CPP)
+
+HostVendor_0_CPP = $(HostVendor_CPP)
+HostVendor_1_CPP = $(TargetVendor_CPP)
+HostVendor_2_CPP = $(TargetVendor_CPP)
+
+define includesHeaderPlatform
+# $1 = stage
+$$(includes_$1_H_PLATFORM) : rts/include/ghc.mk rts/include/Makefile | $$$$(dir $$$$@)/.
+ $$(call removeFiles,$$@)
+ @echo "Creating $$@..."
+ @echo "#if !defined(__GHCPLATFORM_H__)" > $$@
+ @echo "#define __GHCPLATFORM_H__" >> $$@
+ @echo >> $$@
+ @echo "#define GHC_STAGE ($1 + 1)" >> $$@
+ @echo >> $$@
+ @echo "#define BuildPlatform_TYPE $(BuildPlatform_$1_CPP)" >> $$@
+ @echo "#define HostPlatform_TYPE $(HostPlatform_$1_CPP)" >> $$@
+ @echo >> $$@
+ @echo "#define $(BuildPlatform_$1_CPP)_BUILD 1" >> $$@
+ @echo "#define $(HostPlatform_$1_CPP)_HOST 1" >> $$@
+ @echo >> $$@
+ @echo "#define $(BuildArch_$1_CPP)_BUILD_ARCH 1" >> $$@
+ @echo "#define $(HostArch_$1_CPP)_HOST_ARCH 1" >> $$@
+ @echo "#define BUILD_ARCH \"$(BuildArch_$1_CPP)\"" >> $$@
+ @echo "#define HOST_ARCH \"$(HostArch_$1_CPP)\"" >> $$@
+ @echo >> $$@
+ @echo "#define $(BuildOS_$1_CPP)_BUILD_OS 1" >> $$@
+ @echo "#define $(HostOS_$1_CPP)_HOST_OS 1" >> $$@
+ @echo "#define BUILD_OS \"$(BuildOS_$1_CPP)\"" >> $$@
+ @echo "#define HOST_OS \"$(HostOS_$1_CPP)\"" >> $$@
+ @echo >> $$@
+ @echo "#define $(BuildVendor_$1_CPP)_BUILD_VENDOR 1" >> $$@
+ @echo "#define $(HostVendor_$1_CPP)_HOST_VENDOR 1" >> $$@
+ @echo "#define BUILD_VENDOR \"$(BuildVendor_$1_CPP)\"" >> $$@
+ @echo "#define HOST_VENDOR \"$(HostVendor_$1_CPP)\"" >> $$@
+ @echo >> $$@
+ifeq "$$(SettingsUseDistroMINGW)" "YES"
+ @echo "#define USE_INPLACE_MINGW_TOOLCHAIN 1" >> $$@
+endif
+ifeq "$$(GhcUnregisterised)" "YES"
+ @echo "#define UnregisterisedCompiler 1" >> $$@
+endif
+ @echo >> $$@
+ @echo "#endif /* __GHCPLATFORM_H__ */" >> $$@
+ @echo "Done."
+endef
+
+endif
+
+$(eval $(call includesHeaderPlatform,0))
+$(eval $(call includesHeaderPlatform,1))
+
+# -----------------------------------------------------------------------------
+# Settings
+
+# These settings are read by GHC at runtime, so as to not cause spurious
+# rebuilds.
+# See Note [tooldir: How GHC finds mingw on Windows]
+
+includes_SETTINGS = rts/include/dist/build/settings
+
+$(includes_SETTINGS) : rts/include/Makefile | $$(dir $$@)/.
+ $(call removeFiles,$@)
+ @echo '[("GCC extra via C opts", "$(GccExtraViaCOpts)")' >> $@
+ @echo ',("C compiler command", "$(SettingsCCompilerCommand)")' >> $@
+ @echo ',("C compiler flags", "$(SettingsCCompilerFlags)")' >> $@
+ @echo ',("C++ compiler flags", "$(SettingsCxxCompilerFlags)")' >> $@
+ @echo ',("C compiler link flags", "$(SettingsCCompilerLinkFlags)")' >> $@
+ @echo ',("C compiler supports -no-pie", "$(SettingsCCompilerSupportsNoPie)")' >> $@
+ @echo ',("Haskell CPP command", "$(SettingsHaskellCPPCommand)")' >> $@
+ @echo ',("Haskell CPP flags", "$(SettingsHaskellCPPFlags)")' >> $@
+ @echo ',("ld command", "$(SettingsLdCommand)")' >> $@
+ @echo ',("ld flags", "$(SettingsLdFlags)")' >> $@
+ @echo ',("ld supports compact unwind", "$(LdHasNoCompactUnwind)")' >> $@
+ @echo ',("ld supports build-id", "$(LdHasBuildId)")' >> $@
+ @echo ',("ld supports filelist", "$(LdHasFilelist)")' >> $@
+ @echo ',("ld is GNU ld", "$(LdIsGNULd)")' >> $@
+ @echo ',("Merge objects command", "$(SettingsMergeObjectsCommand)")' >> $@
+ @echo ',("Merge objects flags", "$(SettingsMergeObjectsFlags)")' >> $@
+ @echo ',("ar command", "$(SettingsArCommand)")' >> $@
+ @echo ',("ar flags", "$(ArArgs)")' >> $@
+ @echo ',("ar supports at file", "$(ArSupportsAtFile)")' >> $@
+ @echo ',("ranlib command", "$(SettingsRanlibCommand)")' >> $@
+ @echo ',("otool command", "$(SettingsOtoolCommand)")' >> $@
+ @echo ',("install_name_tool command", "$(SettingsInstallNameToolCommand)")' >> $@
+ @echo ',("touch command", "$(SettingsTouchCommand)")' >> $@
+ @echo ',("dllwrap command", "$(SettingsDllWrapCommand)")' >> $@
+ @echo ',("windres command", "$(SettingsWindresCommand)")' >> $@
+ @echo ',("libtool command", "$(SettingsLibtoolCommand)")' >> $@
+ @echo ',("unlit command", "$$topdir/bin/$(utils/unlit_dist_PROG)")' >> $@
+ @echo ',("cross compiling", "$(CrossCompiling)")' >> $@
+ @echo ',("target platform string", "$(TARGETPLATFORM)")' >> $@
+ @echo ',("target os", "$(HaskellTargetOs)")' >> $@
+ @echo ',("target arch", "$(HaskellTargetArch)")' >> $@
+ @echo ',("target word size", "$(TargetWordSize)")' >> $@
+ @echo ',("target word big endian", "$(TargetWordBigEndian)")' >> $@
+ @echo ',("target has GNU nonexec stack", "$(TargetHasGnuNonexecStack)")' >> $@
+ @echo ',("target has .ident directive", "$(TargetHasIdentDirective)")' >> $@
+ @echo ',("target has subsections via symbols", "$(TargetHasSubsectionsViaSymbols)")' >> $@
+ @echo ',("target has RTS linker", "$(TargetHasRTSLinker)")' >> $@
+ @echo ',("Unregisterised", "$(GhcUnregisterised)")' >> $@
+ @echo ',("LLVM target", "$(LLVMTarget_CPP)")' >> $@
+ @echo ',("LLVM llc command", "$(SettingsLlcCommand)")' >> $@
+ @echo ',("LLVM opt command", "$(SettingsOptCommand)")' >> $@
+ @echo ',("LLVM clang command", "$(SettingsClangCommand)")' >> $@
+ @echo
+ @echo ',("bignum backend", "$(BIGNUM_BACKEND)")' >> $@
+ @echo ',("Use interpreter", "$(GhcWithInterpreter)")' >> $@
+ @echo ',("Support SMP", "$(GhcWithSMP)")' >> $@
+ @echo ',("RTS ways", "$(GhcRTSWays)")' >> $@
+ @echo ',("Tables next to code", "$(TablesNextToCode)")' >> $@
+ @echo ',("Leading underscore", "$(LeadingUnderscore)")' >> $@
+ @echo ',("Use LibFFI", "$(UseLibffiForAdjustors)")' >> $@
+ @echo ",(\"RTS expects libdw\", \"$(GhcRtsWithLibdw)\")" >> $@
+ @echo "]" >> $@
+
+
+# ---------------------------------------------------------------------------
+# Make DerivedConstants.h for the compiler
+
+includes_DERIVEDCONSTANTS = rts/include/dist-derivedconstants/header/DerivedConstants.h
+
+DERIVE_CONSTANTS_FLAGS_FOR_HEADER += --gcc-program "$(CC)"
+DERIVE_CONSTANTS_FLAGS_FOR_HEADER += $(addprefix --gcc-flag$(space),$(includes_CC_OPTS) -fcommon)
+DERIVE_CONSTANTS_FLAGS_FOR_HEADER += --nm-program "$(NM)"
+ifneq "$(OBJDUMP)" ""
+DERIVE_CONSTANTS_FLAGS_FOR_HEADER += --objdump-program "$(OBJDUMP)"
+endif
+DERIVE_CONSTANTS_FLAGS_FOR_HEADER += --target-os "$(TargetOS_CPP)"
+
+ifneq "$(BINDIST)" "YES"
+$(includes_DERIVEDCONSTANTS): $$(includes_H_FILES) $$(rts_H_FILES)
+
+$(includes_DERIVEDCONSTANTS): $(deriveConstants_INPLACE) $(includes_1_H_CONFIG) $(includes_1_H_PLATFORM) | $$(dir $$@)/.
+ $< --gen-header -o $@ --tmpdir $(dir $@) $(DERIVE_CONSTANTS_FLAGS_FOR_HEADER)
+endif
+
+# ---------------------------------------------------------------------------
+# Install all header files
+
+$(eval $(call clean-target,includes,,\
+ $(includes_0_H_CONFIG) $(includes_0_H_PLATFORM) $(includes_0_H_VERSION) \
+ $(includes_1_H_CONFIG) $(includes_1_H_PLATFORM) $(includes_1_H_VERSION)))
+
+$(eval $(call all-target,includes,\
+ $(includes_0_H_CONFIG) $(includes_0_H_PLATFORM) $(includes_0_H_VERSION) \
+ $(includes_1_H_CONFIG) $(includes_1_H_PLATFORM) $(includes_1_H_VERSION) \
+ $(includes_DERIVEDCONSTANTS)))
+
+install: install_includes
+
+.PHONY: install_includes
+install_includes : $(includes_1_H_CONFIG) $(includes_1_H_PLATFORM) $(includes_1_H_VERSION)
+ $(INSTALL_DIR) "$(DESTDIR)$(ghcheaderdir)"
+ $(foreach d,$(includes_H_SUBDIRS), \
+ $(INSTALL_DIR) "$(DESTDIR)$(ghcheaderdir)/$d" && \
+ $(INSTALL_HEADER) $(INSTALL_OPTS) rts/include/$d/*.h "$(DESTDIR)$(ghcheaderdir)/$d/" && \
+ ) true
+ $(INSTALL_HEADER) $(INSTALL_OPTS) \
+ $(includes_1_H_CONFIG) $(includes_1_H_PLATFORM) $(includes_1_H_VERSION) \
+ $(includes_DERIVEDCONSTANTS) \
+ "$(DESTDIR)$(ghcheaderdir)/"
+
diff --git a/rts/include/ghcconfig.h b/rts/include/ghcconfig.h
new file mode 100644
index 0000000000..7b99835a23
--- /dev/null
+++ b/rts/include/ghcconfig.h
@@ -0,0 +1,4 @@
+#pragma once
+
+#include "ghcautoconf.h"
+#include "ghcplatform.h"
diff --git a/rts/include/rts/Adjustor.h b/rts/include/rts/Adjustor.h
new file mode 100644
index 0000000000..8965c7c8bb
--- /dev/null
+++ b/rts/include/rts/Adjustor.h
@@ -0,0 +1,22 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Adjustor API
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/* Creating and destroying an adjustor thunk */
+void* createAdjustor (int cconv,
+ StgStablePtr hptr,
+ StgFunPtr wptr,
+ char *typeString);
+
+void freeHaskellFunctionPtr (void* ptr);
diff --git a/rts/include/rts/BlockSignals.h b/rts/include/rts/BlockSignals.h
new file mode 100644
index 0000000000..46b0b0f562
--- /dev/null
+++ b/rts/include/rts/BlockSignals.h
@@ -0,0 +1,34 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * RTS signal handling
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* Used by runProcess() in the process package
+ */
+
+/*
+ * Function: blockUserSignals()
+ *
+ * Temporarily block the delivery of further console events. Needed to
+ * avoid race conditions when GCing the queue of outstanding handlers or
+ * when emptying the queue by running the handlers.
+ *
+ */
+void blockUserSignals(void);
+
+/*
+ * Function: unblockUserSignals()
+ *
+ * The inverse of blockUserSignals(); re-enable the delivery of console events.
+ */
+void unblockUserSignals(void);
diff --git a/rts/include/rts/Bytecodes.h b/rts/include/rts/Bytecodes.h
new file mode 100644
index 0000000000..b97d4d4f60
--- /dev/null
+++ b/rts/include/rts/Bytecodes.h
@@ -0,0 +1,109 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Bytecode definitions.
+ *
+ * ---------------------------------------------------------------------------*/
+
+/* --------------------------------------------------------------------------
+ * Instructions
+ *
+ * Notes:
+ * o CASEFAIL is generated by the compiler whenever it tests an "irrefutable"
+ * pattern which fails. If we don't see too many of these, we could
+ * optimise out the redundant test.
+ * ------------------------------------------------------------------------*/
+
+/* NOTE:
+
+ THIS FILE IS INCLUDED IN HASKELL SOURCES (ghc/compiler/GHC/ByteCode/Asm.hs).
+ DO NOT PUT C-SPECIFIC STUFF IN HERE!
+
+ I hope that's clear :-)
+*/
+
+#define bci_STKCHECK 1
+#define bci_PUSH_L 2
+#define bci_PUSH_LL 3
+#define bci_PUSH_LLL 4
+#define bci_PUSH8 5
+#define bci_PUSH16 6
+#define bci_PUSH32 7
+#define bci_PUSH8_W 8
+#define bci_PUSH16_W 9
+#define bci_PUSH32_W 10
+#define bci_PUSH_G 11
+#define bci_PUSH_ALTS 12
+#define bci_PUSH_ALTS_P 13
+#define bci_PUSH_ALTS_N 14
+#define bci_PUSH_ALTS_F 15
+#define bci_PUSH_ALTS_D 16
+#define bci_PUSH_ALTS_L 17
+#define bci_PUSH_ALTS_V 18
+#define bci_PUSH_PAD8 19
+#define bci_PUSH_PAD16 20
+#define bci_PUSH_PAD32 21
+#define bci_PUSH_UBX8 22
+#define bci_PUSH_UBX16 23
+#define bci_PUSH_UBX32 24
+#define bci_PUSH_UBX 25
+#define bci_PUSH_APPLY_N 26
+#define bci_PUSH_APPLY_F 27
+#define bci_PUSH_APPLY_D 28
+#define bci_PUSH_APPLY_L 29
+#define bci_PUSH_APPLY_V 30
+#define bci_PUSH_APPLY_P 31
+#define bci_PUSH_APPLY_PP 32
+#define bci_PUSH_APPLY_PPP 33
+#define bci_PUSH_APPLY_PPPP 34
+#define bci_PUSH_APPLY_PPPPP 35
+#define bci_PUSH_APPLY_PPPPPP 36
+/* #define bci_PUSH_APPLY_PPPPPPP 37 */
+#define bci_SLIDE 38
+#define bci_ALLOC_AP 39
+#define bci_ALLOC_AP_NOUPD 40
+#define bci_ALLOC_PAP 41
+#define bci_MKAP 42
+#define bci_MKPAP 43
+#define bci_UNPACK 44
+#define bci_PACK 45
+#define bci_TESTLT_I 46
+#define bci_TESTEQ_I 47
+#define bci_TESTLT_F 48
+#define bci_TESTEQ_F 49
+#define bci_TESTLT_D 50
+#define bci_TESTEQ_D 51
+#define bci_TESTLT_P 52
+#define bci_TESTEQ_P 53
+#define bci_CASEFAIL 54
+#define bci_JMP 55
+#define bci_CCALL 56
+#define bci_SWIZZLE 57
+#define bci_ENTER 58
+#define bci_RETURN 59
+#define bci_RETURN_P 60
+#define bci_RETURN_N 61
+#define bci_RETURN_F 62
+#define bci_RETURN_D 63
+#define bci_RETURN_L 64
+#define bci_RETURN_V 65
+#define bci_BRK_FUN 66
+#define bci_TESTLT_W 67
+#define bci_TESTEQ_W 68
+
+#define bci_RETURN_T 69
+#define bci_PUSH_ALTS_T 70
+/* If you need to go past 255 then you will run into the flags */
+
+/* If you need to go below 0x0100 then you will run into the instructions */
+#define bci_FLAG_LARGE_ARGS 0x8000
+
+/* If a BCO definitely requires less than this many words of stack,
+ don't include an explicit STKCHECK insn in it. The interpreter
+ will check for this many words of stack before running each BCO,
+ rendering an explicit check unnecessary in the majority of
+ cases. */
+#define INTERP_STACK_CHECK_THRESH 50
+
+/*-------------------------------------------------------------------------*/
diff --git a/rts/include/rts/Config.h b/rts/include/rts/Config.h
new file mode 100644
index 0000000000..289950af45
--- /dev/null
+++ b/rts/include/rts/Config.h
@@ -0,0 +1,52 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Rts settings.
+ *
+ * NOTE: assumes #include "ghcconfig.h"
+ *
+ * NB: THIS FILE IS INCLUDED IN NON-C CODE AND DATA! #defines only please.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#if defined(TICKY_TICKY) && defined(THREADED_RTS)
+#error TICKY_TICKY is incompatible with THREADED_RTS
+#endif
+
+/*
+ * Whether the runtime system will use libbfd for debugging purposes.
+ */
+#if defined(DEBUG) && defined(HAVE_BFD_H) && defined(HAVE_LIBBFD) && !defined(_WIN32)
+#define USING_LIBBFD 1
+#endif
+
+/* DEBUG and PROFILING both imply TRACING */
+#if defined(DEBUG) || defined(PROFILING)
+#if !defined(TRACING)
+#define TRACING
+#endif
+#endif
+
+/* DEBUG implies TICKY_TICKY */
+#if defined(DEBUG)
+#if !defined(TICKY_TICKY)
+#define TICKY_TICKY
+#endif
+#endif
+
+
+/* -----------------------------------------------------------------------------
+ Signals - supported on non-PAR versions of the runtime. See RtsSignals.h.
+ -------------------------------------------------------------------------- */
+
+#define RTS_USER_SIGNALS 1
+
+/* Profile spin locks */
+
+#define PROF_SPIN
diff --git a/rts/include/rts/Constants.h b/rts/include/rts/Constants.h
new file mode 100644
index 0000000000..9cbe47752e
--- /dev/null
+++ b/rts/include/rts/Constants.h
@@ -0,0 +1,340 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Constants
+ *
+ * NOTE: this information is used by both the compiler and the RTS.
+ * Some of it is tweakable, and some of it must be kept up to date
+ * with various other parts of the system.
+ *
+ * Constants which are derived automatically from other definitions in
+ * the system (eg. structure sizes) are generated into the file
+ * DerivedConstants.h by a C program (utils/deriveConstants).
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/* -----------------------------------------------------------------------------
+ Minimum closure sizes
+
+ This is the minimum number of words in the payload of a heap-allocated
+ closure, so that the closure has two bits in the bitmap for mark-compact
+ collection.
+
+ See Note [Mark bits in mark-compact collector] in rts/sm/Compact.h
+ -------------------------------------------------------------------------- */
+
+#define MIN_PAYLOAD_SIZE 1
+
+/* -----------------------------------------------------------------------------
+ Constants to do with specialised closure types.
+ -------------------------------------------------------------------------- */
+
+/* We have some pre-compiled selector thunks defined in rts/StgStdThunks.hc.
+ * This constant defines the highest selectee index that we can replace with a
+ * reference to the pre-compiled code.
+ */
+
+#define MAX_SPEC_SELECTEE_SIZE 15
+
+/* Vector-apply thunks. These thunks just push their free variables
+ * on the stack and enter the first one. They're a bit like PAPs, but
+ * don't have a dynamic size. We've pre-compiled a few to save
+ * space.
+ */
+
+#define MAX_SPEC_AP_SIZE 7
+
+/* Specialised FUN/THUNK/CONSTR closure types */
+
+#define MAX_SPEC_THUNK_SIZE 2
+#define MAX_SPEC_FUN_SIZE 2
+#define MAX_SPEC_CONSTR_SIZE 2
+
+/* Range of built-in table of static small int-like and char-like closures.
+ *
+ * NB. This corresponds with the number of actual INTLIKE/CHARLIKE
+ * closures defined in rts/StgMiscClosures.cmm.
+ */
+#define MAX_INTLIKE 255
+#define MIN_INTLIKE (-16)
+
+#define MAX_CHARLIKE 255
+#define MIN_CHARLIKE 0
+
+/* Each byte in the card table for an StgMutArrPtrs covers
+ * (1<<MUT_ARR_PTRS_CARD_BITS) elements in the array. To find a good
+ * value for this, I used the benchmarks nofib/gc/hash,
+ * nofib/gc/graph, and nofib/gc/gc_bench.
+ */
+#define MUT_ARR_PTRS_CARD_BITS 7
+
+/* -----------------------------------------------------------------------------
+ STG Registers.
+
+ Note that in MachRegs.h we define how many of these registers are
+ *real* machine registers, and not just offsets in the Register Table.
+ -------------------------------------------------------------------------- */
+
+#define MAX_VANILLA_REG 10
+#define MAX_FLOAT_REG 6
+#define MAX_DOUBLE_REG 6
+#define MAX_LONG_REG 1
+#define MAX_XMM_REG 6
+
+/* -----------------------------------------------------------------------------
+ Semi-Tagging constants
+
+ Old Comments about this stuff:
+
+ Tags for indirection nodes and ``other'' (probably unevaluated) nodes;
+ normal-form values of algebraic data types will have tags 0, 1, ...
+
+ @INFO_IND_TAG@ is different from @INFO_OTHER_TAG@ just so we can count
+ how often we bang into indirection nodes; that's all. (WDP 95/11)
+
+ ToDo: find out if we need any of this.
+ -------------------------------------------------------------------------- */
+
+#define INFO_OTHER_TAG (-1)
+#define INFO_IND_TAG (-2)
+#define INFO_FIRST_TAG 0
+
+/* -----------------------------------------------------------------------------
+ How much C stack to reserve for local temporaries when in the STG
+ world. Used in StgCRun.c.
+ -------------------------------------------------------------------------- */
+
+#define RESERVED_C_STACK_BYTES (2048 * SIZEOF_LONG)
+
+/* -----------------------------------------------------------------------------
+   How large is the stack frame saved by StgRun?
+   Used in StgCRun.c.
+
+ The size has to be enough to save the registers (see StgCRun)
+ plus padding if the result is not 16 byte aligned.
+ See the Note [Stack Alignment on X86] in StgCRun.c for details.
+
+ -------------------------------------------------------------------------- */
+#if defined(x86_64_HOST_ARCH)
+# if defined(mingw32_HOST_OS)
+# define STG_RUN_STACK_FRAME_SIZE 144
+# else
+# define STG_RUN_STACK_FRAME_SIZE 48
+# endif
+#endif
+
+/* -----------------------------------------------------------------------------
+ StgRun related labels shared between StgCRun.c and StgStartup.cmm.
+ -------------------------------------------------------------------------- */
+
+#if defined(LEADING_UNDERSCORE)
+#define STG_RUN "_StgRun"
+#define STG_RUN_JMP _StgRunJmp
+#define STG_RETURN "_StgReturn"
+#else
+#define STG_RUN "StgRun"
+#define STG_RUN_JMP StgRunJmp
+#define STG_RETURN "StgReturn"
+#endif
+
+/* -----------------------------------------------------------------------------
+ How much Haskell stack space to reserve for the saving of registers
+ etc. in the case of a stack/heap overflow.
+
+ This must be large enough to accommodate the largest stack frame
+ pushed in one of the heap check fragments in HeapStackCheck.hc
+ (ie. currently the generic heap checks - 3 words for StgRetDyn,
+ 18 words for the saved registers, see StgMacros.h).
+ -------------------------------------------------------------------------- */
+
+#define RESERVED_STACK_WORDS 21
+
+/* -----------------------------------------------------------------------------
+ The limit on the size of the stack check performed when we enter an
+ AP_STACK, in words. See raiseAsync() and bug #1466.
+ -------------------------------------------------------------------------- */
+
+#define AP_STACK_SPLIM 1024
+
+/* -----------------------------------------------------------------------------
+ Storage manager constants
+ -------------------------------------------------------------------------- */
+
+/* The size of a block (2^BLOCK_SHIFT bytes) */
+#define BLOCK_SHIFT 12
+
+/* The size of a megablock (2^MBLOCK_SHIFT bytes) */
+#define MBLOCK_SHIFT 20
+
+/* -----------------------------------------------------------------------------
+ Bitmap/size fields (used in info tables)
+ -------------------------------------------------------------------------- */
+
+/* In a 32-bit bitmap field, we use 5 bits for the size, and 27 bits
+ * for the bitmap. If the bitmap requires more than 27 bits, then we
+ * store it in a separate array, and leave a pointer in the bitmap
+ * field. On a 64-bit machine, the sizes are extended accordingly.
+ */
+#if SIZEOF_VOID_P == 4
+#define BITMAP_SIZE_MASK 0x1f
+#define BITMAP_BITS_SHIFT 5
+#elif SIZEOF_VOID_P == 8
+#define BITMAP_SIZE_MASK 0x3f
+#define BITMAP_BITS_SHIFT 6
+#else
+#error unknown SIZEOF_VOID_P
+#endif
+
+/* -----------------------------------------------------------------------------
+ Lag/Drag/Void constants
+ -------------------------------------------------------------------------- */
+
+/*
+ An LDV word is divided into 3 parts: state bits (LDV_STATE_MASK), creation
+ time bits (LDV_CREATE_MASK), and last use time bits (LDV_LAST_MASK).
+ */
+#if SIZEOF_VOID_P == 8
+#define LDV_SHIFT 30
+#define LDV_STATE_MASK 0x1000000000000000
+#define LDV_CREATE_MASK 0x0FFFFFFFC0000000
+#define LDV_LAST_MASK 0x000000003FFFFFFF
+#define LDV_STATE_CREATE 0x0000000000000000
+#define LDV_STATE_USE 0x1000000000000000
+#else
+#define LDV_SHIFT 15
+#define LDV_STATE_MASK 0x40000000
+#define LDV_CREATE_MASK 0x3FFF8000
+#define LDV_LAST_MASK 0x00007FFF
+#define LDV_STATE_CREATE 0x00000000
+#define LDV_STATE_USE 0x40000000
+#endif /* SIZEOF_VOID_P */
+
+/* -----------------------------------------------------------------------------
+ TSO related constants
+ -------------------------------------------------------------------------- */
+
+/*
+ * Constants for the what_next field of a TSO, which indicates how it
+ * is to be run.
+ */
+#define ThreadRunGHC 1 /* return to address on top of stack */
+#define ThreadInterpret 2 /* interpret this thread */
+#define ThreadKilled 3 /* thread has died, don't run it */
+#define ThreadComplete 4 /* thread has finished */
+
+/*
+ * Constants for the why_blocked field of a TSO
+ * NB. keep these in sync with GHC/Conc/Sync.hs: threadStatus
+ */
+#define NotBlocked 0
+#define BlockedOnMVar 1
+#define BlockedOnMVarRead 14 /* TODO: renumber me, see #9003 */
+#define BlockedOnBlackHole 2
+#define BlockedOnRead 3
+#define BlockedOnWrite 4
+#define BlockedOnDelay 5
+#define BlockedOnSTM 6
+
+/* Win32 only: */
+#define BlockedOnDoProc 7
+
+/* Only relevant for THREADED_RTS: */
+#define BlockedOnCCall 10
+#define BlockedOnCCall_Interruptible 11
+ /* same as above but permit killing the worker thread */
+
+/* Involved in a message sent to tso->msg_cap */
+#define BlockedOnMsgThrowTo 12
+
+/* The thread is not on any run queues, but can be woken up
+ by tryWakeupThread() */
+#define ThreadMigrating 13
+
+/* Lightweight non-deadlock checked version of MVar. Used for the why_blocked
+   field of a TSO. Threads blocked for this reason are not forcibly released by
+ the GC, as we expect them to be unblocked in the future based on outstanding
+ IO events. */
+#define BlockedOnIOCompletion 15
+
+/* Next number is 16. */
+
+/*
+ * These constants are returned to the scheduler by a thread that has
+ * stopped for one reason or another. See typedef StgThreadReturnCode
+ * in TSO.h.
+ */
+#define HeapOverflow 1 /* might also be StackOverflow */
+#define StackOverflow 2
+#define ThreadYielding 3
+#define ThreadBlocked 4
+#define ThreadFinished 5
+
+/*
+ * Flags for the tso->flags field.
+ */
+
+/*
+ * TSO_LOCKED is set when a TSO is locked to a particular Capability.
+ */
+#define TSO_LOCKED 2
+
+/*
+ * TSO_BLOCKEX: the TSO is blocking exceptions
+ *
+ * TSO_INTERRUPTIBLE: the TSO can be interrupted if it blocks
+ * interruptibly (eg. with BlockedOnMVar).
+ *
+ * TSO_STOPPED_ON_BREAKPOINT: the thread is currently stopped in a breakpoint
+ */
+#define TSO_BLOCKEX 4
+#define TSO_INTERRUPTIBLE 8
+#define TSO_STOPPED_ON_BREAKPOINT 16
+
+/*
+ * Used by the sanity checker to check whether TSOs are on the correct
+ * mutable list.
+ */
+#define TSO_MARKED 64
+
+/*
+ * Used to communicate between stackSqueeze() and
+ * threadStackOverflow() that a thread's stack was squeezed and the
+ * stack may not need to be expanded.
+ */
+#define TSO_SQUEEZED 128
+
+/*
+ * Enables the AllocationLimitExceeded exception when the thread's
+ * allocation limit goes negative.
+ */
+#define TSO_ALLOC_LIMIT 256
+
+/*
+ * The number of times we spin in a spin lock before yielding (see
+ * #3758). To tune this value, use the benchmark in #3758: run the
+ * server with -N2 and the client both on a dual-core. Also make sure
+ * that the chosen value doesn't slow down any of the parallel
+ * benchmarks in nofib/parallel.
+ */
+#define SPIN_COUNT 1000
+
+/* -----------------------------------------------------------------------------
+ Spare workers per Capability in the threaded RTS
+
+ No more than MAX_SPARE_WORKERS will be kept in the thread pool
+ associated with each Capability.
+ -------------------------------------------------------------------------- */
+
+#define MAX_SPARE_WORKERS 6
+
+/*
+ * The maximum number of NUMA nodes we support. This is a fixed limit so that
+ * we can have static arrays of this size in the RTS for speed.
+ */
+#define MAX_NUMA_NODES 16
diff --git a/rts/include/rts/EventLogFormat.h b/rts/include/rts/EventLogFormat.h
new file mode 100644
index 0000000000..4a2b339a9e
--- /dev/null
+++ b/rts/include/rts/EventLogFormat.h
@@ -0,0 +1,246 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2008-2009
+ *
+ * Event log format
+ *
+ * The log format is designed to be extensible: old tools should be
+ * able to parse (but not necessarily understand all of) new versions
+ * of the format, and new tools will be able to understand old log
+ * files.
+ *
+ * The canonical documentation for the event log format and record layouts is
+ * the "Eventlog encodings" section of the GHC User's Guide.
+ *
+ * To add a new event
+ * ------------------
+ *
+ * - In this file:
+ * - give it a new number, add a new #define EVENT_XXX
+ * below. Do not reuse event ids from deprecated event types.
+ *
+ * - In EventLog.c
+ * - add it to the EventDesc array
+ * - emit the event type in initEventLogging()
+ * - emit the new event in postEvent_()
+ * - generate the event itself by calling postEvent() somewhere
+ *
+ * - Describe the meaning and encoding of the event in the users guide
+ * (docs/user_guide/eventlog-formats.rst)
+ *
+ * - In the Haskell code to parse the event log file:
+ * - add types and code to read the new event
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/*
+ * Markers for begin/end of the Header.
+ */
+#define EVENT_HEADER_BEGIN 0x68647262 /* 'h' 'd' 'r' 'b' */
+#define EVENT_HEADER_END 0x68647265 /* 'h' 'd' 'r' 'e' */
+
+#define EVENT_DATA_BEGIN 0x64617462 /* 'd' 'a' 't' 'b' */
+#define EVENT_DATA_END 0xffff
+
+/*
+ * Markers for begin/end of the list of Event Types in the Header.
+ * Header, Event Type, Begin = hetb
+ * Header, Event Type, End = hete
+ */
+#define EVENT_HET_BEGIN 0x68657462 /* 'h' 'e' 't' 'b' */
+#define EVENT_HET_END 0x68657465 /* 'h' 'e' 't' 'e' */
+
+#define EVENT_ET_BEGIN 0x65746200 /* 'e' 't' 'b' 0 */
+#define EVENT_ET_END 0x65746500 /* 'e' 't' 'e' 0 */
+
+/*
+ * Types of event
+ */
+#define EVENT_CREATE_THREAD 0 /* (thread) */
+#define EVENT_RUN_THREAD 1 /* (thread) */
+#define EVENT_STOP_THREAD 2 /* (thread, status, blockinfo) */
+#define EVENT_THREAD_RUNNABLE 3 /* (thread) */
+#define EVENT_MIGRATE_THREAD 4 /* (thread, new_cap) */
+/* 5, 6, 7 deprecated */
+#define EVENT_THREAD_WAKEUP 8 /* (thread, other_cap) */
+#define EVENT_GC_START 9 /* () */
+#define EVENT_GC_END 10 /* () */
+#define EVENT_REQUEST_SEQ_GC 11 /* () */
+#define EVENT_REQUEST_PAR_GC 12 /* () */
+/* 13, 14 deprecated */
+#define EVENT_CREATE_SPARK_THREAD 15 /* (spark_thread) */
+#define EVENT_LOG_MSG 16 /* (message ...) */
+/* 17 deprecated */
+#define EVENT_BLOCK_MARKER 18 /* (size, end_time, capability) */
+#define EVENT_USER_MSG 19 /* (message ...) */
+#define EVENT_GC_IDLE 20 /* () */
+#define EVENT_GC_WORK 21 /* () */
+#define EVENT_GC_DONE 22 /* () */
+/* 23, 24 used by eden */
+#define EVENT_CAPSET_CREATE 25 /* (capset, capset_type) */
+#define EVENT_CAPSET_DELETE 26 /* (capset) */
+#define EVENT_CAPSET_ASSIGN_CAP 27 /* (capset, cap) */
+#define EVENT_CAPSET_REMOVE_CAP 28 /* (capset, cap) */
+/* the RTS identifier is in the form of "GHC-version rts_way" */
+#define EVENT_RTS_IDENTIFIER 29 /* (capset, name_version_string) */
+/* the vectors in these events are null separated strings */
+#define EVENT_PROGRAM_ARGS 30 /* (capset, commandline_vector) */
+#define EVENT_PROGRAM_ENV 31 /* (capset, environment_vector) */
+#define EVENT_OSPROCESS_PID 32 /* (capset, pid) */
+#define EVENT_OSPROCESS_PPID 33 /* (capset, parent_pid) */
+#define EVENT_SPARK_COUNTERS 34 /* (crt,dud,ovf,cnv,gcd,fiz,rem) */
+#define EVENT_SPARK_CREATE 35 /* () */
+#define EVENT_SPARK_DUD 36 /* () */
+#define EVENT_SPARK_OVERFLOW 37 /* () */
+#define EVENT_SPARK_RUN 38 /* () */
+#define EVENT_SPARK_STEAL 39 /* (victim_cap) */
+#define EVENT_SPARK_FIZZLE 40 /* () */
+#define EVENT_SPARK_GC 41 /* () */
+#define EVENT_INTERN_STRING 42 /* (string, id) {not used by ghc} */
+#define EVENT_WALL_CLOCK_TIME 43 /* (capset, unix_epoch_seconds, nanoseconds) */
+#define EVENT_THREAD_LABEL 44 /* (thread, name_string) */
+#define EVENT_CAP_CREATE 45 /* (cap) */
+#define EVENT_CAP_DELETE 46 /* (cap) */
+#define EVENT_CAP_DISABLE 47 /* (cap) */
+#define EVENT_CAP_ENABLE 48 /* (cap) */
+#define EVENT_HEAP_ALLOCATED 49 /* (heap_capset, alloc_bytes) */
+#define EVENT_HEAP_SIZE 50 /* (heap_capset, size_bytes) */
+#define EVENT_HEAP_LIVE 51 /* (heap_capset, live_bytes) */
+#define EVENT_HEAP_INFO_GHC 52 /* (heap_capset, n_generations,
+ max_heap_size, alloc_area_size,
+ mblock_size, block_size) */
+#define EVENT_GC_STATS_GHC 53 /* (heap_capset, generation,
+ copied_bytes, slop_bytes, frag_bytes,
+ par_n_threads,
+ par_max_copied,
+ par_tot_copied, par_balanced_copied) */
+#define EVENT_GC_GLOBAL_SYNC 54 /* () */
+#define EVENT_TASK_CREATE 55 /* (taskID, cap, tid) */
+#define EVENT_TASK_MIGRATE 56 /* (taskID, cap, new_cap) */
+#define EVENT_TASK_DELETE 57 /* (taskID) */
+#define EVENT_USER_MARKER 58 /* (marker_name) */
+#define EVENT_HACK_BUG_T9003 59 /* Hack: see trac #9003 */
+
+/* Range 60 - 80 is used by eden for parallel tracing
+ * see http://www.mathematik.uni-marburg.de/~eden/
+ */
+
+#define EVENT_MEM_RETURN 90 /* (cap, current_mblocks, needed_mblocks, returned_mblocks) */
+#define EVENT_BLOCKS_SIZE 91 /* (heapcapset, size_bytes) */
+
+/* Range 100 - 139 is reserved for Mercury. */
+
+/* Range 140 - 159 is reserved for Perf events. */
+
+/* Range 160 - 180 is reserved for cost-centre heap profiling events. */
+
+#define EVENT_HEAP_PROF_BEGIN 160
+#define EVENT_HEAP_PROF_COST_CENTRE 161
+#define EVENT_HEAP_PROF_SAMPLE_BEGIN 162
+#define EVENT_HEAP_PROF_SAMPLE_COST_CENTRE 163
+#define EVENT_HEAP_PROF_SAMPLE_STRING 164
+#define EVENT_HEAP_PROF_SAMPLE_END 165
+#define EVENT_HEAP_BIO_PROF_SAMPLE_BEGIN 166
+#define EVENT_PROF_SAMPLE_COST_CENTRE 167
+#define EVENT_PROF_BEGIN 168
+#define EVENT_IPE 169
+
+#define EVENT_USER_BINARY_MSG 181
+
+#define EVENT_CONC_MARK_BEGIN 200
+#define EVENT_CONC_MARK_END 201
+#define EVENT_CONC_SYNC_BEGIN 202
+#define EVENT_CONC_SYNC_END 203
+#define EVENT_CONC_SWEEP_BEGIN 204
+#define EVENT_CONC_SWEEP_END 205
+#define EVENT_CONC_UPD_REM_SET_FLUSH 206
+#define EVENT_NONMOVING_HEAP_CENSUS 207
+
+#define EVENT_TICKY_COUNTER_DEF 210
+#define EVENT_TICKY_COUNTER_SAMPLE 211
+#define EVENT_TICKY_COUNTER_BEGIN_SAMPLE 212
+
+/*
+ * The highest event code +1 that ghc itself emits. Note that some event
+ * ranges higher than this are reserved but not currently emitted by ghc.
+ * This must match the size of the EventDesc[] array in EventLog.c
+ */
+#define NUM_GHC_EVENT_TAGS 213
+
+#if 0 /* DEPRECATED EVENTS: */
+/* we don't actually need to record the thread, it's implicit */
+#define EVENT_RUN_SPARK 5 /* (thread) */
+#define EVENT_STEAL_SPARK 6 /* (thread, victim_cap) */
+/* shutdown replaced by EVENT_CAP_DELETE */
+#define EVENT_SHUTDOWN 7 /* () */
+/* ghc changed how it handles sparks so these are no longer applicable */
+#define EVENT_CREATE_SPARK 13 /* (cap, thread) */
+#define EVENT_SPARK_TO_THREAD 14 /* (cap, thread, spark_thread) */
+#define EVENT_STARTUP 17 /* (num_capabilities) */
+/* these are used by eden but are replaced by new alternatives for ghc */
+#define EVENT_VERSION 23 /* (version_string) */
+#define EVENT_PROGRAM_INVOCATION 24 /* (commandline_string) */
+#endif
+
+/*
+ * Status values for EVENT_STOP_THREAD
+ *
+ * 1-5 are the StgRun return values (from rts/include/Constants.h):
+ *
+ * #define HeapOverflow 1
+ * #define StackOverflow 2
+ * #define ThreadYielding 3
+ * #define ThreadBlocked 4
+ * #define ThreadFinished 5
+ * #define ForeignCall 6
+ * #define BlockedOnMVar 7
+ * #define BlockedOnBlackHole 8
+ * #define BlockedOnRead 9
+ * #define BlockedOnWrite 10
+ * #define BlockedOnDelay 11
+ * #define BlockedOnSTM 12
+ * #define BlockedOnDoProc 13
+ * #define BlockedOnCCall -- not used (see ForeignCall)
+ * #define BlockedOnCCall_NoUnblockExc -- not used (see ForeignCall)
+ * #define BlockedOnMsgThrowTo 16
+ */
+#define THREAD_SUSPENDED_FOREIGN_CALL 6
+
+/*
+ * Capset type values for EVENT_CAPSET_CREATE
+ */
+#define CAPSET_TYPE_CUSTOM 1 /* reserved for end-user applications */
+#define CAPSET_TYPE_OSPROCESS 2 /* caps belong to the same OS process */
+#define CAPSET_TYPE_CLOCKDOMAIN 3 /* caps share a local clock/time */
+
+/*
+ * Heap profile breakdown types. See EVENT_HEAP_PROF_BEGIN.
+ */
+typedef enum {
+ HEAP_PROF_BREAKDOWN_COST_CENTRE = 0x1,
+ HEAP_PROF_BREAKDOWN_MODULE,
+ HEAP_PROF_BREAKDOWN_CLOSURE_DESCR,
+ HEAP_PROF_BREAKDOWN_TYPE_DESCR,
+ HEAP_PROF_BREAKDOWN_RETAINER,
+ HEAP_PROF_BREAKDOWN_BIOGRAPHY,
+ HEAP_PROF_BREAKDOWN_CLOSURE_TYPE,
+ HEAP_PROF_BREAKDOWN_INFO_TABLE
+} HeapProfBreakdown;
+
+#if !defined(EVENTLOG_CONSTANTS_ONLY)
+
+typedef StgWord16 EventTypeNum;
+typedef StgWord64 EventTimestamp; /* in nanoseconds */
+typedef StgWord32 EventThreadID;
+typedef StgWord16 EventCapNo;
+typedef StgWord16 EventPayloadSize; /* variable-size events */
+typedef StgWord16 EventThreadStatus; /* status for EVENT_STOP_THREAD */
+typedef StgWord32 EventCapsetID;
+typedef StgWord16 EventCapsetType; /* types for EVENT_CAPSET_CREATE */
+typedef StgWord64 EventTaskId; /* for EVENT_TASK_* */
+typedef StgWord64 EventKernelThreadId; /* for EVENT_TASK_CREATE */
+
+#define EVENT_PAYLOAD_SIZE_MAX STG_WORD16_MAX
+#endif
diff --git a/rts/include/rts/EventLogWriter.h b/rts/include/rts/EventLogWriter.h
new file mode 100644
index 0000000000..73a2aec64c
--- /dev/null
+++ b/rts/include/rts/EventLogWriter.h
@@ -0,0 +1,75 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2008-2017
+ *
+ * Support for fast binary event logging.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include <stddef.h>
+#include <stdbool.h>
+
+/*
+ * Abstraction for writing eventlog data.
+ */
+typedef struct {
+ // Initialize an EventLogWriter (may be NULL)
+ void (* initEventLogWriter) (void);
+
+ // Write a series of events returning true on success.
+ // Note that this may be called by multiple threads simultaneously.
+ // The writer is responsible for concurrency control.
+ bool (* writeEventLog) (void *eventlog, size_t eventlog_size);
+
+ // Flush possibly existing buffers (may be NULL)
+ // Note that this may be called by multiple threads simultaneously.
+ // The writer is responsible for concurrency control.
+ void (* flushEventLog) (void);
+
+ // Close an initialized EventLogOutput (may be NULL)
+ void (* stopEventLogWriter) (void);
+} EventLogWriter;
+
+/*
+ * An EventLogWriter which writes eventlogs to
+ * a file `program.eventlog`.
+ */
+extern const EventLogWriter FileEventLogWriter;
+
+enum EventLogStatus {
+ /* The runtime system wasn't compiled with eventlog support. */
+ EVENTLOG_NOT_SUPPORTED,
+ /* An EventLogWriter has not yet been configured */
+ EVENTLOG_NOT_CONFIGURED,
+ /* An EventLogWriter has been configured and is running. */
+ EVENTLOG_RUNNING,
+};
+
+/*
+ * Query whether the current runtime system supports eventlogging.
+ */
+enum EventLogStatus eventLogStatus(void);
+
+/*
+ * Initialize event logging using the given EventLogWriter.
+ * Returns true on success or false if an EventLogWriter is already configured
+ * or eventlogging isn't supported by the runtime.
+ */
+bool startEventLogging(const EventLogWriter *writer);
+
+/*
+ * Stop event logging and destroy the current EventLogWriter.
+ */
+void endEventLogging(void);
+
+/*
+ * Flush the eventlog. cap can be NULL if one is not held.
+ */
+void flushEventLog(Capability **cap);
diff --git a/rts/include/rts/ExecPage.h b/rts/include/rts/ExecPage.h
new file mode 100644
index 0000000000..4261b71259
--- /dev/null
+++ b/rts/include/rts/ExecPage.h
@@ -0,0 +1,18 @@
+/*
+ * Utilities for managing dynamically-allocated executable pages.
+ */
+
+#pragma once
+
+typedef struct {
+ char contents;
+} ExecPage;
+
+/* Allocate a writable page. */
+ExecPage *allocateExecPage(void);
+
+/* Make executable a page previously allocated by allocateExecPage. */
+void freezeExecPage(ExecPage *page);
+
+/* Free a page previously allocated by allocateExecPage. */
+void freeExecPage(ExecPage *page);
diff --git a/rts/include/rts/FileLock.h b/rts/include/rts/FileLock.h
new file mode 100644
index 0000000000..3d8056d7a0
--- /dev/null
+++ b/rts/include/rts/FileLock.h
@@ -0,0 +1,37 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2007-2009
+ *
+ * File locking support as required by Haskell
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+/* Note [RTS File locking]
+ * ~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * The Haskell report dictates certain file locking behaviour.
+ * This is specified in the Haskell98 report under: 21.2.3 File locking
+ *
+ * GHC does not rely on the platform it's on to implement this.
+ * Instead we keep track of locked files in a data structure in
+ * the RTS. This file provides the interface to this data structure.
+ *
+ * In the base libraries we then use this interface to "lock" files.
+ * This means it's very much still possible for users outside of the
+ * rts/base library to open the files in question even if they are
+ * locked.
+ * */
+
+#pragma once
+
+#include "Stg.h"
+
+/* No valid FD would be negative, so use a word instead of int so the value
+ is compatible with a Windows handle. */
+int lockFile(StgWord64 id, StgWord64 dev, StgWord64 ino, int for_writing);
+int unlockFile(StgWord64 id);
diff --git a/rts/include/rts/Flags.h b/rts/include/rts/Flags.h
new file mode 100644
index 0000000000..11e7bfdaa7
--- /dev/null
+++ b/rts/include/rts/Flags.h
@@ -0,0 +1,330 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Datatypes that holds the command-line flag settings.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include "stg/Types.h"
+#include "Time.h"
+
+/* For defaults, see the @initRtsFlagsDefaults@ routine. */
+
+/* Note [Synchronization of flags and base APIs]
+ *
+ * We provide accessors to RTS flags in base. (GHC.RTS module)
+ * The API should be updated whenever RTS flags are modified.
+ */
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _GC_FLAGS {
+ FILE *statsFile;
+ uint32_t giveStats;
+#define NO_GC_STATS 0
+#define COLLECT_GC_STATS 1
+#define ONELINE_GC_STATS 2
+#define SUMMARY_GC_STATS 3
+#define VERBOSE_GC_STATS 4
+
+ uint32_t maxStkSize; /* in *words* */
+ uint32_t initialStkSize; /* in *words* */
+ uint32_t stkChunkSize; /* in *words* */
+ uint32_t stkChunkBufferSize; /* in *words* */
+
+ uint32_t maxHeapSize; /* in *blocks* */
+ uint32_t minAllocAreaSize; /* in *blocks* */
+ uint32_t largeAllocLim; /* in *blocks* */
+ uint32_t nurseryChunkSize; /* in *blocks* */
+ uint32_t minOldGenSize; /* in *blocks* */
+ uint32_t heapSizeSuggestion; /* in *blocks* */
+ bool heapSizeSuggestionAuto;
+ double oldGenFactor;
+ double returnDecayFactor;
+ double pcFreeHeap;
+
+ bool useNonmoving; // default = false
+ bool nonmovingSelectorOpt; // Do selector optimization in the
+ // non-moving heap, default = false
+ uint32_t generations;
+ bool squeezeUpdFrames;
+
+ bool compact; /* True <=> "compact all the time" */
+ double compactThreshold;
+
+ bool sweep; /* use "mostly mark-sweep" instead of copying
+ * for the oldest generation */
+ bool ringBell;
+
+ Time idleGCDelayTime; /* units: TIME_RESOLUTION */
+ Time interIdleGCWait; /* units: TIME_RESOLUTION */
+ bool doIdleGC;
+
+ Time longGCSync; /* units: TIME_RESOLUTION */
+
+ StgWord heapBase; /* address to ask the OS for memory */
+
+ StgWord allocLimitGrace; /* units: *blocks*
+ * After an AllocationLimitExceeded
+ * exception has been raised, how much
+ * extra space is given to the thread
+ * to handle the exception before we
+ * raise it again.
+ */
+ StgWord heapLimitGrace; /* units: *blocks*
+ * After a HeapOverflow exception has
+ * been raised, how much extra space is
+ * given to the thread to handle the
+ * exception before we raise it again.
+ */
+
+ bool numa; /* Use NUMA */
+ StgWord numaMask;
+} GC_FLAGS;
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _DEBUG_FLAGS {
+ /* flags to control debugging output & extra checking in various subsystems */
+ bool scheduler; /* 's' */
+ bool interpreter; /* 'i' */
+ bool weak; /* 'w' */
+ bool gccafs; /* 'G' */
+ bool gc; /* 'g' */
+ bool nonmoving_gc; /* 'n' */
+ bool block_alloc; /* 'b' */
+ bool sanity; /* 'S' warning: might be expensive! */
+ bool zero_on_gc; /* 'Z' */
+ bool stable; /* 't' */
+ bool prof; /* 'p' */
+ bool linker; /* 'l' the object linker */
+ bool apply; /* 'a' */
+ bool stm; /* 'm' */
+ bool squeeze; /* 'z' stack squeezing & lazy blackholing */
+ bool hpc; /* 'c' coverage */
+ bool sparks; /* 'r' */
+ bool numa; /* '--debug-numa' */
+ bool compact; /* 'C' */
+} DEBUG_FLAGS;
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _COST_CENTRE_FLAGS {
+ uint32_t doCostCentres;
+# define COST_CENTRES_NONE 0
+# define COST_CENTRES_SUMMARY 1
+# define COST_CENTRES_VERBOSE 2 /* incl. serial time profile */
+# define COST_CENTRES_ALL 3
+# define COST_CENTRES_JSON 4
+
+ int profilerTicks; /* derived */
+ int msecsPerTick; /* derived */
+ char const *outputFileNameStem;
+} COST_CENTRE_FLAGS;
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _PROFILING_FLAGS {
+ uint32_t doHeapProfile;
+# define NO_HEAP_PROFILING 0 /* N.B. Used as indexes into arrays */
+# define HEAP_BY_CCS 1
+# define HEAP_BY_MOD 2
+# define HEAP_BY_DESCR 4
+# define HEAP_BY_TYPE 5
+# define HEAP_BY_RETAINER 6
+# define HEAP_BY_LDV 7
+
+# define HEAP_BY_CLOSURE_TYPE 8
+# define HEAP_BY_INFO_TABLE 9
+
+ Time heapProfileInterval; /* time between samples */
+ uint32_t heapProfileIntervalTicks; /* ticks between samples (derived) */
+ bool startHeapProfileAtStartup; /* true if we start profiling from program startup */
+
+
+ bool showCCSOnException;
+
+ uint32_t maxRetainerSetSize;
+
+ uint32_t ccsLength;
+
+ const char* modSelector;
+ const char* descrSelector;
+ const char* typeSelector;
+ const char* ccSelector;
+ const char* ccsSelector;
+ const char* retainerSelector;
+ const char* bioSelector;
+
+} PROFILING_FLAGS;
+
+#define TRACE_NONE 0
+#define TRACE_EVENTLOG 1
+#define TRACE_STDERR 2
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _TRACE_FLAGS {
+ int tracing;
+ bool timestamp; /* show timestamp in stderr output */
+ bool scheduler; /* trace scheduler events */
+ bool gc; /* trace GC events */
+ bool nonmoving_gc; /* trace nonmoving GC events */
+ bool sparks_sampled; /* trace spark events by a sampled method */
+ bool sparks_full; /* trace spark events 100% accurately */
+ bool ticky; /* trace ticky-ticky samples */
+ bool user; /* trace user events (emitted from Haskell code) */
+ Time eventlogFlushTime; /* Time between force eventlog flushes (or 0 if disabled) */
+ int eventlogFlushTicks;
+ char *trace_output; /* output filename for eventlog */
+} TRACE_FLAGS;
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _CONCURRENT_FLAGS {
+ Time ctxtSwitchTime; /* units: TIME_RESOLUTION */
+ int ctxtSwitchTicks; /* derived */
+} CONCURRENT_FLAGS;
+
+/*
+ * The tickInterval is the time interval between "ticks", ie.
+ * timer signals (see Timer.{c,h}). It is the frequency at
+ * which we sample CCCS for profiling.
+ *
+ * It is changed by the +RTS -V<secs> flag.
+ */
+#define DEFAULT_TICK_INTERVAL USToTime(10000)
+
+/*
+ * When linkerAlwaysPic is true, the runtime linker assumes that all object
+ * files were compiled with -fPIC -fexternal-dynamic-refs and loads them
+ * anywhere in the address space.
+ * Note that there is no 32bit darwin system we can realistically expect to
+ * run on or compile for.
+ */
+#if defined(darwin_HOST_OS) || defined(aarch64_HOST_ARCH) || defined(arm_HOST_ARCH)
+#define DEFAULT_LINKER_ALWAYS_PIC true
+#else
+#define DEFAULT_LINKER_ALWAYS_PIC false
+#endif
+
+/* Which I/O Manager to use in the target program. */
+typedef enum _IO_MANAGER { IO_MNGR_NATIVE, IO_MNGR_POSIX } IO_MANAGER;
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _MISC_FLAGS {
+ Time tickInterval; /* units: TIME_RESOLUTION */
+ bool install_signal_handlers;
+ bool install_seh_handlers;
+ bool generate_dump_file;
+ bool generate_stack_trace;
+ bool machineReadable;
+ bool disableDelayedOsMemoryReturn; /* See Note [MADV_FREE and MADV_DONTNEED].
+ It's in `MiscFlags` instead of
+ `GcFlags` because if GHC used madvise()
+ memory management for non-GC related
+ tasks in the future, we'd respect it
+ there as well. */
+ bool internalCounters; /* See Note [Internal Counter Stats] */
+ bool linkerAlwaysPic; /* Assume the object code is always PIC */
+ StgWord linkerMemBase; /* address to ask the OS for memory
+ * for the linker, NULL ==> off */
+ IO_MANAGER ioManager; /* The I/O manager to use. */
+ uint32_t numIoWorkerThreads; /* Number of I/O worker threads to use. */
+} MISC_FLAGS;
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _PAR_FLAGS {
+ uint32_t nCapabilities; /* number of threads to run simultaneously */
+ bool migrate; /* migrate threads between capabilities */
+ uint32_t maxLocalSparks;
+ bool parGcEnabled; /* enable parallel GC */
+ uint32_t parGcGen; /* do parallel GC in this generation
+ * and higher only */
+ bool parGcLoadBalancingEnabled;
+ /* enable load-balancing in the
+ * parallel GC */
+ uint32_t parGcLoadBalancingGen;
+ /* do load-balancing in this
+ * generation and higher only */
+
+ uint32_t parGcNoSyncWithIdle;
+ /* if a Capability has been idle for
+ * this many GCs, do not try to wake
+ * it up when doing a
+ * non-load-balancing parallel GC.
+ * (zero disables) */
+
+ uint32_t parGcThreads;
+ /* Use this many threads for parallel
+ * GC (default: use all nNodes). */
+
+ bool setAffinity; /* force thread affinity with CPUs */
+} PAR_FLAGS;
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _TICKY_FLAGS {
+ bool showTickyStats;
+ FILE *tickyFile;
+} TICKY_FLAGS;
+
+/* Put them together: */
+
+/* See Note [Synchronization of flags and base APIs] */
+typedef struct _RTS_FLAGS {
+ /* The first portion of RTS_FLAGS is invariant. */
+ GC_FLAGS GcFlags;
+ CONCURRENT_FLAGS ConcFlags;
+ MISC_FLAGS MiscFlags;
+ DEBUG_FLAGS DebugFlags;
+ COST_CENTRE_FLAGS CcFlags;
+ PROFILING_FLAGS ProfFlags;
+ TRACE_FLAGS TraceFlags;
+ TICKY_FLAGS TickyFlags;
+ PAR_FLAGS ParFlags;
+} RTS_FLAGS;
+
+#if defined(COMPILING_RTS_MAIN)
+extern DLLIMPORT RTS_FLAGS RtsFlags;
+#elif IN_STG_CODE
+/* Note [RtsFlags is a pointer in STG code]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * When compiling with IN_STG_CODE the RtsFlags symbol is defined as a pointer.
+ * This is necessary because the C code generator can't generate '&label'.
+ */
+extern RTS_FLAGS RtsFlags[];
+#else
+extern RTS_FLAGS RtsFlags;
+#endif
+
+/*
+ * The printf formats are here, so we are less likely to make
+ * overly-long filenames (with disastrous results). No more than 128
+ * chars, please!
+ */
+
+#define STATS_FILENAME_MAXLEN 128
+
+#define GR_FILENAME_FMT "%0.124s.gr"
+#define HP_FILENAME_FMT "%0.124s.hp"
+#define LIFE_FILENAME_FMT "%0.122s.life"
+#define PROF_FILENAME_FMT "%0.122s.prof"
+#define PROF_FILENAME_FMT_GUM "%0.118s.%03d.prof"
+#define QP_FILENAME_FMT "%0.124s.qp"
+#define STAT_FILENAME_FMT "%0.122s.stat"
+#define TICKY_FILENAME_FMT "%0.121s.ticky"
+#define TIME_FILENAME_FMT "%0.122s.time"
+#define TIME_FILENAME_FMT_GUM "%0.118s.%03d.time"
+
+/* an "int" so as to match normal "argc" */
+/* Now defined in Stg.h (lib/std/cbits need these too.)
+extern int prog_argc;
+extern char **prog_argv;
+*/
+extern int rts_argc; /* ditto */
+extern char **rts_argv;
diff --git a/rts/include/rts/ForeignExports.h b/rts/include/rts/ForeignExports.h
new file mode 100644
index 0000000000..aeb524aebf
--- /dev/null
+++ b/rts/include/rts/ForeignExports.h
@@ -0,0 +1,38 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1995-2009
+ *
+ * Interface to the RTS's foreign export tracking code.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+struct _ObjectCode;
+
+/* N.B. See Note [Tracking foreign exports] in
+ * rts/ForeignExports.c. */
+struct ForeignExportsList {
+ /* a link field for linking these together into lists.
+ */
+ struct ForeignExportsList *next;
+ /* the length of ->exports */
+ int n_entries;
+ /* if the RTS linker loaded the module,
+ * to which ObjectCode these exports belong. */
+ struct _ObjectCode *oc;
+ /* if the RTS linker loaded the module,
+ * this points to an array of length ->n_entries
+ * recording the StablePtr for each export. */
+ StgStablePtr **stable_ptrs;
+  /* the exported closures, of length ->n_entries. */
+ StgPtr exports[];
+};
+
+void registerForeignExports(struct ForeignExportsList *exports);
+
diff --git a/rts/include/rts/GetTime.h b/rts/include/rts/GetTime.h
new file mode 100644
index 0000000000..53207ce307
--- /dev/null
+++ b/rts/include/rts/GetTime.h
@@ -0,0 +1,16 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1995-2009
+ *
+ * Interface to the RTS time
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+StgWord64 getMonotonicNSec (void);
diff --git a/rts/include/rts/Globals.h b/rts/include/rts/Globals.h
new file mode 100644
index 0000000000..bd3aa637db
--- /dev/null
+++ b/rts/include/rts/Globals.h
@@ -0,0 +1,36 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2006-2009
+ *
+ * The RTS stores some "global" values on behalf of libraries, so that
+ * some libraries can ensure that certain top-level things are shared
+ * even when multiple versions of the library are loaded. e.g. see
+ * Data.Typeable and GHC.Conc.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#define mkStoreAccessorPrototype(name) \
+ StgStablePtr \
+ getOrSet##name(StgStablePtr ptr);
+
+mkStoreAccessorPrototype(GHCConcSignalSignalHandlerStore)
+mkStoreAccessorPrototype(GHCConcWindowsPendingDelaysStore)
+mkStoreAccessorPrototype(GHCConcWindowsIOManagerThreadStore)
+mkStoreAccessorPrototype(GHCConcWindowsProddingStore)
+mkStoreAccessorPrototype(SystemEventThreadEventManagerStore)
+mkStoreAccessorPrototype(SystemEventThreadIOManagerThreadStore)
+mkStoreAccessorPrototype(SystemTimerThreadEventManagerStore)
+mkStoreAccessorPrototype(SystemTimerThreadIOManagerThreadStore)
+mkStoreAccessorPrototype(LibHSghcFastStringTable)
+mkStoreAccessorPrototype(LibHSghcGlobalHasPprDebug)
+mkStoreAccessorPrototype(LibHSghcGlobalHasNoDebugOutput)
+mkStoreAccessorPrototype(LibHSghcGlobalHasNoStateHack)
+extern HsInt ghc_unique_counter;
+extern HsInt ghc_unique_inc;
diff --git a/rts/include/rts/Hpc.h b/rts/include/rts/Hpc.h
new file mode 100644
index 0000000000..85d00ca485
--- /dev/null
+++ b/rts/include/rts/Hpc.h
@@ -0,0 +1,34 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2008-2009
+ *
+ * Haskell Program Coverage
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+// Simple linked list of modules
+typedef struct _HpcModuleInfo {
+ char *modName; // name of module
+ StgWord32 tickCount; // number of ticks
+ StgWord32 hashNo; // Hash number for this module's mix info
+ StgWord64 *tixArr; // tix Array; local for this module
+ bool from_file; // data was read from the .tix file
+ struct _HpcModuleInfo *next;
+} HpcModuleInfo;
+
+void hs_hpc_module (char *modName,
+ StgWord32 modCount,
+ StgWord32 modHashNo,
+ StgWord64 *tixArr);
+
+HpcModuleInfo * hs_hpc_rootModule (void);
+
+void startupHpc(void);
+void exitHpc(void);
diff --git a/rts/include/rts/IOInterface.h b/rts/include/rts/IOInterface.h
new file mode 100644
index 0000000000..9a646cc5cf
--- /dev/null
+++ b/rts/include/rts/IOInterface.h
@@ -0,0 +1,38 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * IO Manager functionality in the RTS
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+#if defined(mingw32_HOST_OS)
+
+#define IO_MANAGER_WAKEUP 0xffffffff
+#define IO_MANAGER_DIE 0xfffffffe
+/* spurious wakeups are returned as zero. */
+/* console events are ((event<<1) | 1). */
+
+int rts_InstallConsoleEvent ( int action, StgStablePtr *handler );
+void rts_ConsoleHandlerDone ( int ev );
+extern StgInt console_handler;
+
+void * getIOManagerEvent (void);
+HsWord32 readIOManagerEvent (void);
+void sendIOManagerEvent (HsWord32 event);
+
+#else
+
+void setIOManagerControlFd (uint32_t cap_no, int fd);
+void setTimerManagerControlFd(int fd);
+void setIOManagerWakeupFd (int fd);
+
+#endif
+
diff --git a/rts/include/rts/IPE.h b/rts/include/rts/IPE.h
new file mode 100644
index 0000000000..81a6d553d0
--- /dev/null
+++ b/rts/include/rts/IPE.h
@@ -0,0 +1,35 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2017-2018
+ *
+ * IPE API
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+
+typedef struct InfoProv_{
+ char * table_name;
+ char * closure_desc;
+ char * ty_desc;
+ char * label;
+ char * module;
+ char * srcloc;
+} InfoProv;
+
+typedef struct InfoProvEnt_ {
+ StgInfoTable * info;
+ InfoProv prov;
+ struct InfoProvEnt_ *link;
+} InfoProvEnt;
+
+extern InfoProvEnt * RTS_VAR(IPE_LIST); // registered IP list
+
+void registerInfoProvList(InfoProvEnt **cc_list);
+InfoProvEnt * lookupIPE(StgInfoTable *info);
diff --git a/rts/include/rts/Libdw.h b/rts/include/rts/Libdw.h
new file mode 100644
index 0000000000..d7bd55d06e
--- /dev/null
+++ b/rts/include/rts/Libdw.h
@@ -0,0 +1,97 @@
+/* ---------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2014-2015
+ *
+ * Producing DWARF-based stacktraces with libdw.
+ *
+ * --------------------------------------------------------------------------*/
+
+#pragma once
+
+// for FILE
+#include <stdio.h>
+
+// Chunk capacity
+// This is rather arbitrary
+#define BACKTRACE_CHUNK_SZ 256
+
+/*
+ * Note [Chunked stack representation]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * Consider the stack,
+ * main calls (bottom of stack)
+ * func1 which in turn calls
+ * func2 which calls
+ * func3 which calls
+ * func4 which calls
+ * func5 which calls
+ * func6 which calls
+ * func7 which requests a backtrace (top of stack)
+ *
+ * This would produce the Backtrace (using a smaller chunk size of three for
+ * illustrative purposes),
+ *
+ * Backtrace /----> Chunk /----> Chunk /----> Chunk
+ * last --------/ next --------/ next --------/ next
+ * n_frames=8 n_frames=2 n_frames=3 n_frames=3
+ * ~~~~~~~~~~ ~~~~~~~~~~ ~~~~~~~~~~
+ * func1 func4 func7
+ * main func3 func6
+ * func2 func5
+ *
+ */
+
+/* A chunk of code addresses from an execution stack
+ *
+ * The first address in this list corresponds to the stack frame
+ * nearest to the "top" of the stack.
+ */
+typedef struct BacktraceChunk_ {
+ StgWord n_frames; // number of frames in this chunk
+ struct BacktraceChunk_ *next; // the chunk following this one
+ StgPtr frames[BACKTRACE_CHUNK_SZ]; // the code addresses from the
+ // frames
+} __attribute__((packed)) BacktraceChunk;
+
+/* A chunked list of code addresses from an execution stack
+ *
+ * This structure is optimized for append operations since we append O(stack
+ * depth) times yet typically only traverse the stack trace once. Consequently,
+ * the "top" stack frame (that is, the one where we started unwinding) can be
+ * found in the last chunk. Yes, this is a bit inconsistent with the ordering
+ * within a chunk. See Note [Chunked stack representation] for a depiction.
+ */
+typedef struct Backtrace_ {
+ StgWord n_frames; // Total number of frames in the backtrace
+ BacktraceChunk *last; // The first chunk of frames (corresponding to the
+ // bottom of the stack)
+} Backtrace;
+
+/* Various information describing the location of an address */
+typedef struct Location_ {
+ const char *object_file;
+ const char *function;
+
+ // lineno and colno are only valid if source_file /= NULL
+ const char *source_file;
+ StgWord32 lineno;
+ StgWord32 colno;
+} __attribute__((packed)) Location;
+
+struct LibdwSession_;
+typedef struct LibdwSession_ LibdwSession;
+
+/* Free a backtrace */
+void backtraceFree(Backtrace *bt);
+
+/* Request a backtrace of the current stack state.
+ * May return NULL if a backtrace can't be acquired. */
+Backtrace *libdwGetBacktrace(LibdwSession *session);
+
+/* Lookup Location information for the given address.
+ * Returns 0 if successful, 1 if address could not be found. */
+int libdwLookupLocation(LibdwSession *session, Location *loc, StgPtr pc);
+
+/* Pretty-print a backtrace to the given FILE */
+void libdwPrintBacktrace(LibdwSession *session, FILE *file, Backtrace *bt);
diff --git a/rts/include/rts/LibdwPool.h b/rts/include/rts/LibdwPool.h
new file mode 100644
index 0000000000..76ff41c8c7
--- /dev/null
+++ b/rts/include/rts/LibdwPool.h
@@ -0,0 +1,19 @@
+/* ---------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2015-2016
+ *
+ * A pool of libdw sessions
+ *
+ * --------------------------------------------------------------------------*/
+
+#pragma once
+
+/* Claim a session from the pool */
+LibdwSession *libdwPoolTake(void);
+
+/* Return a session to the pool */
+void libdwPoolRelease(LibdwSession *sess);
+
+/* Free any sessions in the pool forcing a reload of any loaded debug
+ * information */
+void libdwPoolClear(void);
diff --git a/rts/include/rts/Linker.h b/rts/include/rts/Linker.h
new file mode 100644
index 0000000000..1f3719c0c7
--- /dev/null
+++ b/rts/include/rts/Linker.h
@@ -0,0 +1,114 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2009
+ *
+ * RTS Object Linker
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#if defined(mingw32_HOST_OS)
+typedef wchar_t pathchar;
+#define PATH_FMT "ls"
+#else
+typedef char pathchar;
+#define PATH_FMT "s"
+#endif
+
+/* Initialize the object linker. Equivalent to initLinker_(1). */
+void initLinker (void);
+
+/* Initialize the object linker.
+ * The retain_cafs argument is:
+ *
+ * non-zero => Retain CAFs unconditionally in linked Haskell code.
+ * Note that this prevents any code from being unloaded.
+ * It should not be necessary unless you are GHCi or
+ * hs-plugins, which needs to be able call any function
+ * in the compiled code.
+ *
+ * zero => Do not retain CAFs. Everything reachable from foreign
+ * exports will be retained, due to the StablePtrs
+ * created by the module initialisation code. unloadObj
+ * frees these StablePtrs, which will allow the CAFs to
+ * be GC'd and the code to be removed.
+ */
+void initLinker_ (int retain_cafs);
+
+/* insert a symbol in the hash table */
+HsInt insertSymbol(pathchar* obj_name, char* key, void* data);
+
+/* lookup a symbol in the hash table */
+void *lookupSymbol( char *lbl );
+
+/* See Linker.c Note [runtime-linker-phases] */
+typedef enum {
+ OBJECT_LOADED,
+ OBJECT_NEEDED,
+ OBJECT_RESOLVED,
+ OBJECT_UNLOADED,
+ OBJECT_DONT_RESOLVE,
+ OBJECT_NOT_LOADED /* The object was either never loaded or has been
+ fully unloaded */
+} OStatus;
+
+/* check object load status */
+OStatus getObjectLoadStatus( pathchar *path );
+
+/* delete an object from the pool */
+HsInt unloadObj( pathchar *path );
+
+/* purge an object's symbols from the symbol table, but don't unload it */
+HsInt purgeObj( pathchar *path );
+
+/* add an obj (populate the global symbol table, but don't resolve yet) */
+HsInt loadObj( pathchar *path );
+
+/* add an arch (populate the global symbol table, but don't resolve yet) */
+HsInt loadArchive( pathchar *path );
+
+/* resolve all the currently unlinked objects in memory */
+HsInt resolveObjs( void );
+
+/* Load an .so using the system linker.
+ Returns a handle that can be passed to dlsym() or NULL on error.
+
+ In the case of error, stores the error message in errmsg. The caller
+ is responsible for freeing it. */
+void *loadNativeObj( pathchar *path, char **errmsg );
+
+/* Mark the .so loaded with the system linker for unloading.
+ The RTS will unload it when all the references to the .so disappear from
+ the heap.
+ Takes the handle returned from loadNativeObj() as an argument. */
+HsInt unloadNativeObj( void *handle );
+
+/* load a dynamic library */
+const char *addDLL( pathchar* dll_name );
+
+/* add a path to the library search path */
+HsPtr addLibrarySearchPath(pathchar* dll_path);
+
+/* removes a directory from the search path,
+ path must have been added using addLibrarySearchPath */
+HsBool removeLibrarySearchPath(HsPtr dll_path_index);
+
+/* give a warning about missing Windows patches that would make
+ the linker work better */
+void warnMissingKBLibraryPaths( void );
+
+/* -----------------------------------------------------------------------------
+* Searches the system directories to determine if there is a system DLL that
+* satisfies the given name. This prevents GHCi from linking against a static
+* library if a DLL is available.
+*/
+pathchar* findSystemLibrary(pathchar* dll_name);
+
+/* called by the initialization code for a module, not a user API */
+StgStablePtr foreignExportStablePtr (StgPtr p);
diff --git a/rts/include/rts/Main.h b/rts/include/rts/Main.h
new file mode 100644
index 0000000000..05924ad92b
--- /dev/null
+++ b/rts/include/rts/Main.h
@@ -0,0 +1,18 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2009
+ *
+ * Entry point for standalone Haskell programs.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* -----------------------------------------------------------------------------
+ * The entry point for Haskell programs that use a Haskell main function
+ * -------------------------------------------------------------------------- */
+
+int hs_main (int argc, char *argv[], // program args
+ StgClosure *main_closure, // closure for Main.main
+ RtsConfig rts_config) // RTS configuration
+ GNUC3_ATTRIBUTE(__noreturn__);
diff --git a/rts/include/rts/Messages.h b/rts/include/rts/Messages.h
new file mode 100644
index 0000000000..dbaf37bbc7
--- /dev/null
+++ b/rts/include/rts/Messages.h
@@ -0,0 +1,104 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Message API for use inside the RTS. All messages generated by the
+ * RTS should go through one of the functions declared here, and we
+ * also provide hooks so that messages from the RTS can be redirected
+ * as appropriate.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include <stdarg.h>
+
+#if defined(mingw32_HOST_OS) && !defined(__clang__)
+/* On Win64, if we say "printf" then gcc thinks we are going to use
+ MS format specifiers like %I64d rather than %llu */
+#define PRINTF gnu_printf
+#else
+/* However, on OS X, "gnu_printf" isn't recognised */
+#define PRINTF printf
+#endif
+
+/* -----------------------------------------------------------------------------
+ * Message generation
+ * -------------------------------------------------------------------------- */
+
+/*
+ * A fatal internal error: this is for errors that probably indicate
+ * bugs in the RTS or compiler. We normally output bug reporting
+ * instructions along with the error message.
+ *
+ * barf() invokes (*fatalInternalErrorFn)(). This function is not
+ * expected to return.
+ */
+void barf(const char *s, ...)
+ GNUC3_ATTRIBUTE(__noreturn__)
+ GNUC3_ATTRIBUTE(format(PRINTF, 1, 2));
+
+void vbarf(const char *s, va_list ap)
+ GNUC3_ATTRIBUTE(__noreturn__);
+
+// declared in Rts.h:
+// extern void _assertFail(const char *filename, unsigned int linenum)
+// GNUC3_ATTRIBUTE(__noreturn__);
+
+/*
+ * An error condition which is caused by and/or can be corrected by
+ * the user.
+ *
+ * errorBelch() invokes (*errorMsgFn)().
+ */
+void errorBelch(const char *s, ...)
+ GNUC3_ATTRIBUTE(format (PRINTF, 1, 2));
+
+void verrorBelch(const char *s, va_list ap);
+
+/*
+ * An error condition which is caused by and/or can be corrected by
+ * the user, and which has an associated error condition reported
+ * by the system (in errno on Unix, and GetLastError() on Windows).
+ * The system error message is appended to the message generated
+ * from the supplied format string.
+ *
+ * sysErrorBelch() invokes (*sysErrorMsgFn)().
+ */
+void sysErrorBelch(const char *s, ...)
+ GNUC3_ATTRIBUTE(format (PRINTF, 1, 2));
+
+void vsysErrorBelch(const char *s, va_list ap);
+
+/*
+ * A debugging message. Debugging messages are generated either as a
+ * virtue of having DEBUG turned on, or by being explicitly selected
+ * via RTS options (eg. +RTS -Ds).
+ *
+ * debugBelch() invokes (*debugMsgFn)().
+ */
+void debugBelch(const char *s, ...)
+ GNUC3_ATTRIBUTE(format (PRINTF, 1, 2));
+
+void vdebugBelch(const char *s, va_list ap);
+
+
+/* Hooks for redirecting message generation: */
+
+typedef void RtsMsgFunction(const char *, va_list);
+
+extern RtsMsgFunction *fatalInternalErrorFn;
+extern RtsMsgFunction *debugMsgFn;
+extern RtsMsgFunction *errorMsgFn;
+
+/* Default stdio implementation of the message hooks: */
+
+extern RtsMsgFunction rtsFatalInternalErrorFn;
+extern RtsMsgFunction rtsDebugMsgFn;
+extern RtsMsgFunction rtsErrorMsgFn;
+extern RtsMsgFunction rtsSysErrorMsgFn;
diff --git a/rts/include/rts/NonMoving.h b/rts/include/rts/NonMoving.h
new file mode 100644
index 0000000000..35a04fe940
--- /dev/null
+++ b/rts/include/rts/NonMoving.h
@@ -0,0 +1,43 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2018-2019
+ *
+ * Non-moving garbage collector
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+// Forward declaration for Stg.h
+struct StgClosure_;
+struct StgThunk_;
+struct Capability_;
+
+/* This is called by the code generator */
+extern DLL_IMPORT_RTS
+void updateRemembSetPushClosure_(StgRegTable *reg, struct StgClosure_ *p);
+
+extern DLL_IMPORT_RTS
+void updateRemembSetPushThunk_(StgRegTable *reg, struct StgThunk_ *p);
+
+// Forward declaration for unregisterised backend.
+EF_(stg_copyArray_barrier);
+
+// Note that RTS code should not condition on this directly but rather
+// use the IF_NONMOVING_WRITE_BARRIER_ENABLED macro to ensure that
+// the barrier is eliminated in the non-threaded RTS.
+extern StgWord DLL_IMPORT_DATA_VAR(nonmoving_write_barrier_enabled);
+
+// A similar macro is defined in rts/include/Cmm.h for C-- code.
+#if defined(THREADED_RTS)
+#define IF_NONMOVING_WRITE_BARRIER_ENABLED \
+ if (RTS_UNLIKELY(nonmoving_write_barrier_enabled))
+#else
+#define IF_NONMOVING_WRITE_BARRIER_ENABLED \
+ if (0)
+#endif
diff --git a/rts/include/rts/OSThreads.h b/rts/include/rts/OSThreads.h
new file mode 100644
index 0000000000..d24a1313a6
--- /dev/null
+++ b/rts/include/rts/OSThreads.h
@@ -0,0 +1,267 @@
+/* ---------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2001-2009
+ *
+ * Accessing OS threads functionality in a (mostly) OS-independent
+ * manner.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * --------------------------------------------------------------------------*/
+
+#pragma once
+
+#if defined(HAVE_PTHREAD_H) && !defined(mingw32_HOST_OS)
+
+#if defined(CMINUSMINUS)
+
+#define OS_ACQUIRE_LOCK(mutex) foreign "C" pthread_mutex_lock(mutex)
+#define OS_RELEASE_LOCK(mutex) foreign "C" pthread_mutex_unlock(mutex)
+#define OS_ASSERT_LOCK_HELD(mutex) /* nothing */
+
+#else
+
+#include <pthread.h>
+#include <errno.h>
+
+typedef struct {
+ pthread_cond_t cond;
+
+ // Which clock are pthread_cond_timedwait calls referenced against?
+ // N.B. Some older Darwin releases don't support clock_gettime. However, we
+ // do want to reference to CLOCK_MONOTONIC whenever possible as it is more
+ // robust against system time changes and is likely cheaper to query.
+#if defined(HAVE_CLOCK_GETTIME) && defined(HAVE_PTHREAD_CONDATTR_SETCLOCK)
+ clockid_t timeout_clk;
+#endif
+} Condition;
+typedef pthread_mutex_t Mutex;
+typedef pthread_t OSThreadId;
+typedef pthread_key_t ThreadLocalKey;
+
+#define OSThreadProcAttr /* nothing */
+
+#define INIT_COND_VAR PTHREAD_COND_INITIALIZER
+
+#if defined(LOCK_DEBUG)
+#define LOCK_DEBUG_BELCH(what, mutex) \
+ debugBelch("%s(0x%p) %s %d\n", what, mutex, __FILE__, __LINE__)
+#else
+#define LOCK_DEBUG_BELCH(what, mutex) /* nothing */
+#endif
+
+/* Always check the result of lock and unlock. */
+#define OS_ACQUIRE_LOCK(mutex) { \
+ LOCK_DEBUG_BELCH("ACQUIRE_LOCK", mutex); \
+ int __r = pthread_mutex_lock(mutex); \
+ if (__r != 0) { \
+ barf("ACQUIRE_LOCK failed (%s:%d): %d", __FILE__, __LINE__, __r); \
+ } }
+
+// Returns zero if the lock was acquired.
+EXTERN_INLINE int OS_TRY_ACQUIRE_LOCK(pthread_mutex_t *mutex);
+EXTERN_INLINE int OS_TRY_ACQUIRE_LOCK(pthread_mutex_t *mutex)
+{
+ LOCK_DEBUG_BELCH("TRY_ACQUIRE_LOCK", mutex);
+ return pthread_mutex_trylock(mutex);
+}
+
+#define OS_RELEASE_LOCK(mutex) \
+ LOCK_DEBUG_BELCH("RELEASE_LOCK", mutex); \
+ if (pthread_mutex_unlock(mutex) != 0) { \
+ barf("RELEASE_LOCK: I do not own this lock: %s %d", __FILE__,__LINE__); \
+ }
+
+// Note: this assertion calls pthread_mutex_lock() on a mutex that
+// is already held by the calling thread. The mutex should therefore
+// have been created with PTHREAD_MUTEX_ERRORCHECK, otherwise this
+// assertion will hang. We always initialise mutexes with
+// PTHREAD_MUTEX_ERRORCHECK when DEBUG is on (see rts/posix/OSThreads.h).
+#define OS_ASSERT_LOCK_HELD(mutex) ASSERT(pthread_mutex_lock(mutex) == EDEADLK)
+
+#endif // CMINUSMINUS
+
+# elif defined(HAVE_WINDOWS_H)
+
+#if defined(CMINUSMINUS)
+
+/* We jump through a hoop here to get a CCall AcquireSRWLockExclusive
+ and ReleaseSRWLockExclusive, as that's what C-- wants. */
+
+#define OS_ACQUIRE_LOCK(mutex) foreign "stdcall" AcquireSRWLockExclusive(mutex)
+#define OS_RELEASE_LOCK(mutex) foreign "stdcall" ReleaseSRWLockExclusive(mutex)
+#define OS_ASSERT_LOCK_HELD(mutex) /* nothing */
+
+#else // CMINUSMINUS
+
+#include <windows.h>
+#include <synchapi.h>
+
+/* Use native condition variables coupled with SRW locks; these are more
+   efficient and incur a smaller overhead than emulating them with events.
+   See Note [SRW locks]. */
+typedef CONDITION_VARIABLE Condition;
+typedef DWORD OSThreadId;
+// don't be tempted to use HANDLE as the OSThreadId: there can be
+// many HANDLES to a given thread, so comparison would not work.
+typedef DWORD ThreadLocalKey;
+
+#define OSThreadProcAttr __stdcall
+
+#define INIT_COND_VAR 0
+
+/* Note [SRW locks]
+ We have a choice for implementing Mutexes on Windows. Standard
+ Mutexes are kernel objects that require kernel calls to
+ acquire/release, whereas CriticalSections are spin-locks that block
+ in the kernel after spinning for a configurable number of times.
+ CriticalSections are *much* faster than Mutexes, however not as fast as
+ slim reader/writer locks. CriticalSections also require a 48 byte structure
+ to provide lock re-entrancy. We don't need that because the other primitives
+ used for other platforms don't have this, as such locks are used defensively
+ in the RTS in a way that we don't need re-entrancy. This means that SRW's
+ 8 byte size is much more appropriate. With an 8 byte payload there's a
+ higher chance of it being in your cache line. They're also a lot faster than
+ CriticalSections when multiple threads are involved. CS requires setup and
+ teardown via kernel calls while SRWL is zero-initialized via
+ SRWLOCK_INIT assignment. */
+
+typedef SRWLOCK Mutex;
+
+#if defined(LOCK_DEBUG)
+
+#define OS_ACQUIRE_LOCK(mutex) \
+ debugBelch("ACQUIRE_LOCK(0x%p) %s %d\n", mutex,__FILE__,__LINE__); \
+ AcquireSRWLockExclusive(mutex)
+#define OS_RELEASE_LOCK(mutex) \
+ debugBelch("RELEASE_LOCK(0x%p) %s %d\n", mutex,__FILE__,__LINE__); \
+ ReleaseSRWLockExclusive(mutex)
+#define OS_ASSERT_LOCK_HELD(mutex) /* nothing */
+
+#else
+
+#define OS_ACQUIRE_LOCK(mutex) AcquireSRWLockExclusive(mutex)
+#define OS_TRY_ACQUIRE_LOCK(mutex) (TryAcquireSRWLockExclusive(mutex) == 0)
+#define OS_RELEASE_LOCK(mutex) ReleaseSRWLockExclusive(mutex)
+#define OS_INIT_LOCK(mutex) InitializeSRWLock(mutex)
+#define OS_CLOSE_LOCK(mutex)
+
+// I don't know how to do this. TryEnterCriticalSection() doesn't do
+// the right thing.
+#define OS_ASSERT_LOCK_HELD(mutex) /* nothing */
+
+#endif // LOCK_DEBUG
+
+#endif // CMINUSMINUS
+
+# elif defined(THREADED_RTS)
+# error "Threads not supported"
+# endif
+
+
+#if !defined(CMINUSMINUS)
+//
+// General thread operations
+//
+extern OSThreadId osThreadId ( void );
+extern void shutdownThread ( void ) GNUC3_ATTRIBUTE(__noreturn__);
+extern void yieldThread ( void );
+
+typedef void* OSThreadProcAttr OSThreadProc(void *);
+
+extern int createOSThread ( OSThreadId* tid, char *name,
+ OSThreadProc *startProc, void *param);
+extern bool osThreadIsAlive ( OSThreadId id );
+extern void interruptOSThread ( OSThreadId id );
+extern void joinOSThread ( OSThreadId id );
+
+//
+// Condition Variables
+//
+extern void initCondition ( Condition* pCond );
+extern void closeCondition ( Condition* pCond );
+extern void broadcastCondition ( Condition* pCond );
+extern void signalCondition ( Condition* pCond );
+extern void waitCondition ( Condition* pCond, Mutex* pMut );
+// Returns false on timeout, true otherwise.
+extern bool timedWaitCondition ( Condition* pCond, Mutex* pMut, Time timeout);
+
+//
+// Mutexes
+//
+extern void initMutex ( Mutex* pMut );
+extern void closeMutex ( Mutex* pMut );
+
+//
+// Thread-local storage
+//
+void newThreadLocalKey (ThreadLocalKey *key);
+void *getThreadLocalVar (ThreadLocalKey *key);
+void setThreadLocalVar (ThreadLocalKey *key, void *value);
+void freeThreadLocalKey (ThreadLocalKey *key);
+
+// Processors and affinity
+void setThreadAffinity (uint32_t n, uint32_t m);
+void setThreadNode (uint32_t node);
+void releaseThreadNode (void);
+#endif // !CMINUSMINUS
+
+#if defined(THREADED_RTS)
+
+#define ACQUIRE_LOCK(l) OS_ACQUIRE_LOCK(l)
+#define TRY_ACQUIRE_LOCK(l) OS_TRY_ACQUIRE_LOCK(l)
+#define RELEASE_LOCK(l) OS_RELEASE_LOCK(l)
+#define ASSERT_LOCK_HELD(l) OS_ASSERT_LOCK_HELD(l)
+
+#else
+
+#define ACQUIRE_LOCK(l)
+#define TRY_ACQUIRE_LOCK(l) 0
+#define RELEASE_LOCK(l)
+#define ASSERT_LOCK_HELD(l)
+
+#endif /* defined(THREADED_RTS) */
+
+#if !defined(CMINUSMINUS)
+//
+// Support for forkOS (defined regardless of THREADED_RTS, but does
+// nothing when !THREADED_RTS).
+//
+int forkOS_createThread ( HsStablePtr entry );
+
+//
+// Free any global resources created in OSThreads.
+//
+void freeThreadingResources(void);
+
+//
+// Returns the number of processor cores in the machine
+//
+uint32_t getNumberOfProcessors (void);
+
+//
+// Support for getting at the kernel thread Id for tracing/profiling.
+//
+// This stuff is optional and only used for tracing/profiling purposes, to
+// match up thread ids recorded by other tools. For example, on Linux and OSX
+// the pthread_t type is not the same as the kernel thread id, and system
+// profiling tools like Linux perf, and OSX's DTrace use the kernel thread Id.
+// So if we want to match up RTS tasks with kernel threads recorded by these
+// tools then we need to know the kernel thread Id, and this must be a separate
+// type from the OSThreadId.
+//
+// If the feature cannot be supported on an OS, it is OK to always return 0.
+// In particular it would almost certainly be meaningless on systems not using
+// a 1:1 threading model.
+
+// We use a common serialisable representation on all OSs
+// This is ok for Windows, OSX and Linux.
+typedef StgWord64 KernelThreadId;
+
+// Get the current kernel thread id
+KernelThreadId kernelThreadId (void);
+
+#endif /* CMINUSMINUS */
diff --git a/rts/include/rts/Parallel.h b/rts/include/rts/Parallel.h
new file mode 100644
index 0000000000..7577a3967c
--- /dev/null
+++ b/rts/include/rts/Parallel.h
@@ -0,0 +1,16 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Parallelism-related functionality
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+StgInt newSpark (StgRegTable *reg, StgClosure *p);
diff --git a/rts/include/rts/PosixSource.h b/rts/include/rts/PosixSource.h
new file mode 100644
index 0000000000..13fd7b0ff5
--- /dev/null
+++ b/rts/include/rts/PosixSource.h
@@ -0,0 +1,38 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Include this file into sources which should not need any non-Posix services.
+ * That includes most RTS C sources.
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include <ghcplatform.h>
+
+/* We aim for C99 so we need to define following two defines in a consistent way
+ with what POSIX/XOPEN provide for C99. Some OSes are particularly picky about
+ the right versions defined here, e.g. Solaris
+ We also settle on lowest version of POSIX/XOPEN needed for proper C99 support
+ here which is POSIX.1-2001 compilation and Open Group Technical Standard,
+ Issue 6 (XPG6). XPG6 itself is a result of the merge of X/Open and POSIX
+ specification. It is also referred to as IEEE Std. 1003.1-2001 or ISO/IEC
+ 9945:2002 or UNIX 03 and SUSv3.
+ Please also see trac ticket #11757 for more information about switch
+ to C99/C11.
+
+ However, the use of `strnlen`, which is strictly speaking only available in
+ IEEE Std 1003.1-2008 (XPG7), requires lifting the bounds, to be able to
+ compile ghc on systems that are strict about enforcing the standard, e.g.
+ Apple's mobile platforms.
+
+ Oracle's Solaris 11 supports only up to XPG6, hence the ifdef.
+ */
+
+#if defined(solaris2_HOST_OS)
+#define _POSIX_C_SOURCE 200112L
+#define _XOPEN_SOURCE 600
+#else
+#define _POSIX_C_SOURCE 200809L
+#define _XOPEN_SOURCE 700
+#endif
diff --git a/rts/include/rts/PrimFloat.h b/rts/include/rts/PrimFloat.h
new file mode 100644
index 0000000000..26b6f7ceec
--- /dev/null
+++ b/rts/include/rts/PrimFloat.h
@@ -0,0 +1,17 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Primitive floating-point operations
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+StgDouble __int_encodeDouble (I_ j, I_ e);
+StgFloat __int_encodeFloat (I_ j, I_ e);
+StgDouble __word_encodeDouble (W_ j, I_ e);
+StgFloat __word_encodeFloat (W_ j, I_ e);
diff --git a/rts/include/rts/Profiling.h b/rts/include/rts/Profiling.h
new file mode 100644
index 0000000000..b329a493db
--- /dev/null
+++ b/rts/include/rts/Profiling.h
@@ -0,0 +1,17 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2017-2018
+ *
+ * Cost-centre profiling API
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+void registerCcList(CostCentre **cc_list);
+void registerCcsList(CostCentreStack **cc_list);
diff --git a/rts/include/rts/Signals.h b/rts/include/rts/Signals.h
new file mode 100644
index 0000000000..96f0756538
--- /dev/null
+++ b/rts/include/rts/Signals.h
@@ -0,0 +1,23 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * RTS signal handling
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* NB. #included in Haskell code, no prototypes in here. */
+
+/* arguments to stg_sig_install() */
+#define STG_SIG_DFL (-1)
+#define STG_SIG_IGN (-2)
+#define STG_SIG_ERR (-3)
+#define STG_SIG_HAN (-4)
+#define STG_SIG_RST (-5)
diff --git a/rts/include/rts/SpinLock.h b/rts/include/rts/SpinLock.h
new file mode 100644
index 0000000000..c1fe6c866c
--- /dev/null
+++ b/rts/include/rts/SpinLock.h
@@ -0,0 +1,75 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2006-2009
+ *
+ * Spin locks
+ *
+ * These are simple spin-only locks as opposed to Mutexes which
+ * probably spin for a while before blocking in the kernel. We use
+ * these when we are sure that all our threads are actively running on
+ * a CPU, eg. in the GC.
+ *
+ * TODO: measure whether we really need these, or whether Mutexes
+ * would do (and be a bit safer if a CPU becomes loaded).
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+#if defined(THREADED_RTS)
+
+typedef struct SpinLock_
+{
+ StgWord lock;
+#if defined(PROF_SPIN)
+ StgWord64 spin; // incremented every time we spin in ACQUIRE_SPIN_LOCK
+ StgWord64 yield; // incremented every time we yield in ACQUIRE_SPIN_LOCK
+#endif
+} SpinLock;
+
+// PROF_SPIN enables counting the number of times we spin on a lock
+#if defined(PROF_SPIN)
+#define IF_PROF_SPIN(x) x
+#else
+#define IF_PROF_SPIN(x)
+#endif
+
+void acquire_spin_lock_slow_path(SpinLock * p);
+
+// acquire spin lock
+INLINE_HEADER void ACQUIRE_SPIN_LOCK(SpinLock * p)
+{
+ StgWord32 r = cas((StgVolatilePtr)&(p->lock), 1, 0);
+ if (RTS_UNLIKELY(r == 0))
+ acquire_spin_lock_slow_path(p);
+}
+
+// release spin lock
+INLINE_HEADER void RELEASE_SPIN_LOCK(SpinLock * p)
+{
+ RELEASE_STORE(&p->lock, 1);
+}
+
+// initialise spin lock
+INLINE_HEADER void initSpinLock(SpinLock * p)
+{
+ IF_PROF_SPIN(p->spin = 0);
+ IF_PROF_SPIN(p->yield = 0);
+ RELEASE_STORE(&p->lock, 1);
+}
+
+#else /* !THREADED_RTS */
+
+// Using macros here means we don't have to ensure the argument is in scope
+#define ACQUIRE_SPIN_LOCK(p) /* nothing */
+#define RELEASE_SPIN_LOCK(p) /* nothing */
+
+INLINE_HEADER void initSpinLock(void * p STG_UNUSED)
+{ /* nothing */ }
+
+#endif /* THREADED_RTS */
diff --git a/rts/include/rts/StableName.h b/rts/include/rts/StableName.h
new file mode 100644
index 0000000000..4e4f976dae
--- /dev/null
+++ b/rts/include/rts/StableName.h
@@ -0,0 +1,32 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Stable Names
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* -----------------------------------------------------------------------------
+ PRIVATE from here.
+ -------------------------------------------------------------------------- */
+
+typedef struct {
+ StgPtr addr; // Haskell object when entry is in use, next free
+ // entry (NULL when this is the last free entry)
+ // otherwise. May be NULL temporarily during GC (when
+ // pointee dies).
+
+ StgPtr old; // Old Haskell object, used during GC
+
+ StgClosure *sn_obj; // The StableName object, or NULL when the entry is
+ // free
+} snEntry;
+
+extern DLL_IMPORT_RTS snEntry *stable_name_table;
diff --git a/rts/include/rts/StablePtr.h b/rts/include/rts/StablePtr.h
new file mode 100644
index 0000000000..73cd5bed4d
--- /dev/null
+++ b/rts/include/rts/StablePtr.h
@@ -0,0 +1,39 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Stable Pointers
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+ATTR_ALWAYS_INLINE EXTERN_INLINE StgPtr deRefStablePtr (StgStablePtr stable_ptr);
+StgStablePtr getStablePtr (StgPtr p);
+
+/* -----------------------------------------------------------------------------
+ PRIVATE from here.
+ -------------------------------------------------------------------------- */
+
+typedef struct {
+ StgPtr addr; // Haskell object when entry is in use, next free
+ // entry (NULL when this is the last free entry)
+ // otherwise.
+} spEntry;
+
+extern DLL_IMPORT_RTS spEntry *stable_ptr_table;
+
+ATTR_ALWAYS_INLINE EXTERN_INLINE
+StgPtr deRefStablePtr(StgStablePtr sp)
+{
+ // acquire load to ensure that we see the new SPT if it has been recently
+ // enlarged.
+ const spEntry *spt = ACQUIRE_LOAD(&stable_ptr_table);
+ // acquire load to ensure that the referenced object is visible.
+ return ACQUIRE_LOAD(&spt[(StgWord)sp].addr);
+}
diff --git a/rts/include/rts/StaticPtrTable.h b/rts/include/rts/StaticPtrTable.h
new file mode 100644
index 0000000000..5753e957bd
--- /dev/null
+++ b/rts/include/rts/StaticPtrTable.h
@@ -0,0 +1,44 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2008-2009
+ *
+ * Initialization of the Static Pointer Table
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/** Inserts an entry in the Static Pointer Table.
+ *
+ * The key is a fingerprint computed from the static pointer and the spe_closure
+ * is a pointer to the closure defining the table entry.
+ *
+ * A stable pointer to the closure is made to prevent it from being garbage
+ * collected while the entry exists on the table.
+ *
+ * This function is called from the code generated by
+ * compiler/deSugar/StaticPtrTable.sptInitCode
+ *
+ * */
+void hs_spt_insert (StgWord64 key[2],void* spe_closure);
+
+/** Inserts an entry for a StgTablePtr in the Static Pointer Table.
+ *
+ * This function is called from the GHCi interpreter to insert
+ * SPT entries for bytecode objects.
+ *
+ * */
+void hs_spt_insert_stableptr(StgWord64 key[2], StgStablePtr *entry);
+
+/** Removes an entry from the Static Pointer Table.
+ *
+ * This function is called from the code generated by
+ * compiler/deSugar/StaticPtrTable.sptInitCode
+ *
+ * */
+void hs_spt_remove (StgWord64 key[2]);
diff --git a/rts/include/rts/TSANUtils.h b/rts/include/rts/TSANUtils.h
new file mode 100644
index 0000000000..72f4541a89
--- /dev/null
+++ b/rts/include/rts/TSANUtils.h
@@ -0,0 +1,67 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2006-2019
+ *
+ * Utilities for annotating "safe" data races for Thread Sanitizer
+ * -------------------------------------------------------------------------- */
+
+/*
+ * Note [ThreadSanitizer]
+ * ~~~~~~~~~~~~~~~~~~~~~~~
+ * ThreadSanitizer (abbreviated TSAN) is a library and set of compiler
+ * instrumentation (supported by both GCC and Clang) for checking C/C++ code
+ * for data races.
+ *
+ * In GHC we use it to check the runtime system implementation (but not yet
+ * generated code). TSAN requires that the checked program uses C++11-style
+ * atomics for all potentially-racing accesses. Note that we use the __atomic_*
+ * builtin operations but not the C11 _Atomic types to maintain compatibility
+ * with older compilers.
+ *
+ * In addition to the atomic operations themselves, TSAN provides a variety of
+ * annotation operations which can be used to annotate cases where the
+ * intended semantics are either ambiguous or intentionally racy (known as a
+ * *benign race*).
+ *
+ * Finally, there are a few benign races which we can't easily annotate. To
+ * silence these errors we have a suppressions file in rts/.tsan-suppressions.
+ * In general it's best to add suppressions only as a last resort, when the
+ * more precise annotation functions prove to be insufficient.
+ *
+ * Users guide: https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual
+ */
+
+#if defined(__SANITIZE_THREAD__)
+#define TSAN_ENABLED
+#elif defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+#define TSAN_ENABLED
+#endif
+#endif
+
+#if defined(TSAN_ENABLED)
+#if !defined(HAVE_C11_ATOMICS)
+#error TSAN cannot be enabled without C11 atomics support.
+#endif
+
+#define TSAN_ANNOTATE_HAPPENS_BEFORE(addr) \
+ AnnotateHappensBefore(__FILE__, __LINE__, (void*)(addr))
+#define TSAN_ANNOTATE_HAPPENS_AFTER(addr) \
+ AnnotateHappensAfter(__FILE__, __LINE__, (void*)(addr))
+#define TSAN_ANNOTATE_BENIGN_RACE_SIZED(addr,size,desc) \
+ AnnotateBenignRaceSized(__FILE__, __LINE__, (void*)(addr), size, desc)
+void AnnotateHappensBefore(const char* f, int l, void* addr);
+void AnnotateHappensAfter(const char* f, int l, void* addr);
+void AnnotateBenignRaceSized(const char *file,
+ int line,
+ const volatile void *mem,
+ long size,
+ const char *description);
+#else
+#define TSAN_ANNOTATE_HAPPENS_BEFORE(addr)
+#define TSAN_ANNOTATE_HAPPENS_AFTER(addr)
+#define TSAN_ANNOTATE_BENIGN_RACE_SIZED(addr,size,desc)
+#endif
+
+#define TSAN_ANNOTATE_BENIGN_RACE(addr,desc) \
+ TSAN_ANNOTATE_BENIGN_RACE_SIZED((void*)(addr), sizeof(*addr), desc)
diff --git a/rts/include/rts/TTY.h b/rts/include/rts/TTY.h
new file mode 100644
index 0000000000..9892571a17
--- /dev/null
+++ b/rts/include/rts/TTY.h
@@ -0,0 +1,17 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2009
+ *
+ * POSIX TTY-related functionality
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+void* __hscore_get_saved_termios(int fd);
+void __hscore_set_saved_termios(int fd, void* ts);
diff --git a/rts/include/rts/Threads.h b/rts/include/rts/Threads.h
new file mode 100644
index 0000000000..51c11742ca
--- /dev/null
+++ b/rts/include/rts/Threads.h
@@ -0,0 +1,79 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team 1998-2009
+ *
+ * External API for the scheduler. For most uses, the functions in
+ * RtsAPI.h should be enough.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#if defined(HAVE_SYS_TYPES_H)
+#include <sys/types.h>
+#endif
+
+//
+// Creating threads
+//
+StgTSO *createThread (Capability *cap, W_ stack_size);
+
+// precondition:
+// (*cap)->running_task != NULL
+// (*cap)->running_task must be a bound task (e.g. newBoundTask() has been
+// called on that thread).
+void scheduleWaitThread (/* in */ StgTSO *tso,
+ /* out */ HaskellObj* ret,
+ /* inout */ Capability **cap);
+
+StgTSO *createGenThread (Capability *cap, W_ stack_size,
+ StgClosure *closure);
+StgTSO *createIOThread (Capability *cap, W_ stack_size,
+ StgClosure *closure);
+StgTSO *createStrictIOThread (Capability *cap, W_ stack_size,
+ StgClosure *closure);
+
+// Suspending/resuming threads around foreign calls
+void * suspendThread (StgRegTable *, bool interruptible);
+StgRegTable * resumeThread (void *);
+
+//
+// Thread operations from Threads.c
+//
+bool eq_thread (StgPtr tso1, StgPtr tso2);
+int cmp_thread (StgPtr tso1, StgPtr tso2);
+long rts_getThreadId (StgPtr tso);
+void rts_enableThreadAllocationLimit (StgPtr tso);
+void rts_disableThreadAllocationLimit (StgPtr tso);
+
+#if !defined(mingw32_HOST_OS)
+pid_t forkProcess (HsStablePtr *entry);
+#else
+pid_t forkProcess (HsStablePtr *entry)
+ GNU_ATTRIBUTE(__noreturn__);
+#endif
+
+HsBool rtsSupportsBoundThreads (void);
+
+// The number of Capabilities.
+// ToDo: I would like this to be private to the RTS and instead expose a
+// function getNumCapabilities(), but it is used in compiler/cbits/genSym.c
+extern unsigned int n_capabilities;
+
+// The number of Capabilities that are not disabled
+extern uint32_t enabled_capabilities;
+
+#if !IN_STG_CODE
+extern Capability MainCapability;
+#endif
+
+//
+// Change the number of capabilities (only supports increasing the
+// current value at the moment).
+//
+extern void setNumCapabilities (uint32_t new_);
diff --git a/rts/include/rts/Ticky.h b/rts/include/rts/Ticky.h
new file mode 100644
index 0000000000..93043d8514
--- /dev/null
+++ b/rts/include/rts/Ticky.h
@@ -0,0 +1,32 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * TICKY_TICKY types
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* -----------------------------------------------------------------------------
+ The StgEntCounter type - needed regardless of TICKY_TICKY
+ -------------------------------------------------------------------------- */
+
+typedef struct _StgEntCounter {
+ /* Using StgWord for everything, because both the C and asm code
+ generators make trouble if you try to pack things tighter */
+ StgWord registeredp; /* 0 == no, 1 == yes */
+ StgInt arity; /* arity (static info) */
+ StgInt allocd; /* # allocation of this closure */
+ /* (rest of args are in registers) */
+ char *str; /* name of the thing */
+ char *arg_kinds; /* info about the args types */
+ StgInt entry_count; /* Trips to fast entry code */
+ StgInt allocs; /* number of allocations by this fun */
+ struct _StgEntCounter *link;/* link to chain them all together */
+} StgEntCounter;
diff --git a/rts/include/rts/Time.h b/rts/include/rts/Time.h
new file mode 100644
index 0000000000..ab291cd6c5
--- /dev/null
+++ b/rts/include/rts/Time.h
@@ -0,0 +1,45 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Time values in the RTS
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * --------------------------------------------------------------------------*/
+
+#pragma once
+
+// For most time values in the RTS we use a fixed resolution of nanoseconds,
+// normalising the time we get from platform-dependent APIs to this
+// resolution.
+#define TIME_RESOLUTION 1000000000
+typedef int64_t Time;
+
+#define TIME_MAX HS_INT64_MAX
+
+#if TIME_RESOLUTION == 1000000000
+// I'm being lazy, but it's awkward to define fully general versions of these
+#define TimeToMS(t) ((t) / 1000000)
+#define TimeToUS(t) ((t) / 1000)
+#define TimeToNS(t) (t)
+#define MSToTime(t) ((Time)(t) * 1000000)
+#define USToTime(t) ((Time)(t) * 1000)
+#define NSToTime(t) ((Time)(t))
+#else
+#error Fix TimeToNS(), TimeToUS() etc.
+#endif
+
+#define SecondsToTime(t) ((Time)(t) * TIME_RESOLUTION)
+#define TimeToSeconds(t) ((t) / TIME_RESOLUTION)
+#define TimeToSecondsDbl(t) ((double)(t) / TIME_RESOLUTION)
+
+// Use instead of SecondsToTime() when we have a floating-point
+// seconds value, to avoid truncating it.
+INLINE_HEADER Time fsecondsToTime (double t)
+{
+ return (Time)(t * TIME_RESOLUTION);
+}
+
+Time getProcessElapsedTime (void);
diff --git a/rts/include/rts/Timer.h b/rts/include/rts/Timer.h
new file mode 100644
index 0000000000..c60cd37590
--- /dev/null
+++ b/rts/include/rts/Timer.h
@@ -0,0 +1,18 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1995-2009
+ *
+ * Interface to the RTS timer signal (uses OS-dependent Ticker.h underneath)
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+void startTimer (void);
+void stopTimer (void);
+int rtsTimerSignal (void);
diff --git a/rts/include/rts/Types.h b/rts/include/rts/Types.h
new file mode 100644
index 0000000000..51e8f80e66
--- /dev/null
+++ b/rts/include/rts/Types.h
@@ -0,0 +1,31 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * RTS-specific types.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include <stddef.h>
+#include <stdbool.h>
+
+// Deprecated, use uint32_t instead.
+typedef unsigned int nat __attribute__((deprecated)); /* uint32_t */
+
+/* ullong (64|128-bit) type: only include if needed (not ANSI) */
+#if defined(__GNUC__)
+#define LL(x) (x##LL)
+#else
+#define LL(x) (x##L)
+#endif
+
+typedef struct StgClosure_ StgClosure;
+typedef struct StgInfoTable_ StgInfoTable;
+typedef struct StgTSO_ StgTSO;
diff --git a/rts/include/rts/Utils.h b/rts/include/rts/Utils.h
new file mode 100644
index 0000000000..4aee9c3a67
--- /dev/null
+++ b/rts/include/rts/Utils.h
@@ -0,0 +1,16 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * RTS external APIs. This file declares everything that the GHC RTS
+ * exposes externally.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* Alternate to raise(3) for threaded rts, for BSD-based OSes */
+int genericRaise(int sig);
diff --git a/rts/include/rts/prof/CCS.h b/rts/include/rts/prof/CCS.h
new file mode 100644
index 0000000000..7685f03003
--- /dev/null
+++ b/rts/include/rts/prof/CCS.h
@@ -0,0 +1,226 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2009-2012
+ *
+ * Macros for profiling operations in STG code
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* -----------------------------------------------------------------------------
+ * Data Structures
+ * ---------------------------------------------------------------------------*/
+/*
+ * Note [struct alignment]
+ * NB. be careful to avoid unwanted padding between fields, by
+ * putting the 8-byte fields on an 8-byte boundary. Padding can
+ * vary between C compilers, and we don't take into account any
+ * possible padding when generating CCS and CC decls in the code
+ * generator (GHC.StgToCmm.Prof).
+ */
+
+typedef struct CostCentre_ {
+ StgInt ccID; // Unique Id, allocated by the RTS
+
+ char * label;
+ char * module;
+ char * srcloc;
+
+ // used for accumulating costs at the end of the run...
+ StgWord64 mem_alloc; // align 8 (Note [struct alignment])
+ StgWord time_ticks;
+
+ StgBool is_caf; // true <=> CAF cost centre
+
+ struct CostCentre_ *link;
+} CostCentre;
+
+typedef struct CostCentreStack_ {
+ StgInt ccsID; // unique ID, allocated by the RTS
+
+ CostCentre *cc; // Cost centre at the top of the stack
+
+ struct CostCentreStack_ *prevStack; // parent
+ struct IndexTable_ *indexTable; // children
+ struct CostCentreStack_ *root; // root of stack
+ StgWord depth; // number of items in the stack
+
+ StgWord64 scc_count; // Count of times this CCS is entered
+ // align 8 (Note [struct alignment])
+
+ StgWord selected; // is this CCS shown in the heap
+ // profile? (zero if excluded via -hc
+ // -hm etc.)
+
+ StgWord time_ticks; // number of time ticks accumulated by
+ // this CCS
+
+ StgWord64 mem_alloc; // mem allocated by this CCS
+ // align 8 (Note [struct alignment])
+
+ StgWord64 inherited_alloc; // sum of mem_alloc over all children
+ // (calculated at the end)
+ // align 8 (Note [struct alignment])
+
+ StgWord inherited_ticks; // sum of time_ticks over all children
+ // (calculated at the end)
+} CostCentreStack;
+
+
+/* -----------------------------------------------------------------------------
+ * Start and stop the profiling timer. These can be called from
+ * Haskell to restrict the profile to portion(s) of the execution.
+ * See the module GHC.Profiling.
+ * ---------------------------------------------------------------------------*/
+
+void stopProfTimer ( void );
+void startProfTimer ( void );
+
+/* -----------------------------------------------------------------------------
+ * The rest is PROFILING only...
+ * ---------------------------------------------------------------------------*/
+
+#if defined(PROFILING)
+
+/* -----------------------------------------------------------------------------
+ * Constants
+ * ---------------------------------------------------------------------------*/
+
+#define EMPTY_STACK NULL
+#define EMPTY_TABLE NULL
+
+/* Constants used to set is_caf flag on CostCentres */
+#define CC_IS_CAF true
+#define CC_NOT_CAF false
+/* -----------------------------------------------------------------------------
+ * Data Structures
+ * ---------------------------------------------------------------------------*/
+
+// IndexTable is the list of children of a CCS. (Alternatively it is a
+// cache of the results of pushing onto a CCS, so that the second and
+// subsequent times we push a certain CC on a CCS we get the same
+// result).
+
+typedef struct IndexTable_ {
+ // Just a linked list of (cc, ccs) pairs, where the `ccs` is the result of
+ // pushing `cc` to the owner of the index table (another CostCentreStack).
+ CostCentre *cc;
+ CostCentreStack *ccs;
+ struct IndexTable_ *next;
+ // back_edge is true when `cc` is already in the stack, so pushing it
+ // truncates or drops (see RECURSION_DROPS and RECURSION_TRUNCATES in
+ // Profiling.c).
+ bool back_edge;
+} IndexTable;
+
+
+/* -----------------------------------------------------------------------------
+ Pre-defined cost centres and cost centre stacks
+ -------------------------------------------------------------------------- */
+
+#if IN_STG_CODE
+
+extern StgWord CC_MAIN[];
+extern StgWord CCS_MAIN[]; // Top CCS
+
+extern StgWord CC_SYSTEM[];
+extern StgWord CCS_SYSTEM[]; // RTS costs
+
+extern StgWord CC_GC[];
+extern StgWord CCS_GC[]; // Garbage collector costs
+
+extern StgWord CC_OVERHEAD[];
+extern StgWord CCS_OVERHEAD[]; // Profiling overhead
+
+extern StgWord CC_DONT_CARE[];
+extern StgWord CCS_DONT_CARE[]; // CCS attached to static constructors
+
+#else
+
+extern CostCentre CC_MAIN[];
+extern CostCentreStack CCS_MAIN[]; // Top CCS
+
+extern CostCentre CC_SYSTEM[];
+extern CostCentreStack CCS_SYSTEM[]; // RTS costs
+
+extern CostCentre CC_GC[];
+extern CostCentreStack CCS_GC[]; // Garbage collector costs
+
+extern CostCentre CC_OVERHEAD[];
+extern CostCentreStack CCS_OVERHEAD[]; // Profiling overhead
+
+extern CostCentre CC_DONT_CARE[];
+extern CostCentreStack CCS_DONT_CARE[]; // shouldn't ever get set
+
+extern CostCentre CC_PINNED[];
+extern CostCentreStack CCS_PINNED[]; // pinned memory
+
+extern CostCentre CC_IDLE[];
+extern CostCentreStack CCS_IDLE[]; // capability is idle
+
+#endif /* IN_STG_CODE */
+
+extern unsigned int RTS_VAR(era);
+
+/* -----------------------------------------------------------------------------
+ * Functions
+ * ---------------------------------------------------------------------------*/
+
+CostCentreStack * pushCostCentre (CostCentreStack *, CostCentre *);
+void enterFunCCS (StgRegTable *reg, CostCentreStack *);
+CostCentre *mkCostCentre (char *label, char *module, char *srcloc);
+
+extern CostCentre * RTS_VAR(CC_LIST); // registered CC list
+
+/* -----------------------------------------------------------------------------
+ * Declaring Cost Centres & Cost Centre Stacks.
+ * -------------------------------------------------------------------------- */
+
+# define CC_DECLARE(cc_ident,name,mod,loc,caf,is_local) \
+ is_local CostCentre cc_ident[1] \
+ = {{ .ccID = 0, \
+ .label = name, \
+ .module = mod, \
+ .srcloc = loc, \
+ .time_ticks = 0, \
+ .mem_alloc = 0, \
+ .link = 0, \
+ .is_caf = caf \
+ }};
+
+# define CCS_DECLARE(ccs_ident,cc_ident,is_local) \
+ is_local CostCentreStack ccs_ident[1] \
+ = {{ .ccsID = 0, \
+ .cc = cc_ident, \
+ .prevStack = NULL, \
+ .indexTable = NULL, \
+ .root = NULL, \
+ .depth = 0, \
+ .selected = 0, \
+ .scc_count = 0, \
+ .time_ticks = 0, \
+ .mem_alloc = 0, \
+ .inherited_ticks = 0, \
+ .inherited_alloc = 0 \
+ }};
+
+/* -----------------------------------------------------------------------------
+ * Time / Allocation Macros
+ * ---------------------------------------------------------------------------*/
+
+/* eliminate profiling overhead from allocation costs */
+#define CCS_ALLOC(ccs, size) (ccs)->mem_alloc += ((size)-sizeofW(StgProfHeader))
+#define ENTER_CCS_THUNK(cap,p) cap->r.rCCCS = p->header.prof.ccs
+
+#else /* !PROFILING */
+
+#define CCS_ALLOC(ccs, amount) doNothing()
+#define ENTER_CCS_THUNK(cap,p) doNothing()
+
+#endif /* PROFILING */
diff --git a/rts/include/rts/prof/Heap.h b/rts/include/rts/prof/Heap.h
new file mode 100644
index 0000000000..90700c809b
--- /dev/null
+++ b/rts/include/rts/prof/Heap.h
@@ -0,0 +1,24 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The University of Glasgow, 2009
+ *
+ * Heap Census Profiling
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* -----------------------------------------------------------------------------
+ * Fine-grained control over heap census profiling which can be called from
+ * Haskell to restrict the profile to portion(s) of the execution.
+ * See the module GHC.Profiling.
+ * ---------------------------------------------------------------------------*/
+
+void requestHeapCensus ( void );
+void startHeapProfTimer ( void );
+void stopHeapProfTimer ( void );
diff --git a/rts/include/rts/prof/LDV.h b/rts/include/rts/prof/LDV.h
new file mode 100644
index 0000000000..73f7786537
--- /dev/null
+++ b/rts/include/rts/prof/LDV.h
@@ -0,0 +1,44 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The University of Glasgow, 2009
+ *
+ * Lag/Drag/Void profiling.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#if defined(PROFILING)
+
+/* retrieves the LDV word from closure c */
+#define LDVW(c) (((StgClosure *)(c))->header.prof.hp.ldvw)
+
+/*
+ * Stores the creation time for closure c.
+ * This macro is called at the very moment of closure creation.
+ *
+ * NOTE: this initializes LDVW(c) to zero, which ensures that there
+ * is no conflict between retainer profiling and LDV profiling,
+ * because retainer profiling also expects LDVW(c) to be initialised
+ * to zero.
+ */
+
+#if defined(CMINUSMINUS)
+
+#else
+
+#define LDV_RECORD_CREATE(c) \
+ LDVW((c)) = ((StgWord)RTS_DEREF(era) << LDV_SHIFT) | LDV_STATE_CREATE
+
+#endif
+
+#else /* !PROFILING */
+
+#define LDV_RECORD_CREATE(c) /* nothing */
+
+#endif /* PROFILING */
diff --git a/rts/include/rts/storage/Block.h b/rts/include/rts/storage/Block.h
new file mode 100644
index 0000000000..730947e375
--- /dev/null
+++ b/rts/include/rts/storage/Block.h
@@ -0,0 +1,368 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-1999
+ *
+ * Block structure for the storage manager
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include "ghcconfig.h"
+
+/* The actual block and megablock-size constants are defined in
+ * rts/include/Constants.h, all constants here are derived from these.
+ */
+
+/* Block related constants (BLOCK_SHIFT is defined in Constants.h) */
+
+#if SIZEOF_LONG == SIZEOF_VOID_P
+#define UNIT 1UL
+#elif SIZEOF_LONG_LONG == SIZEOF_VOID_P
+#define UNIT 1ULL
+#else
+#error "Size of pointer is suspicious."
+#endif
+
+#if defined(CMINUSMINUS)
+#define BLOCK_SIZE (1<<BLOCK_SHIFT)
+#else
+#define BLOCK_SIZE (UNIT<<BLOCK_SHIFT)
+// Note [integer overflow]
+#endif
+
+#define BLOCK_SIZE_W (BLOCK_SIZE/sizeof(W_))
+#define BLOCK_MASK (BLOCK_SIZE-1)
+
+#define BLOCK_ROUND_UP(p) (((W_)(p)+BLOCK_SIZE-1) & ~BLOCK_MASK)
+#define BLOCK_ROUND_DOWN(p) ((void *) ((W_)(p) & ~BLOCK_MASK))
+
+/* Megablock related constants (MBLOCK_SHIFT is defined in Constants.h) */
+
+#if defined(CMINUSMINUS)
+#define MBLOCK_SIZE (1<<MBLOCK_SHIFT)
+#else
+#define MBLOCK_SIZE (UNIT<<MBLOCK_SHIFT)
+// Note [integer overflow]
+#endif
+
+#define MBLOCK_SIZE_W (MBLOCK_SIZE/sizeof(W_))
+#define MBLOCK_MASK (MBLOCK_SIZE-1)
+
+#define MBLOCK_ROUND_UP(p) ((void *)(((W_)(p)+MBLOCK_SIZE-1) & ~MBLOCK_MASK))
+#define MBLOCK_ROUND_DOWN(p) ((void *)((W_)(p) & ~MBLOCK_MASK ))
+
+/* The largest size an object can be before we give it a block of its
+ * own and treat it as an immovable object during GC, expressed as a
+ * fraction of BLOCK_SIZE.
+ */
+#define LARGE_OBJECT_THRESHOLD ((uint32_t)(BLOCK_SIZE * 8 / 10))
+
+/*
+ * Note [integer overflow]
+ *
+ * The UL suffix in BLOCK_SIZE and MBLOCK_SIZE promotes the expression
+ * to an unsigned long, which means that expressions involving these
+ * will be promoted to unsigned long, which makes integer overflow
+ * less likely. Historically, integer overflow in expressions like
+ * (n * BLOCK_SIZE)
+ * where n is int or unsigned int, have caused obscure segfaults in
+ * programs that use large amounts of memory (e.g. #7762, #5086).
+ */
+
+/* -----------------------------------------------------------------------------
+ * Block descriptor. This structure *must* be the right length, so we
+ * can do pointer arithmetic on pointers to it.
+ */
+
+/* The block descriptor is 64 bytes on a 64-bit machine, and 32-bytes
+ * on a 32-bit machine.
+ */
+
+// Note: fields marked with [READ ONLY] must not be modified by the
+// client of the block allocator API. All other fields can be
+// freely modified.
+
+#if !defined(CMINUSMINUS)
+
+
+struct NonmovingSegmentInfo {
+ StgWord8 log_block_size;
+ StgWord16 next_free_snap;
+};
+
+typedef struct bdescr_ {
+
+ StgPtr start; // [READ ONLY] start addr of memory
+
+
+ union {
+ StgPtr free; // First free byte of memory.
+ // allocGroup() sets this to the value of start.
+ // NB. during use this value should lie
+ // between start and start + blocks *
+ // BLOCK_SIZE. Values outside this
+ // range are reserved for use by the
+ // block allocator. In particular, the
+ // value (StgPtr)(-1) is used to
+ // indicate that a block is unallocated.
+ //
+ // Unused by the non-moving allocator.
+ struct NonmovingSegmentInfo nonmoving_segment;
+ };
+
+ struct bdescr_ *link; // used for chaining blocks together
+
+ union {
+ struct bdescr_ *back; // used (occasionally) for doubly-linked lists
+ StgWord *bitmap; // bitmap for marking GC
+ StgPtr scan; // scan pointer for copying GC
+ } u;
+
+ struct generation_ *gen; // generation
+
+ StgWord16 gen_no; // gen->no, cached
+ StgWord16 dest_no; // number of destination generation
+ StgWord16 node; // which memory node does this block live on?
+
+ StgWord16 flags; // block flags, see below
+
+ StgWord32 blocks; // [READ ONLY] no. of blocks in a group
+ // (if group head, 0 otherwise)
+
+#if SIZEOF_VOID_P == 8
+ StgWord32 _padding[3];
+#else
+ StgWord32 _padding[0];
+#endif
+} bdescr;
+#endif
+
+#if SIZEOF_VOID_P == 8
+#define BDESCR_SIZE 0x40
+#define BDESCR_MASK 0x3f
+#define BDESCR_SHIFT 6
+#else
+#define BDESCR_SIZE 0x20
+#define BDESCR_MASK 0x1f
+#define BDESCR_SHIFT 5
+#endif
+
+/* Block contains objects evacuated during this GC */
+#define BF_EVACUATED 1
+/* Block is a large object */
+#define BF_LARGE 2
+/* Block is pinned */
+#define BF_PINNED 4
+/* Block is to be marked, not copied. Also used for marked large objects in
+ * non-moving heap. */
+#define BF_MARKED 8
+/* Block is executable */
+#define BF_EXEC 32
+/* Block contains only a small amount of live data */
+#define BF_FRAGMENTED 64
+/* we know about this block (for finding leaks) */
+#define BF_KNOWN 128
+/* Block was swept in the last generation */
+#define BF_SWEPT 256
+/* Block is part of a Compact */
+#define BF_COMPACT 512
+/* A non-moving allocator segment (see NonMoving.c) */
+#define BF_NONMOVING 1024
+/* A large object which has been moved off of oldest_gen->large_objects and
+ * onto nonmoving_large_objects. The mark phase ignores objects which aren't
+ * so-flagged */
+#define BF_NONMOVING_SWEEPING 2048
+/* Maximum flag value (do not define anything higher than this!) */
+#define BF_FLAG_MAX (1 << 15)
+
+/* Finding the block descriptor for a given block -------------------------- */
+
+#if defined(CMINUSMINUS)
+
+#define Bdescr(p) \
+ ((((p) & MBLOCK_MASK & ~BLOCK_MASK) >> (BLOCK_SHIFT-BDESCR_SHIFT)) \
+ | ((p) & ~MBLOCK_MASK))
+
+#else
+
+EXTERN_INLINE bdescr *Bdescr(StgPtr p);
+/* Compute the address of the block descriptor for the block containing p.
+ * All descriptors of a megablock live at its start, one per block, so:
+ * take p's block index within its megablock (the bits between BLOCK_SHIFT
+ * and MBLOCK_SHIFT), scale it to a BDESCR_SIZE-sized offset by shifting
+ * right (BLOCK_SHIFT-BDESCR_SHIFT), and OR in the megablock base address.
+ * No memory is touched; this is pure address arithmetic. */
+EXTERN_INLINE bdescr *Bdescr(StgPtr p)
+{
+  return (bdescr *)
+    ((((W_)p & MBLOCK_MASK & ~BLOCK_MASK) >> (BLOCK_SHIFT-BDESCR_SHIFT))
+     | ((W_)p & ~MBLOCK_MASK)
+    );
+}
+
+#endif
+
+/* Useful Macros ------------------------------------------------------------ */
+
+/* Offset of first real data block in a megablock */
+
+#define FIRST_BLOCK_OFF \
+ ((W_)BLOCK_ROUND_UP(BDESCR_SIZE * (MBLOCK_SIZE / BLOCK_SIZE)))
+
+/* First data block in a given megablock */
+
+#define FIRST_BLOCK(m) ((void *)(FIRST_BLOCK_OFF + (W_)(m)))
+
+/* Last data block in a given megablock */
+
+#define LAST_BLOCK(m) ((void *)(MBLOCK_SIZE-BLOCK_SIZE + (W_)(m)))
+
+/* First real block descriptor in a megablock */
+
+#define FIRST_BDESCR(m) \
+ ((bdescr *)((FIRST_BLOCK_OFF>>(BLOCK_SHIFT-BDESCR_SHIFT)) + (W_)(m)))
+
+/* Last real block descriptor in a megablock */
+
+#define LAST_BDESCR(m) \
+ ((bdescr *)(((MBLOCK_SIZE-BLOCK_SIZE)>>(BLOCK_SHIFT-BDESCR_SHIFT)) + (W_)(m)))
+
+/* Number of usable blocks in a megablock */
+
+#if !defined(CMINUSMINUS) // already defined in DerivedConstants.h
+#define BLOCKS_PER_MBLOCK ((MBLOCK_SIZE - FIRST_BLOCK_OFF) / BLOCK_SIZE)
+#endif
+
+/* How many blocks in this megablock group */
+
+#define MBLOCK_GROUP_BLOCKS(n) \
+ (BLOCKS_PER_MBLOCK + (n-1) * (MBLOCK_SIZE / BLOCK_SIZE))
+
+/* Compute the required size of a megablock group */
+
+#define BLOCKS_TO_MBLOCKS(n) \
+ (1 + (W_)MBLOCK_ROUND_UP((n-BLOCKS_PER_MBLOCK) * BLOCK_SIZE) / MBLOCK_SIZE)
+
+
+#if !defined(CMINUSMINUS)
+/* to the end... */
+
+/* Double-linked block lists: --------------------------------------------- */
+
+// Push bd onto the front of the doubly-linked list *list, updating the
+// old head's back pointer. bd becomes the new head (its back is NULL).
+INLINE_HEADER void
+dbl_link_onto(bdescr *bd, bdescr **list)
+{
+    bd->link = *list;
+    bd->u.back = NULL;
+    if (*list) {
+        (*list)->u.back = bd; /* double-link the list */
+    }
+    *list = bd;
+}
+
+// Unlink bd from the doubly-linked list *list. If bd was the head
+// (no back pointer), *list is advanced to bd's successor. bd's own
+// link/back fields are left unchanged.
+INLINE_HEADER void
+dbl_link_remove(bdescr *bd, bdescr **list)
+{
+    if (bd->u.back) {
+        bd->u.back->link = bd->link;
+    } else {
+        *list = bd->link;
+    }
+    if (bd->link) {
+        bd->link->u.back = bd->u.back;
+    }
+}
+
+// Insert bd immediately after the node 'after' in a doubly-linked list.
+// 'after' must already be on the list; the list head cannot change, so
+// no **list parameter is needed.
+INLINE_HEADER void
+dbl_link_insert_after(bdescr *bd, bdescr *after)
+{
+    bd->link = after->link;
+    bd->u.back = after;
+    if (after->link) {
+        after->link->u.back = bd;
+    }
+    after->link = bd;
+}
+
+// Replace 'old' with 'new_' in place in the doubly-linked list *list,
+// splicing new_ into old's position. If old was the head, *list is
+// updated to point at new_. old's own fields are left unchanged.
+INLINE_HEADER void
+dbl_link_replace(bdescr *new_, bdescr *old, bdescr **list)
+{
+    new_->link = old->link;
+    new_->u.back = old->u.back;
+    if (old->link) {
+        old->link->u.back = new_;
+    }
+    if (old->u.back) {
+        old->u.back->link = new_;
+    } else {
+        *list = new_;
+    }
+}
+
+/* Initialisation ---------------------------------------------------------- */
+
+extern void initBlockAllocator(void);
+
+/* Allocation -------------------------------------------------------------- */
+
+bdescr *allocGroup(W_ n);
+
+// Allocate a single block: convenience wrapper around allocGroup(1).
+// Caller must hold the storage manager lock (see allocBlock_lock below
+// for a version that takes it for you).
+EXTERN_INLINE bdescr* allocBlock(void);
+EXTERN_INLINE bdescr* allocBlock(void)
+{
+    return allocGroup(1);
+}
+
+bdescr *allocGroupOnNode(uint32_t node, W_ n);
+
+// Allocate n blocks, aligned at n-block boundary. The returned bdescr will
+// have this invariant
+//
+// bdescr->start % BLOCK_SIZE*n == 0
+//
+bdescr *allocAlignedGroupOnNode(uint32_t node, W_ n);
+
+// Allocate a single block on the given NUMA node: wrapper around
+// allocGroupOnNode(node, 1).
+EXTERN_INLINE bdescr* allocBlockOnNode(uint32_t node);
+EXTERN_INLINE bdescr* allocBlockOnNode(uint32_t node)
+{
+    return allocGroupOnNode(node,1);
+}
+
+// versions that take the storage manager lock for you:
+bdescr *allocGroup_lock(W_ n);
+bdescr *allocBlock_lock(void);
+
+bdescr *allocGroupOnNode_lock(uint32_t node, W_ n);
+bdescr *allocBlockOnNode_lock(uint32_t node);
+
+/* De-Allocation ----------------------------------------------------------- */
+
+void freeGroup(bdescr *p);
+void freeChain(bdescr *p);
+
+// versions that take the storage manager lock for you:
+void freeGroup_lock(bdescr *p);
+void freeChain_lock(bdescr *p);
+
+bdescr * splitBlockGroup (bdescr *bd, uint32_t blocks);
+
+/* Round a value to megablocks --------------------------------------------- */
+
+// We want to allocate an object around a given size, round it up or
+// down to the nearest size that will fit in an mblock group.
+// Round a size in words to the nearest whole number of megablocks,
+// compensating for the descriptor gap (FIRST_BLOCK_OFF) at the start of
+// the first megablock. Sizes that fit within a single megablock's usable
+// blocks are returned unchanged.
+INLINE_HEADER StgWord
+round_to_mblocks(StgWord words)
+{
+    if (words > BLOCKS_PER_MBLOCK * BLOCK_SIZE_W) {
+        // first, ignore the gap at the beginning of the first mblock by
+        // adding it to the total words. Then we can pretend we're
+        // dealing in a uniform unit of megablocks.
+        words += FIRST_BLOCK_OFF/sizeof(W_);
+
+        // round to the nearer megablock boundary (down if the remainder
+        // is less than half a megablock, up otherwise)
+        if ((words % MBLOCK_SIZE_W) < (MBLOCK_SIZE_W / 2)) {
+            words = (words / MBLOCK_SIZE_W) * MBLOCK_SIZE_W;
+        } else {
+            words = ((words / MBLOCK_SIZE_W) + 1) * MBLOCK_SIZE_W;
+        }
+
+        // put the gap back, restoring a usable-words figure
+        words -= FIRST_BLOCK_OFF/sizeof(W_);
+    }
+    return words;
+}
+
+#endif /* !CMINUSMINUS */
diff --git a/rts/include/rts/storage/ClosureMacros.h b/rts/include/rts/storage/ClosureMacros.h
new file mode 100644
index 0000000000..b841ef8be0
--- /dev/null
+++ b/rts/include/rts/storage/ClosureMacros.h
@@ -0,0 +1,645 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2012
+ *
+ * Macros for building and manipulating closures
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/* -----------------------------------------------------------------------------
+ Info tables are slammed up against the entry code, and the label
+ for the info table is at the *end* of the table itself. This
+ inline function adjusts an info pointer to point to the beginning
+ of the table, so we can use standard C structure indexing on it.
+
+ Note: this works for SRT info tables as long as you don't want to
+ access the SRT, since they are laid out the same with the SRT
+ pointer as the first word in the table.
+
+ NOTES ABOUT MANGLED C VS. MINI-INTERPRETER:
+
+ A couple of definitions:
+
+ "info pointer" The first word of the closure. Might point
+ to either the end or the beginning of the
+ info table, depending on whether we're using
+ the mini interpreter or not. GET_INFO(c)
+ retrieves the info pointer of a closure.
+
+ "info table" The info table structure associated with a
+ closure. This is always a pointer to the
+ beginning of the structure, so we can
+ use standard C structure indexing to pull out
+ the fields. get_itbl(c) returns a pointer to
+ the info table for closure c.
+
+ An address of the form xxxx_info points to the end of the info
+ table or the beginning of the info table depending on whether we're
+ mangling or not respectively. So,
+
+ c->header.info = xxx_info
+
+ makes absolute sense, whether mangling or not.
+
+ -------------------------------------------------------------------------- */
+
+/* Store closure c's info pointer with relaxed atomic ordering. */
+INLINE_HEADER void SET_INFO(StgClosure *c, const StgInfoTable *info) {
+    RELAXED_STORE(&c->header.info, info);
+}
+/* Store closure c's info pointer with release ordering, so that writes
+ * initialising the closure are visible to a thread that subsequently
+ * observes the new info pointer. */
+INLINE_HEADER void SET_INFO_RELEASE(StgClosure *c, const StgInfoTable *info) {
+    RELEASE_STORE(&c->header.info, info);
+}
+/* Read closure c's info pointer with relaxed atomic ordering. */
+INLINE_HEADER const StgInfoTable *GET_INFO(StgClosure *c) {
+    return RELAXED_LOAD(&c->header.info);
+}
+
+#if defined(TABLES_NEXT_TO_CODE)
+/* Info tables are laid out immediately before their entry code, and the
+ * info pointer points at the code, i.e. one past the end of the table.
+ * So converting an info pointer to a usable struct pointer means stepping
+ * back one table-sized struct. */
+EXTERN_INLINE StgInfoTable *INFO_PTR_TO_STRUCT(const StgInfoTable *info);
+EXTERN_INLINE StgInfoTable *INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgInfoTable *)info - 1;}
+EXTERN_INLINE StgRetInfoTable *RET_INFO_PTR_TO_STRUCT(const StgInfoTable *info);
+EXTERN_INLINE StgRetInfoTable *RET_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgRetInfoTable *)info - 1;}
+INLINE_HEADER StgFunInfoTable *FUN_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgFunInfoTable *)info - 1;}
+INLINE_HEADER StgThunkInfoTable *THUNK_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgThunkInfoTable *)info - 1;}
+INLINE_HEADER StgConInfoTable *CON_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgConInfoTable *)info - 1;}
+/* itbl_to_*_itbl: convert a plain StgInfoTable* (already at the struct
+ * start) to the specialised table type, which extends the basic table
+ * backwards in memory when tables sit next to code. */
+INLINE_HEADER StgFunInfoTable *itbl_to_fun_itbl(const StgInfoTable *i) {return (StgFunInfoTable *)(i + 1) - 1;}
+INLINE_HEADER StgRetInfoTable *itbl_to_ret_itbl(const StgInfoTable *i) {return (StgRetInfoTable *)(i + 1) - 1;}
+INLINE_HEADER StgThunkInfoTable *itbl_to_thunk_itbl(const StgInfoTable *i) {return (StgThunkInfoTable *)(i + 1) - 1;}
+INLINE_HEADER StgConInfoTable *itbl_to_con_itbl(const StgInfoTable *i) {return (StgConInfoTable *)(i + 1) - 1;}
+#else
+/* Info tables are separate from code and the info pointer already points
+ * at the start of the table, so all of these conversions are plain casts. */
+EXTERN_INLINE StgInfoTable *INFO_PTR_TO_STRUCT(const StgInfoTable *info);
+EXTERN_INLINE StgInfoTable *INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgInfoTable *)info;}
+EXTERN_INLINE StgRetInfoTable *RET_INFO_PTR_TO_STRUCT(const StgInfoTable *info);
+EXTERN_INLINE StgRetInfoTable *RET_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgRetInfoTable *)info;}
+INLINE_HEADER StgFunInfoTable *FUN_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgFunInfoTable *)info;}
+INLINE_HEADER StgThunkInfoTable *THUNK_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgThunkInfoTable *)info;}
+INLINE_HEADER StgConInfoTable *CON_INFO_PTR_TO_STRUCT(const StgInfoTable *info) {return (StgConInfoTable *)info;}
+INLINE_HEADER StgFunInfoTable *itbl_to_fun_itbl(const StgInfoTable *i) {return (StgFunInfoTable *)i;}
+INLINE_HEADER StgRetInfoTable *itbl_to_ret_itbl(const StgInfoTable *i) {return (StgRetInfoTable *)i;}
+INLINE_HEADER StgThunkInfoTable *itbl_to_thunk_itbl(const StgInfoTable *i) {return (StgThunkInfoTable *)i;}
+INLINE_HEADER StgConInfoTable *itbl_to_con_itbl(const StgInfoTable *i) {return (StgConInfoTable *)i;}
+#endif
+
+/* get_itbl and friends: load a closure's info pointer (relaxed ordering)
+ * and convert it to a pointer to the start of the corresponding info
+ * table struct, via the *_INFO_PTR_TO_STRUCT conversions above. */
+EXTERN_INLINE const StgInfoTable *get_itbl(const StgClosure *c);
+EXTERN_INLINE const StgInfoTable *get_itbl(const StgClosure *c)
+{
+    return INFO_PTR_TO_STRUCT(RELAXED_LOAD(&c->header.info));
+}
+
+EXTERN_INLINE const StgRetInfoTable *get_ret_itbl(const StgClosure *c);
+EXTERN_INLINE const StgRetInfoTable *get_ret_itbl(const StgClosure *c)
+{
+    return RET_INFO_PTR_TO_STRUCT(RELAXED_LOAD(&c->header.info));
+}
+
+INLINE_HEADER const StgFunInfoTable *get_fun_itbl(const StgClosure *c)
+{
+    return FUN_INFO_PTR_TO_STRUCT(RELAXED_LOAD(&c->header.info));
+}
+
+INLINE_HEADER const StgThunkInfoTable *get_thunk_itbl(const StgClosure *c)
+{
+    return THUNK_INFO_PTR_TO_STRUCT(RELAXED_LOAD(&c->header.info));
+}
+
+INLINE_HEADER const StgConInfoTable *get_con_itbl(const StgClosure *c)
+{
+    return CON_INFO_PTR_TO_STRUCT(RELAXED_LOAD(&c->header.info));
+}
+
+/* Return the constructor tag of closure 'con'. Constructors have no SRT,
+ * so the info table's 'srt' slot is reused to hold the tag. */
+INLINE_HEADER StgHalfWord GET_TAG(const StgClosure *con)
+{
+    return get_itbl(con)->srt;
+}
+
+/* -----------------------------------------------------------------------------
+ Macros for building closures
+ -------------------------------------------------------------------------- */
+
+#if defined(PROFILING)
+/*
+ The following macro works for both retainer profiling and LDV profiling. For
+ retainer profiling, 'era' remains 0, so by setting the 'ldvw' field we also set
+ 'rs' to zero.
+
+ Note that we don't have to bother handling the 'flip' bit properly[1] since the
+ retainer profiling code will just set 'rs' to NULL upon visiting a closure with
+ an invalid 'flip' bit anyways.
+
+ See Note [Profiling heap traversal visited bit] for details.
+
+ [1]: Technically we should set 'rs' to `NULL | flip`.
+ */
+#define SET_PROF_HDR(c,ccs_) \
+ ((c)->header.prof.ccs = ccs_, \
+ LDV_RECORD_CREATE((c)))
+#else
+#define SET_PROF_HDR(c,ccs)
+#endif
+
+#define SET_HDR(c,_info,ccs) \
+ { \
+ SET_PROF_HDR((StgClosure *)(c),ccs); \
+ RELAXED_STORE(&(c)->header.info, _info); \
+ }
+
+#define SET_HDR_RELEASE(c,_info,ccs) \
+ { \
+ SET_PROF_HDR((StgClosure *)(c),ccs); \
+ RELEASE_STORE(&(c)->header.info, _info); \
+ }
+
+#define SET_ARR_HDR(c,info,costCentreStack,n_bytes) \
+ (c)->bytes = n_bytes; \
+ SET_HDR(c,info,costCentreStack);
+
+// Use when changing a closure from one kind to another
+#define OVERWRITE_INFO(c, new_info) \
+ OVERWRITING_CLOSURE((StgClosure *)(c)); \
+ SET_INFO((StgClosure *)(c), (new_info)); \
+ LDV_RECORD_CREATE(c);
+
+/* -----------------------------------------------------------------------------
+ How to get hold of the static link field for a static closure.
+ -------------------------------------------------------------------------- */
+
+/* These are hard-coded. */
+#define THUNK_STATIC_LINK(p) (&(p)->payload[1])
+#define IND_STATIC_LINK(p) (&(p)->payload[1])
+
+// Return the address of the static link field of static closure p.
+// THUNK_STATIC and IND_STATIC keep it at the hard-coded payload[1];
+// for all other static closures it follows the pointer and non-pointer
+// payload words described by the info table layout.
+INLINE_HEADER StgClosure **
+STATIC_LINK(const StgInfoTable *info, StgClosure *p)
+{
+    switch (info->type) {
+    case THUNK_STATIC:
+        return THUNK_STATIC_LINK(p);
+    case IND_STATIC:
+        return IND_STATIC_LINK(p);
+    default:
+        return &p->payload[info->layout.payload.ptrs +
+                           info->layout.payload.nptrs];
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ INTLIKE and CHARLIKE closures.
+ -------------------------------------------------------------------------- */
+
+/* Index into the statically-allocated tables of CHARLIKE/INTLIKE
+ * closures; n must lie in the range covered by MIN_CHARLIKE /
+ * MIN_INTLIKE and the corresponding table size. */
+INLINE_HEADER P_ CHARLIKE_CLOSURE(int n) {
+    return (P_)&stg_CHARLIKE_closure[(n)-MIN_CHARLIKE];
+}
+INLINE_HEADER P_ INTLIKE_CLOSURE(int n) {
+    return (P_)&stg_INTLIKE_closure[(n)-MIN_INTLIKE];
+}
+
+/* ----------------------------------------------------------------------------
+ Macros for untagging and retagging closure pointers
+ For more information look at the comments in Cmm.h
+ ------------------------------------------------------------------------- */
+
+// Extract the pointer tag (low TAG_MASK bits) from a closure pointer.
+static inline StgWord
+GET_CLOSURE_TAG(const StgClosure * p)
+{
+    return (StgWord)p & TAG_MASK;
+}
+
+// Clear the tag bits, yielding the real (untagged) closure address.
+static inline StgClosure *
+UNTAG_CLOSURE(StgClosure * p)
+{
+    return (StgClosure*)((StgWord)p & ~TAG_MASK);
+}
+
+// const-preserving variant of UNTAG_CLOSURE.
+static inline const StgClosure *
+UNTAG_CONST_CLOSURE(const StgClosure * p)
+{
+    return (const StgClosure*)((StgWord)p & ~TAG_MASK);
+}
+
+// OR the given tag into a (presumed untagged) closure pointer.
+static inline StgClosure *
+TAG_CLOSURE(StgWord tag,StgClosure * p)
+{
+    return (StgClosure*)((StgWord)p | tag);
+}
+
+/* -----------------------------------------------------------------------------
+ Forwarding pointers
+ -------------------------------------------------------------------------- */
+
+#define IS_FORWARDING_PTR(p) ((((StgWord)p) & 1) != 0)
+#define MK_FORWARDING_PTR(p) (((StgWord)p) | 1)
+#define UN_FORWARDING_PTR(p) (((StgWord)p) - 1)
+
+/* -----------------------------------------------------------------------------
+ DEBUGGING predicates for pointers
+
+ LOOKS_LIKE_INFO_PTR(p) returns False if p is definitely not an info ptr
+ LOOKS_LIKE_CLOSURE_PTR(p) returns False if p is definitely not a closure ptr
+
+ These macros are complete but not sound. That is, they might
+ return false positives. Do not rely on them to distinguish info
+ pointers from closure pointers, for example.
+
+ We don't use address-space predicates these days, for portability
+ reasons, and the fact that code/data can be scattered about the
+ address space in a dynamically-linked environment. Our best option
+ is to look at the alleged info table and see whether it seems to
+ make sense...
+ -------------------------------------------------------------------------- */
+
+// Sanity heuristic: does the (non-NULL) word p look like an info pointer?
+// Checks only that the alleged info table's type field is in range; may
+// give false positives (see the comment block above).
+INLINE_HEADER bool LOOKS_LIKE_INFO_PTR_NOT_NULL (StgWord p)
+{
+    StgInfoTable *info = INFO_PTR_TO_STRUCT((StgInfoTable *)p);
+    return info->type != INVALID_OBJECT && info->type < N_CLOSURE_TYPES;
+}
+
+// As above, but also accepts forwarding pointers and rejects NULL.
+INLINE_HEADER bool LOOKS_LIKE_INFO_PTR (StgWord p)
+{
+    return p && (IS_FORWARDING_PTR(p) || LOOKS_LIKE_INFO_PTR_NOT_NULL(p));
+}
+
+// Sanity heuristic for closure pointers: untag p and check that its
+// header word looks like an info pointer.
+INLINE_HEADER bool LOOKS_LIKE_CLOSURE_PTR (const void *p)
+{
+    const StgInfoTable *info = RELAXED_LOAD(&UNTAG_CONST_CLOSURE((const StgClosure *) (p))->header.info);
+    return LOOKS_LIKE_INFO_PTR((StgWord) info);
+}
+
+/* -----------------------------------------------------------------------------
+ Macros for calculating the size of a closure
+ -------------------------------------------------------------------------- */
+
+/* Closure sizes, in words, for the variable-sized closure kinds:
+ * fixed header/struct size plus the number of payload words. */
+EXTERN_INLINE StgOffset PAP_sizeW ( uint32_t n_args );
+EXTERN_INLINE StgOffset PAP_sizeW ( uint32_t n_args )
+{ return sizeofW(StgPAP) + n_args; }
+
+EXTERN_INLINE StgOffset AP_sizeW ( uint32_t n_args );
+EXTERN_INLINE StgOffset AP_sizeW ( uint32_t n_args )
+{ return sizeofW(StgAP) + n_args; }
+
+EXTERN_INLINE StgOffset AP_STACK_sizeW ( uint32_t size );
+EXTERN_INLINE StgOffset AP_STACK_sizeW ( uint32_t size )
+{ return sizeofW(StgAP_STACK) + size; }
+
+/* p = pointer words, np = non-pointer words in the payload. */
+EXTERN_INLINE StgOffset CONSTR_sizeW( uint32_t p, uint32_t np );
+EXTERN_INLINE StgOffset CONSTR_sizeW( uint32_t p, uint32_t np )
+{ return sizeofW(StgHeader) + p + np; }
+
+EXTERN_INLINE StgOffset THUNK_SELECTOR_sizeW ( void );
+EXTERN_INLINE StgOffset THUNK_SELECTOR_sizeW ( void )
+{ return sizeofW(StgSelector); }
+
+EXTERN_INLINE StgOffset BLACKHOLE_sizeW ( void );
+EXTERN_INLINE StgOffset BLACKHOLE_sizeW ( void )
+{ return sizeofW(StgInd); } // a BLACKHOLE is a kind of indirection
+
+/* --------------------------------------------------------------------------
+ Sizes of closures
+ ------------------------------------------------------------------------*/
+
+/* Size of a closure from its info table layout: header plus the
+ * pointer and non-pointer payload word counts. */
+EXTERN_INLINE StgOffset sizeW_fromITBL( const StgInfoTable* itbl );
+EXTERN_INLINE StgOffset sizeW_fromITBL( const StgInfoTable* itbl )
+{ return sizeofW(StgClosure)
+       + sizeofW(StgPtr)  * itbl->layout.payload.ptrs
+       + sizeofW(StgWord) * itbl->layout.payload.nptrs; }
+
+/* As sizeW_fromITBL, but for thunks, which have a larger header. */
+EXTERN_INLINE StgOffset thunk_sizeW_fromITBL( const StgInfoTable* itbl );
+EXTERN_INLINE StgOffset thunk_sizeW_fromITBL( const StgInfoTable* itbl )
+{ return sizeofW(StgThunk)
+       + sizeofW(StgPtr)  * itbl->layout.payload.ptrs
+       + sizeofW(StgWord) * itbl->layout.payload.nptrs; }
+
+/* Sizes of specific closures read from the closure itself. */
+EXTERN_INLINE StgOffset ap_stack_sizeW( StgAP_STACK* x );
+EXTERN_INLINE StgOffset ap_stack_sizeW( StgAP_STACK* x )
+{ return AP_STACK_sizeW(x->size); }
+
+EXTERN_INLINE StgOffset ap_sizeW( StgAP* x );
+EXTERN_INLINE StgOffset ap_sizeW( StgAP* x )
+{ return AP_sizeW(x->n_args); }
+
+EXTERN_INLINE StgOffset pap_sizeW( StgPAP* x );
+EXTERN_INLINE StgOffset pap_sizeW( StgPAP* x )
+{ return PAP_sizeW(x->n_args); }
+
+/* Payload of an ARR_WORDS in words, rounding the byte count up. */
+EXTERN_INLINE StgWord arr_words_words( StgArrBytes* x);
+EXTERN_INLINE StgWord arr_words_words( StgArrBytes* x)
+{ return ROUNDUP_BYTES_TO_WDS(x->bytes); }
+
+EXTERN_INLINE StgOffset arr_words_sizeW( StgArrBytes* x );
+EXTERN_INLINE StgOffset arr_words_sizeW( StgArrBytes* x )
+{ return sizeofW(StgArrBytes) + arr_words_words(x); }
+
+EXTERN_INLINE StgOffset mut_arr_ptrs_sizeW( StgMutArrPtrs* x );
+EXTERN_INLINE StgOffset mut_arr_ptrs_sizeW( StgMutArrPtrs* x )
+{ return sizeofW(StgMutArrPtrs) + x->size; }
+
+EXTERN_INLINE StgOffset small_mut_arr_ptrs_sizeW( StgSmallMutArrPtrs* x );
+EXTERN_INLINE StgOffset small_mut_arr_ptrs_sizeW( StgSmallMutArrPtrs* x )
+{ return sizeofW(StgSmallMutArrPtrs) + x->ptrs; }
+
+EXTERN_INLINE StgWord stack_sizeW ( StgStack *stack );
+EXTERN_INLINE StgWord stack_sizeW ( StgStack *stack )
+{ return sizeofW(StgStack) + stack->stack_size; }
+
+EXTERN_INLINE StgWord bco_sizeW ( StgBCO *bco );
+EXTERN_INLINE StgWord bco_sizeW ( StgBCO *bco )
+{ return bco->size; }
+
+EXTERN_INLINE StgWord compact_nfdata_full_sizeW ( StgCompactNFData *str );
+EXTERN_INLINE StgWord compact_nfdata_full_sizeW ( StgCompactNFData *str )
+{ return str->totalW; }
+
+/*
+ * TODO: Consider to switch return type from 'uint32_t' to 'StgWord' #8742
+ *
+ * (Also for 'closure_sizeW' below)
+ */
+/* Compute the size in words of heap closure p, whose info table has
+ * already been fetched as 'info'. Dispatches on the closure type:
+ * the compact CONSTR/FUN/THUNK layouts have statically-known sizes,
+ * variable-sized closures read a size field from the closure itself,
+ * and everything else falls back to the info table layout. */
+EXTERN_INLINE uint32_t
+closure_sizeW_ (const StgClosure *p, const StgInfoTable *info);
+EXTERN_INLINE uint32_t
+closure_sizeW_ (const StgClosure *p, const StgInfoTable *info)
+{
+    switch (info->type) {
+    case THUNK_0_1:
+    case THUNK_1_0:
+        return sizeofW(StgThunk) + 1;
+    case FUN_0_1:
+    case CONSTR_0_1:
+    case FUN_1_0:
+    case CONSTR_1_0:
+        return sizeofW(StgHeader) + 1;
+    case THUNK_0_2:
+    case THUNK_1_1:
+    case THUNK_2_0:
+        return sizeofW(StgThunk) + 2;
+    case FUN_0_2:
+    case CONSTR_0_2:
+    case FUN_1_1:
+    case CONSTR_1_1:
+    case FUN_2_0:
+    case CONSTR_2_0:
+        return sizeofW(StgHeader) + 2;
+    case THUNK:
+        return thunk_sizeW_fromITBL(info);
+    case THUNK_SELECTOR:
+        return THUNK_SELECTOR_sizeW();
+    case AP_STACK:
+        return ap_stack_sizeW((StgAP_STACK *)p);
+    case AP:
+        return ap_sizeW((StgAP *)p);
+    case PAP:
+        return pap_sizeW((StgPAP *)p);
+    case IND:
+        return sizeofW(StgInd);
+    case ARR_WORDS:
+        return arr_words_sizeW((StgArrBytes *)p);
+    case MUT_ARR_PTRS_CLEAN:
+    case MUT_ARR_PTRS_DIRTY:
+    case MUT_ARR_PTRS_FROZEN_CLEAN:
+    case MUT_ARR_PTRS_FROZEN_DIRTY:
+        return mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);
+    case SMALL_MUT_ARR_PTRS_CLEAN:
+    case SMALL_MUT_ARR_PTRS_DIRTY:
+    case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+    case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
+        return small_mut_arr_ptrs_sizeW((StgSmallMutArrPtrs*)p);
+    case TSO:
+        return sizeofW(StgTSO);
+    case STACK:
+        return stack_sizeW((StgStack*)p);
+    case BCO:
+        return bco_sizeW((StgBCO *)p);
+    case TREC_CHUNK:
+        return sizeofW(StgTRecChunk);
+    default:
+        return sizeW_fromITBL(info);
+    }
+}
+
+// The definitive way to find the size, in words, of a heap-allocated closure
+EXTERN_INLINE uint32_t closure_sizeW (const StgClosure *p);
+EXTERN_INLINE uint32_t closure_sizeW (const StgClosure *p)
+{
+    return closure_sizeW_(p, get_itbl(p));
+}
+
+/* -----------------------------------------------------------------------------
+ Sizes of stack frames
+ -------------------------------------------------------------------------- */
+
+/* Size in words of the stack frame starting at 'frame', including the
+ * frame header word(s). RET_FUN frames carry an explicit size field;
+ * RET_BIG frames use a large bitmap; RET_BCO frames get their bitmap
+ * from the BCO stored in the second word of the frame; everything else
+ * uses the small bitmap embedded in the info table. */
+EXTERN_INLINE StgWord stack_frame_sizeW( StgClosure *frame );
+EXTERN_INLINE StgWord stack_frame_sizeW( StgClosure *frame )
+{
+    const StgRetInfoTable *info;
+
+    info = get_ret_itbl(frame);
+    switch (info->i.type) {
+
+    case RET_FUN:
+        return sizeofW(StgRetFun) + ((StgRetFun *)frame)->size;
+
+    case RET_BIG:
+        return 1 + GET_LARGE_BITMAP(&info->i)->size;
+
+    case RET_BCO:
+        return 2 + BCO_BITMAP_SIZE((StgBCO *)((P_)frame)[1]);
+
+    default:
+        return 1 + BITMAP_SIZE(info->i.layout.bitmap);
+    }
+}
+
+/* -----------------------------------------------------------------------------
+ StgMutArrPtrs macros
+
+ An StgMutArrPtrs has a card table to indicate which elements are
+ dirty for the generational GC. The card table is an array of
+ bytes, where each byte covers (1 << MUT_ARR_PTRS_CARD_BITS)
+ elements. The card table is directly after the array data itself.
+ -------------------------------------------------------------------------- */
+
+// The number of card bytes needed
+// The number of card bytes needed to cover 'elems' array elements:
+// one byte per (1 << MUT_ARR_PTRS_CARD_BITS) elements, rounded up.
+INLINE_HEADER W_ mutArrPtrsCards (W_ elems)
+{
+    return (W_)((elems + (1 << MUT_ARR_PTRS_CARD_BITS) - 1)
+                >> MUT_ARR_PTRS_CARD_BITS);
+}
+
+// The number of words occupied by the card table, rounding the byte
+// count up to a whole number of words.
+INLINE_HEADER W_ mutArrPtrsCardTableSize (W_ elems)
+{
+    return ROUNDUP_BYTES_TO_WDS(mutArrPtrsCards(elems));
+}
+
+// The address of card number n; the card table sits immediately after
+// the array's pointer payload.
+INLINE_HEADER StgWord8 *mutArrPtrsCard (StgMutArrPtrs *a, W_ n)
+{
+    return ((StgWord8 *)&(a->payload[a->ptrs]) + n);
+}
+
+/* -----------------------------------------------------------------------------
+ Replacing a closure with a different one. We must call
+ OVERWRITING_CLOSURE(p) on the old closure that is about to be
+ overwritten.
+
+ Note [zeroing slop when overwriting closures]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ When we overwrite a closure in the heap with a smaller one, in some scenarios
+ we need to write zero words into "slop"; the memory that is left
+ unoccupied. See Note [slop on the heap]
+
+ Zeroing slop is required for:
+
+ - full-heap sanity checks (DEBUG, and +RTS -DS),
+
+ - LDV profiling (PROFILING, and +RTS -hb) and
+
+ However we can get into trouble if we're zeroing slop for ordinarily
+ immutable closures when using multiple threads, since there is nothing
+ preventing another thread from still being in the process of reading the
+ memory we're about to zero.
+
+ Thus, with the THREADED RTS and +RTS -N2 or greater we must not zero
+ immutable closure's slop.
+
+ Hence, an immutable closure's slop is zeroed when either:
+
+ - PROFILING && era > 0 (LDV is on) or
+ - !THREADED && DEBUG
+
+ Additionally:
+
+ - LDV profiling and +RTS -N2 are incompatible,
+
+ - full-heap sanity checks are disabled for the THREADED RTS, at least when
+ they don't run right after GC when there is no slop.
+ See Note [heap sanity checking with SMP].
+
+ -------------------------------------------------------------------------- */
+
+#if defined(PROFILING) || defined(DEBUG)
+#define OVERWRITING_CLOSURE(c) \
+ overwritingClosure(c)
+#define OVERWRITING_CLOSURE_SIZE(c, size) \
+ overwritingClosureSize(c, size)
+#define OVERWRITING_CLOSURE_MUTABLE(c, off) \
+ overwritingMutableClosureOfs(c, off)
+#else
+#define OVERWRITING_CLOSURE(c) \
+ do { (void) sizeof(c); } while(0)
+#define OVERWRITING_CLOSURE_SIZE(c, size) \
+ do { (void) sizeof(c); (void) sizeof(size); } while(0)
+#define OVERWRITING_CLOSURE_MUTABLE(c, off) \
+ do { (void) sizeof(c); (void) sizeof(off); } while(0)
+#endif
+
+#if defined(PROFILING)
+void LDV_recordDead (const StgClosure *c, uint32_t size);
+RTS_PRIVATE bool isInherentlyUsed ( StgHalfWord closure_type );
+#endif
+
+/* Zero the slop words [offset, size) of closure p, but only when the
+ * current RTS configuration requires and permits it; see
+ * Note [zeroing slop when overwriting closures] above. */
+EXTERN_INLINE void
+zeroSlop (
+    StgClosure *p,
+    uint32_t offset, /*< offset to start zeroing at, in words */
+    uint32_t size, /*< total closure size, in words */
+    bool known_mutable /*< is this a closure whose slop we can always zero? */
+    );
+
+EXTERN_INLINE void
+zeroSlop (StgClosure *p, uint32_t offset, uint32_t size, bool known_mutable)
+{
+    // see Note [zeroing slop when overwriting closures], also #8402
+
+    const bool want_to_zero_immutable_slop = false
+        // Sanity checking (-DS) is enabled
+        || RTS_DEREF(RtsFlags).DebugFlags.sanity
+#if defined(PROFILING)
+        // LDV profiler is enabled
+        || era > 0
+#endif
+        ;
+
+    const bool can_zero_immutable_slop =
+        // Only if we're running single threaded.
+        RTS_DEREF(RtsFlags).ParFlags.nCapabilities <= 1;
+
+    const bool zero_slop_immutable =
+        want_to_zero_immutable_slop && can_zero_immutable_slop;
+
+    const bool zero_slop_mutable =
+#if defined(PROFILING)
+        // Always zero mutable closure slop when profiling. We do this to cover
+        // the case of shrinking mutable arrays in pinned blocks for the heap
+        // profiler, see Note [skipping slop in the heap profiler]
+        //
+        // TODO: We could make this check more specific and only zero if the
+        // object is in a BF_PINNED bdescr here. Update Note [slop on the heap]
+        // and [zeroing slop when overwriting closures] if you change this.
+        true
+#else
+        zero_slop_immutable
+#endif
+        ;
+
+    const bool zero_slop =
+        // If we're not sure this is a mutable closure treat it like an
+        // immutable one.
+        known_mutable ? zero_slop_mutable : zero_slop_immutable;
+
+    if(!zero_slop)
+        return;
+
+    for (uint32_t i = offset; i < size; i++) {
+        ((StgWord *)p)[i] = 0;
+    }
+}
+
+/* Must be called on a closure that is about to be overwritten in place.
+ * Records the closure's death for LDV profiling (if active and the
+ * closure type is not inherently used), then zeroes the slop beyond a
+ * thunk header, subject to the policy in zeroSlop. */
+EXTERN_INLINE void overwritingClosure (StgClosure *p);
+EXTERN_INLINE void overwritingClosure (StgClosure *p)
+{
+    W_ size = closure_sizeW(p);
+#if defined(PROFILING)
+    if(era > 0 && !isInherentlyUsed(get_itbl(p)->type))
+        LDV_recordDead(p, size);
+#endif
+    zeroSlop(p, sizeofW(StgThunkHeader), size, /*known_mutable=*/false);
+}
+
+// Version of 'overwritingClosure' which overwrites only a suffix of a
+// closure. The offset is expressed in words relative to 'p' and shall
+// be less than or equal to closure_sizeW(p), and usually at least as
+// large as the respective thunk header.
+// Version of 'overwritingClosure' for mutable closures, overwriting only
+// a suffix starting at 'offset' words from p. The offset shall be less
+// than or equal to closure_sizeW(p).
+EXTERN_INLINE void
+overwritingMutableClosureOfs (StgClosure *p, uint32_t offset);
+
+EXTERN_INLINE void
+overwritingMutableClosureOfs (StgClosure *p, uint32_t offset)
+{
+    // Since overwritingClosureOfs is only ever called by:
+    //
+    //   - shrinkMutableByteArray# (ARR_WORDS) and
+    //
+    //   - shrinkSmallMutableArray# (SMALL_MUT_ARR_PTRS)
+    //
+    // we can safely omit the LDV_recordDead call. Since these closures are
+    // considered inherently used we don't need to track their destruction.
+#if defined(PROFILING)
+    ASSERT(isInherentlyUsed(get_itbl(p)->type) == true);
+#endif
+    zeroSlop(p, offset, closure_sizeW(p), /*known_mutable=*/true);
+}
+
+// Version of 'overwritingClosure' which takes closure size as argument.
+/* Version of 'overwritingClosure' for callers that already know the
+ * closure's size in words, avoiding a recomputation via closure_sizeW. */
+EXTERN_INLINE void overwritingClosureSize (StgClosure *p, uint32_t size /* in words */);
+EXTERN_INLINE void overwritingClosureSize (StgClosure *p, uint32_t size)
+{
+    // This function is only called from stg_AP_STACK so we can assume it's not
+    // inherently used.
+#if defined(PROFILING)
+    ASSERT(isInherentlyUsed(get_itbl(p)->type) == false);
+    if(era > 0)
+        LDV_recordDead(p, size);
+#endif
+    zeroSlop(p, sizeofW(StgThunkHeader), size, /*known_mutable=*/false);
+}
diff --git a/rts/include/rts/storage/ClosureTypes.h b/rts/include/rts/storage/ClosureTypes.h
new file mode 100644
index 0000000000..85dc1a0ce4
--- /dev/null
+++ b/rts/include/rts/storage/ClosureTypes.h
@@ -0,0 +1,86 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2005
+ *
+ * Closure Type Constants: out here because the native code generator
+ * needs to get at them.
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/*
+ * WARNING WARNING WARNING
+ *
+ * If you add or delete any closure types, don't forget to update the following,
+ * - the closure flags table in rts/ClosureFlags.c
+ * - isRetainer in rts/RetainerProfile.c
+ * - the closure_type_names list in rts/Printer.c
+ */
+
+/* Object tag 0 raises an internal error */
+#define INVALID_OBJECT 0
+#define CONSTR 1
+#define CONSTR_1_0 2
+#define CONSTR_0_1 3
+#define CONSTR_2_0 4
+#define CONSTR_1_1 5
+#define CONSTR_0_2 6
+#define CONSTR_NOCAF 7
+#define FUN 8
+#define FUN_1_0 9
+#define FUN_0_1 10
+#define FUN_2_0 11
+#define FUN_1_1 12
+#define FUN_0_2 13
+#define FUN_STATIC 14
+#define THUNK 15
+#define THUNK_1_0 16
+#define THUNK_0_1 17
+#define THUNK_2_0 18
+#define THUNK_1_1 19
+#define THUNK_0_2 20
+#define THUNK_STATIC 21
+#define THUNK_SELECTOR 22
+#define BCO 23
+#define AP 24
+#define PAP 25
+#define AP_STACK 26
+#define IND 27
+#define IND_STATIC 28
+#define RET_BCO 29
+#define RET_SMALL 30
+#define RET_BIG 31
+#define RET_FUN 32
+#define UPDATE_FRAME 33
+#define CATCH_FRAME 34
+#define UNDERFLOW_FRAME 35
+#define STOP_FRAME 36
+#define BLOCKING_QUEUE 37
+#define BLACKHOLE 38
+#define MVAR_CLEAN 39
+#define MVAR_DIRTY 40
+#define TVAR 41
+#define ARR_WORDS 42
+#define MUT_ARR_PTRS_CLEAN 43
+#define MUT_ARR_PTRS_DIRTY 44
+#define MUT_ARR_PTRS_FROZEN_DIRTY 45
+#define MUT_ARR_PTRS_FROZEN_CLEAN 46
+#define MUT_VAR_CLEAN 47
+#define MUT_VAR_DIRTY 48
+#define WEAK 49
+#define PRIM 50
+#define MUT_PRIM 51
+#define TSO 52
+#define STACK 53
+#define TREC_CHUNK 54
+#define ATOMICALLY_FRAME 55
+#define CATCH_RETRY_FRAME 56
+#define CATCH_STM_FRAME 57
+#define WHITEHOLE 58
+#define SMALL_MUT_ARR_PTRS_CLEAN 59
+#define SMALL_MUT_ARR_PTRS_DIRTY 60
+#define SMALL_MUT_ARR_PTRS_FROZEN_DIRTY 61
+#define SMALL_MUT_ARR_PTRS_FROZEN_CLEAN 62
+#define COMPACT_NFDATA 63
+#define N_CLOSURE_TYPES 64
diff --git a/rts/include/rts/storage/Closures.h b/rts/include/rts/storage/Closures.h
new file mode 100644
index 0000000000..ebb836bca2
--- /dev/null
+++ b/rts/include/rts/storage/Closures.h
@@ -0,0 +1,488 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Closures
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/*
+ * The Layout of a closure header depends on which kind of system we're
+ * compiling for: profiling, parallel, ticky, etc.
+ */
+
+/* -----------------------------------------------------------------------------
+ The profiling header
+ -------------------------------------------------------------------------- */
+
+typedef struct {
+ CostCentreStack *ccs;
+ union {
+ StgWord trav; /* Heap traversal */
+ StgWord ldvw; /* Lag/Drag/Void Word */
+ } hp;
+ // Heap profiling header. This field is shared among the various heap
+ // profiling modes. Currently it is used by ProfHeap.c for Lag/Drag/Void
+ // profiling and by the heap traversal modes using TraverseHeap.c such as
+ // the retainer profiler.
+} StgProfHeader;
+
+/* -----------------------------------------------------------------------------
+ The SMP header
+
+ A thunk has a padding word to take the updated value. This is so
+ that the update doesn't overwrite the payload, so we can avoid
+ needing to lock the thunk during entry and update.
+
+ Note: this doesn't apply to THUNK_STATICs, which have no payload.
+
+ Note: we leave this padding word in all cases, rather than just SMP,
+ so that we don't have to recompile all our libraries for SMP.
+ -------------------------------------------------------------------------- */
+
+typedef struct {
+ StgWord pad;
+} StgSMPThunkHeader;
+
+/* -----------------------------------------------------------------------------
+ The full fixed-size closure header
+
+ The size of the fixed header is the sum of the optional parts plus a single
+ word for the entry code pointer.
+ -------------------------------------------------------------------------- */
+
+typedef struct {
+ // If TABLES_NEXT_TO_CODE is defined, then `info` is offset by
+ // `sizeof(StgInfoTable)` and so points to the `code` field of the
+ // StgInfoTable! You may want to use `get_itbl` to get the pointer to the
+ // start of the info table. See
+ // https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/rts/storage/heap-objects#tables_next_to_code.
+ const StgInfoTable* info;
+#if defined(PROFILING)
+ StgProfHeader prof;
+#endif
+} StgHeader;
+
+typedef struct {
+ const StgInfoTable* info;
+#if defined(PROFILING)
+ StgProfHeader prof;
+#endif
+ StgSMPThunkHeader smp;
+} StgThunkHeader;
+
+#define THUNK_EXTRA_HEADER_W (sizeofW(StgThunkHeader)-sizeofW(StgHeader))
+
+/* -----------------------------------------------------------------------------
+ Closure Types
+
+ For any given closure type (defined in InfoTables.h), there is a
+ corresponding structure defined below. The name of the structure
+ is obtained by concatenating the closure type with '_closure'
+ -------------------------------------------------------------------------- */
+
+/* All closures follow the generic format */
+
+typedef struct StgClosure_ {
+ StgHeader header;
+ struct StgClosure_ *payload[];
+} *StgClosurePtr; // StgClosure defined in rts/Types.h
+
+typedef struct StgThunk_ {
+ StgThunkHeader header;
+ struct StgClosure_ *payload[];
+} StgThunk;
+
+typedef struct {
+ StgThunkHeader header;
+ StgClosure *selectee;
+} StgSelector;
+
+/*
+ PAP payload contains pointers and non-pointers interleaved and we only have
+ one info table for PAPs (stg_PAP_info). To visit pointers in a PAP payload we
+ use the `fun`s bitmap. For a PAP with n_args arguments the first n_args bits
+ in the fun's bitmap tell us which payload locations contain pointers.
+*/
+typedef struct {
+ StgHeader header;
+ StgHalfWord arity; /* zero if it is an AP */
+ StgHalfWord n_args;
+ StgClosure *fun; /* really points to a fun */
+ StgClosure *payload[];
+} StgPAP;
+
+typedef struct {
+ StgThunkHeader header;
+ StgHalfWord arity; /* zero if it is an AP */
+ StgHalfWord n_args;
+ StgClosure *fun; /* really points to a fun */
+ StgClosure *payload[];
+} StgAP;
+
+typedef struct {
+ StgThunkHeader header;
+ StgWord size; /* number of words in payload */
+ StgClosure *fun;
+ StgClosure *payload[]; /* contains a chunk of *stack* */
+} StgAP_STACK;
+
+typedef struct {
+ StgHeader header;
+ StgClosure *indirectee;
+} StgInd;
+
+typedef struct {
+ StgHeader header;
+ StgClosure *indirectee;
+ StgClosure *static_link; // See Note [CAF lists]
+ const StgInfoTable *saved_info;
+ // `saved_info` also used for the link field for `debug_caf_list`,
+ // see `newCAF` and Note [CAF lists] in rts/sm/Storage.h.
+} StgIndStatic;
+
+typedef struct StgBlockingQueue_ {
+ StgHeader header;
+ struct StgBlockingQueue_ *link;
+ // here so it looks like an IND, to be able to skip the queue without
+ // deleting it (done in wakeBlockingQueue())
+ StgClosure *bh; // the BLACKHOLE
+ StgTSO *owner;
+ struct MessageBlackHole_ *queue;
+ // holds TSOs blocked on `bh`
+} StgBlockingQueue;
+
+typedef struct {
+ StgHeader header;
+ StgWord bytes;
+ StgWord payload[];
+} StgArrBytes;
+
+typedef struct {
+ StgHeader header;
+ StgWord ptrs;
+ StgWord size; // ptrs plus card table
+ StgClosure *payload[];
+ // see also: StgMutArrPtrs macros in ClosureMacros.h
+} StgMutArrPtrs;
+
+typedef struct {
+ StgHeader header;
+ StgWord ptrs;
+ StgClosure *payload[];
+} StgSmallMutArrPtrs;
+
+typedef struct {
+ StgHeader header;
+ StgClosure *var;
+} StgMutVar;
+
+typedef struct _StgUpdateFrame {
+ StgHeader header;
+ StgClosure *updatee;
+} StgUpdateFrame;
+
+typedef struct {
+ StgHeader header;
+ StgWord exceptions_blocked;
+ StgClosure *handler;
+} StgCatchFrame;
+
+typedef struct {
+ const StgInfoTable* info;
+ struct StgStack_ *next_chunk;
+} StgUnderflowFrame;
+
+typedef struct {
+ StgHeader header;
+} StgStopFrame;
+
+typedef struct {
+ StgHeader header;
+ StgWord data;
+} StgIntCharlikeClosure;
+
+/* statically allocated */
+typedef struct {
+ StgHeader header;
+} StgRetry;
+
+typedef struct _StgStableName {
+ StgHeader header;
+ StgWord sn;
+} StgStableName;
+
+typedef struct _StgWeak { /* Weak v */
+ StgHeader header;
+ StgClosure *cfinalizers;
+ StgClosure *key;
+ StgClosure *value; /* v */
+ StgClosure *finalizer;
+ struct _StgWeak *link;
+} StgWeak;
+
+typedef struct _StgCFinalizerList {
+ StgHeader header;
+ StgClosure *link;
+ void (*fptr)(void);
+ void *ptr;
+ void *eptr;
+ StgWord flag; /* has environment (0 or 1) */
+} StgCFinalizerList;
+
+/* Byte code objects. These are fixed size objects with pointers to
+ * four arrays, designed so that a BCO can be easily "re-linked" to
+ * other BCOs, to facilitate GHC's intelligent recompilation. The
+ * array of instructions is static and not re-generated when the BCO
+ * is re-linked, but the other 3 arrays will be regenerated.
+ *
+ * A BCO represents either a function or a stack frame. In each case,
+ * it needs a bitmap to describe to the garbage collector the
+ * pointerhood of its arguments/free variables respectively, and in
+ * the case of a function it also needs an arity. These are stored
+ * directly in the BCO, rather than in the instrs array, for two
+ * reasons:
+ * (a) speed: we need to get at the bitmap info quickly when
+ * the GC is examining APs and PAPs that point to this BCO
+ * (b) a subtle interaction with the compacting GC. In compacting
+ * GC, the info that describes the size/layout of a closure
+ * cannot be in an object more than one level of indirection
+ * away from the current object, because of the order in
+ * which pointers are updated to point to their new locations.
+ */
+
+typedef struct {
+ StgHeader header;
+ StgArrBytes *instrs; /* a pointer to an ArrWords */
+ StgArrBytes *literals; /* a pointer to an ArrWords */
+ StgMutArrPtrs *ptrs; /* a pointer to a MutArrPtrs */
+ StgHalfWord arity; /* arity of this BCO */
+ StgHalfWord size; /* size of this BCO (in words) */
+ StgWord bitmap[]; /* an StgLargeBitmap */
+} StgBCO;
+
+#define BCO_BITMAP(bco) ((StgLargeBitmap *)((StgBCO *)(bco))->bitmap)
+#define BCO_BITMAP_SIZE(bco) (BCO_BITMAP(bco)->size)
+#define BCO_BITMAP_BITS(bco) (BCO_BITMAP(bco)->bitmap)
+#define BCO_BITMAP_SIZEW(bco) ((BCO_BITMAP_SIZE(bco) + BITS_IN(StgWord) - 1) \
+ / BITS_IN(StgWord))
+
+/* A function return stack frame: used when saving the state for a
+ * garbage collection at a function entry point. The function
+ * arguments are on the stack, and we also save the function (its
+ * info table describes the pointerhood of the arguments).
+ *
+ * The stack frame size is also cached in the frame for convenience.
+ *
+ * The only RET_FUN is stg_gc_fun, which is created by __stg_gc_fun,
+ * both in HeapStackCheck.cmm.
+ */
+typedef struct {
+ const StgInfoTable* info;
+ StgWord size;
+ StgClosure * fun;
+ StgClosure * payload[];
+} StgRetFun;
+
+/* Concurrent communication objects */
+
+typedef struct StgMVarTSOQueue_ {
+ StgHeader header;
+ struct StgMVarTSOQueue_ *link;
+ struct StgTSO_ *tso;
+} StgMVarTSOQueue;
+
+typedef struct {
+ StgHeader header;
+ struct StgMVarTSOQueue_ *head;
+ struct StgMVarTSOQueue_ *tail;
+ StgClosure* value;
+} StgMVar;
+
+
+/* STM data structures
+ *
+ * StgTVar defines the only type that can be updated through the STM
+ * interface.
+ *
+ * Note that various optimisations may be possible in order to use less
+ * space for these data structures at the cost of more complexity in the
+ * implementation:
+ *
+ * - In StgTVar, current_value and first_watch_queue_entry could be held in
+ * the same field: if any thread is waiting then its expected_value for
+ * the tvar is the current value.
+ *
+ * - In StgTRecHeader, it might be worthwhile having separate chunks
+ * of read-only and read-write locations. This would save a
+ * new_value field in the read-only locations.
+ *
+ * - In StgAtomicallyFrame, we could combine the waiting bit into
+ * the header (maybe a different info tbl for a waiting transaction).
+ * This means we can specialise the code for the atomically frame
+ * (it immediately switches on frame->waiting anyway).
+ */
+
+typedef struct StgTRecHeader_ StgTRecHeader;
+
+typedef struct StgTVarWatchQueue_ {
+ StgHeader header;
+ StgClosure *closure; // StgTSO
+ struct StgTVarWatchQueue_ *next_queue_entry;
+ struct StgTVarWatchQueue_ *prev_queue_entry;
+} StgTVarWatchQueue;
+
+typedef struct {
+ StgHeader header;
+ StgClosure *current_value; /* accessed via atomics */
+ StgTVarWatchQueue *first_watch_queue_entry; /* accessed via atomics */
+ StgInt num_updates; /* accessed via atomics */
+} StgTVar;
+
+/* new_value == expected_value for read-only accesses */
+/* new_value is a StgTVarWatchQueue entry when trec in state TREC_WAITING */
+typedef struct {
+ StgTVar *tvar;
+ StgClosure *expected_value;
+ StgClosure *new_value;
+#if defined(THREADED_RTS)
+ StgInt num_updates;
+#endif
+} TRecEntry;
+
+#define TREC_CHUNK_NUM_ENTRIES 16
+
+typedef struct StgTRecChunk_ {
+ StgHeader header;
+ struct StgTRecChunk_ *prev_chunk;
+ StgWord next_entry_idx;
+ TRecEntry entries[TREC_CHUNK_NUM_ENTRIES];
+} StgTRecChunk;
+
+typedef enum {
+ TREC_ACTIVE, /* Transaction in progress, outcome undecided */
+ TREC_CONDEMNED, /* Transaction in progress, inconsistent / out of date reads */
+ TREC_COMMITTED, /* Transaction has committed, now updating tvars */
+ TREC_ABORTED, /* Transaction has aborted, now reverting tvars */
+ TREC_WAITING, /* Transaction currently waiting */
+} TRecState;
+
+struct StgTRecHeader_ {
+ StgHeader header;
+ struct StgTRecHeader_ *enclosing_trec;
+ StgTRecChunk *current_chunk;
+ TRecState state;
+};
+
+typedef struct {
+ StgHeader header;
+ StgClosure *code;
+ StgClosure *result;
+} StgAtomicallyFrame;
+
+typedef struct {
+ StgHeader header;
+ StgClosure *code;
+ StgClosure *handler;
+} StgCatchSTMFrame;
+
+typedef struct {
+ StgHeader header;
+ StgWord running_alt_code;
+ StgClosure *first_code;
+ StgClosure *alt_code;
+} StgCatchRetryFrame;
+
+/* ----------------------------------------------------------------------------
+ Messages
+ ------------------------------------------------------------------------- */
+
+typedef struct Message_ {
+ StgHeader header;
+ struct Message_ *link;
+} Message;
+
+typedef struct MessageWakeup_ {
+ StgHeader header;
+ Message *link;
+ StgTSO *tso;
+} MessageWakeup;
+
+typedef struct MessageThrowTo_ {
+ StgHeader header;
+ struct MessageThrowTo_ *link;
+ StgTSO *source;
+ StgTSO *target;
+ StgClosure *exception;
+} MessageThrowTo;
+
+typedef struct MessageBlackHole_ {
+ StgHeader header;
+ struct MessageBlackHole_ *link;
+ // here so it looks like an IND, to be able to skip the message without
+ // deleting it (done in throwToMsg())
+ StgTSO *tso;
+ StgClosure *bh;
+} MessageBlackHole;
+
+/* ----------------------------------------------------------------------------
+ Compact Regions
+ ------------------------------------------------------------------------- */
+
+//
+// A compact region is a list of blocks. Each block starts with an
+// StgCompactNFDataBlock structure, and the list is chained through the next
+// field of these structs. (the link field of the bdescr is used to chain
+// together multiple compact region on the compact_objects field of a
+// generation).
+//
+// See Note [Compact Normal Forms] for details
+//
+typedef struct StgCompactNFDataBlock_ {
+ struct StgCompactNFDataBlock_ *self;
+ // the address of this block; this is copied over to the
+ // receiving end when serializing a compact, so the receiving
+ // end can allocate the block at best as it can, and then
+ // verify if pointer adjustment is needed or not by comparing
+ // self with the actual address; the same data is sent over as
+ // SerializedCompact metadata, but having it here simplifies
+ // the fixup implementation.
+ struct StgCompactNFData_ *owner;
+ // the closure who owns this block (used in objectGetCompact)
+ struct StgCompactNFDataBlock_ *next;
+ // chain of blocks used for serialization and freeing
+} StgCompactNFDataBlock;
+
+//
+// This is the Compact# primitive object.
+//
+typedef struct StgCompactNFData_ {
+ StgHeader header;
+ // for sanity and other checks in practice, nothing should ever
+ // need the compact info pointer (we don't even need fwding
+ // pointers because it's a large object)
+ StgWord totalW;
+ // Total number of words in all blocks in the compact
+ StgWord autoBlockW;
+ // size of automatically appended blocks
+ StgPtr hp, hpLim;
+ // the beginning and end of the free area in the nursery block. This is
+ // just a convenience so that we can avoid multiple indirections through
+ // the nursery pointer below during compaction.
+ StgCompactNFDataBlock *nursery;
+ // where to (try to) allocate from when appending
+ StgCompactNFDataBlock *last;
+ // the last block of the chain (to know where to append new
+ // blocks for resize)
+ struct hashtable *hash;
+ // the hash table for the current compaction, or NULL if
+ // there's no (sharing-preserved) compaction in progress.
+ StgClosure *result;
+ // Used temporarily to store the result of compaction. Doesn't need to be
+ // a GC root.
+ struct StgCompactNFData_ *link;
+ // Used by compacting GC for linking CNFs with threaded hash tables. See
+ // Note [CNFs in compacting GC] in Compact.c for details.
+} StgCompactNFData;
diff --git a/rts/include/rts/storage/FunTypes.h b/rts/include/rts/storage/FunTypes.h
new file mode 100644
index 0000000000..3b066ef7dc
--- /dev/null
+++ b/rts/include/rts/storage/FunTypes.h
@@ -0,0 +1,54 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2002
+ *
+ * Things for functions.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* generic - function comes with a small bitmap */
+#define ARG_GEN 0
+
+/* generic - function comes with a large bitmap */
+#define ARG_GEN_BIG 1
+
+/* BCO - function is really a BCO */
+#define ARG_BCO 2
+
+/*
+ * Specialised function types: bitmaps and calling sequences
+ * for these functions are pre-generated: see ghc/utils/genapply and
+ * generated code in ghc/rts/AutoApply.cmm.
+ *
+ * NOTE: other places to change if you change this table:
+ * - utils/genapply/Main.hs: stackApplyTypes
+ * - GHC.StgToCmm.Layout: stdPattern
+ */
+#define ARG_NONE 3
+#define ARG_N 4
+#define ARG_P 5
+#define ARG_F 6
+#define ARG_D 7
+#define ARG_L 8
+#define ARG_V16 9
+#define ARG_V32 10
+#define ARG_V64 11
+#define ARG_NN 12
+#define ARG_NP 13
+#define ARG_PN 14
+#define ARG_PP 15
+#define ARG_NNN 16
+#define ARG_NNP 17
+#define ARG_NPN 18
+#define ARG_NPP 19
+#define ARG_PNN 20
+#define ARG_PNP 21
+#define ARG_PPN 22
+#define ARG_PPP 23
+#define ARG_PPPP 24
+#define ARG_PPPPP 25
+#define ARG_PPPPPP 26
+#define ARG_PPPPPPP 27
+#define ARG_PPPPPPPP 28
diff --git a/rts/include/rts/storage/GC.h b/rts/include/rts/storage/GC.h
new file mode 100644
index 0000000000..478503aaee
--- /dev/null
+++ b/rts/include/rts/storage/GC.h
@@ -0,0 +1,261 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * External Storage Manager Interface
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include <stddef.h>
+#include "rts/OSThreads.h"
+
+/* -----------------------------------------------------------------------------
+ * Generational GC
+ *
+ * We support an arbitrary number of generations. Notes (in no particular
+ * order):
+ *
+ * - Objects "age" in the nursery for one GC cycle before being promoted
+ * to the next generation. There is no aging in other generations.
+ *
+ * - generation 0 is the allocation area. It is given
+ * a fixed set of blocks during initialisation, and these blocks
+ * normally stay in G0S0. In parallel execution, each
+ * Capability has its own nursery.
+ *
+ * - during garbage collection, each generation which is an
+ * evacuation destination (i.e. all generations except G0) is
+ * allocated a to-space. evacuated objects are allocated into
+ * the generation's to-space until GC is finished, when the
+ * original generation's contents may be freed and replaced
+ * by the to-space.
+ *
+ * - the mutable-list is per-generation. G0 doesn't have one
+ * (since every garbage collection collects at least G0).
+ *
+ * - block descriptors contain a pointer to the generation that
+ * the block belongs to, for convenience.
+ *
+ * - static objects are stored in per-generation lists. See GC.c for
+ * details of how we collect CAFs in the generational scheme.
+ *
+ * - large objects are per-generation, and are promoted in the
+ * same way as small objects.
+ *
+ * ------------------------------------------------------------------------- */
+
+// A count of blocks needs to store anything up to the size of memory
+// divided by the block size. The safest thing is therefore to use a
+// type that can store the full range of memory addresses,
+// ie. StgWord. Note that we have had some tricky int overflows in a
+// couple of cases caused by using ints rather than longs (e.g. #5086)
+
+typedef StgWord memcount;
+
+typedef struct nursery_ {
+ bdescr * blocks;
+ memcount n_blocks;
+} nursery;
+
+// Nursery invariants:
+//
+// - cap->r.rNursery points to the nursery for this capability
+//
+// - cap->r.rCurrentNursery points to the block in the nursery that we are
+// currently allocating into. While in Haskell the current heap pointer is
+// in Hp, outside Haskell it is stored in cap->r.rCurrentNursery->free.
+//
+// - the blocks *after* cap->rCurrentNursery in the chain are empty
+// (although their bd->free pointers have not been updated to
+// reflect that)
+//
+// - the blocks *before* cap->rCurrentNursery have been used. Except
+// for rCurrentAlloc.
+//
+// - cap->r.rCurrentAlloc is either NULL, or it points to a block in
+// the nursery *before* cap->r.rCurrentNursery.
+//
+// See also Note [allocation accounting] to understand how total
+// memory allocation is tracked.
+
+typedef struct generation_ {
+ uint32_t no; // generation number
+
+ bdescr * blocks; // blocks in this gen
+ memcount n_blocks; // number of blocks
+ memcount n_words; // number of used words
+
+ bdescr * large_objects; // large objects (doubly linked)
+ memcount n_large_blocks; // no. of blocks used by large objs
+ memcount n_large_words; // no. of words used by large objs
+ memcount n_new_large_words; // words of new large objects
+ // (for doYouWantToGC())
+
+ bdescr * compact_objects; // compact objects chain
+ // the second block in each compact is
+ // linked from the closure object, while
+ // the second compact object in the
+ // chain is linked from bd->link (like
+ // large objects)
+ memcount n_compact_blocks; // no. of blocks used by all compacts
+ bdescr * compact_blocks_in_import; // compact objects being imported
+ // (not known to the GC because
+ // potentially invalid, but we
+ // need to keep track of them
+ // to avoid assertions in Sanity)
+ // this is a list shaped like compact_objects
+ memcount n_compact_blocks_in_import; // no. of blocks used by compacts
+ // being imported
+
+ // Max blocks to allocate in this generation before collecting it. Collect
+ // this generation when
+ //
+ // n_blocks + n_large_blocks + n_compact_blocks > max_blocks
+ //
+ memcount max_blocks;
+
+ StgTSO * threads; // threads in this gen
+ // linked via global_link
+ StgWeak * weak_ptr_list; // weak pointers in this gen
+
+ struct generation_ *to; // destination gen for live objects
+
+ // stats information
+ uint32_t collections;
+ uint32_t par_collections;
+ uint32_t failed_promotions; // Currently unused
+
+ // ------------------------------------
+ // Fields below are used during GC only
+
+#if defined(THREADED_RTS)
+ char pad[128]; // make sure the following is
+ // on a separate cache line.
+ SpinLock sync; // lock for large_objects
+ // and scavenged_large_objects
+#endif
+
+ int mark; // mark (not copy)? (old gen only)
+ int compact; // compact (not sweep)? (old gen only)
+
+ // During GC, if we are collecting this gen, blocks and n_blocks
+ // are copied into the following two fields. After GC, these blocks
+ // are freed.
+ bdescr * old_blocks; // bdescr of first from-space block
+ memcount n_old_blocks; // number of blocks in from-space
+ memcount live_estimate; // for sweeping: estimate of live data
+
+ bdescr * scavenged_large_objects; // live large objs after GC (d-link)
+ memcount n_scavenged_large_blocks; // size (not count) of above
+
+ bdescr * live_compact_objects; // live compact objs after GC (d-link)
+ memcount n_live_compact_blocks; // size (not count) of above
+
+ bdescr * bitmap; // bitmap for compacting collection
+
+ StgTSO * old_threads;
+ StgWeak * old_weak_ptr_list;
+} generation;
+
+extern generation * generations;
+extern generation * g0;
+extern generation * oldest_gen;
+
+typedef void(*ListBlocksCb)(void *user, bdescr *);
+void listAllBlocks(ListBlocksCb cb, void *user);
+
+/* -----------------------------------------------------------------------------
+ Generic allocation
+
+ StgPtr allocate(Capability *cap, W_ n)
+ Allocates memory from the nursery in
+ the current Capability.
+
+ StgPtr allocatePinned(Capability *cap, W_ n, W_ alignment, W_ align_off)
+ Allocates a chunk of contiguous store
+ n words long, which is at a fixed
+ address (won't be moved by GC). The
+ word at the byte offset 'align_off'
+ will be aligned to 'alignment', which
+ must be a power of two.
+ Returns a pointer to the first word.
+ Always succeeds.
+
+ NOTE: the GC can't in general handle
+ pinned objects, so allocatePinned()
+ can only be used for ByteArrays at the
+ moment.
+
+ Don't forget to TICK_ALLOC_XXX(...)
+ after calling allocate or
+ allocatePinned, for the
+ benefit of the ticky-ticky profiler.
+
+ -------------------------------------------------------------------------- */
+
+StgPtr allocate ( Capability *cap, W_ n );
+StgPtr allocateMightFail ( Capability *cap, W_ n );
+StgPtr allocatePinned ( Capability *cap, W_ n, W_ alignment, W_ align_off);
+
+/* memory allocator for executable memory */
+typedef void* AdjustorWritable;
+typedef void* AdjustorExecutable;
+
+void flushExec(W_ len, AdjustorExecutable exec_addr);
+
+// Used by GC checks in external .cmm code:
+extern W_ large_alloc_lim;
+
+/* -----------------------------------------------------------------------------
+ Performing Garbage Collection
+ -------------------------------------------------------------------------- */
+
+void performGC(void);
+void performMajorGC(void);
+
+/* -----------------------------------------------------------------------------
+ The CAF table - used to let us revert CAFs in GHCi
+ -------------------------------------------------------------------------- */
+
+StgInd *newCAF (StgRegTable *reg, StgIndStatic *caf);
+StgInd *newRetainedCAF (StgRegTable *reg, StgIndStatic *caf);
+StgInd *newGCdCAF (StgRegTable *reg, StgIndStatic *caf);
+void revertCAFs (void);
+
+// Request that all CAFs are retained indefinitely.
+// (preferably use RtsConfig.keep_cafs instead)
+void setKeepCAFs (void);
+
+// Let the runtime know that all the CAFs in high mem are not
+// to be retained. Useful in conjunction with loadNativeObj
+void setHighMemDynamic (void);
+
+/* -----------------------------------------------------------------------------
+ This is the write barrier for MUT_VARs, a.k.a. IORefs. A
+ MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
+ is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
+ and is put on the mutable list.
+ -------------------------------------------------------------------------- */
+
+void dirty_MUT_VAR(StgRegTable *reg, StgMutVar *mv, StgClosure *old);
+
+/* set to disable CAF garbage collection in GHCi. */
+/* (needed when dynamic libraries are used). */
+extern bool keepCAFs;
+
+#include "rts/Flags.h"
+
+// Initialise a block descriptor's generation fields: 'gen' is the
+// generation the block belongs to, 'dest' the generation live objects in
+// this block will be evacuated to.  Stores are relaxed atomics —
+// NOTE(review): presumably any required ordering is provided by the
+// callers' synchronisation; confirm against the allocators in Storage.c.
+INLINE_HEADER void initBdescr(bdescr *bd, generation *gen, generation *dest)
+{
+ RELAXED_STORE(&bd->gen, gen);
+ RELAXED_STORE(&bd->gen_no, gen->no);
+ RELAXED_STORE(&bd->dest_no, dest->no);
+
+#if !IN_STG_CODE
+ /* See Note [RtsFlags is a pointer in STG code] */
+ ASSERT(gen->no < RtsFlags.GcFlags.generations);
+ ASSERT(dest->no < RtsFlags.GcFlags.generations);
+#endif
+}
diff --git a/rts/include/rts/storage/Heap.h b/rts/include/rts/storage/Heap.h
new file mode 100644
index 0000000000..b3b1efaf9f
--- /dev/null
+++ b/rts/include/rts/storage/Heap.h
@@ -0,0 +1,31 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The University of Glasgow 2006-2017
+ *
+ * Introspection into GHC's heap representation
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#include "rts/storage/Closures.h"
+
+StgMutArrPtrs *heap_view_closurePtrs(Capability *cap, StgClosure *closure);
+
+void heap_view_closure_ptrs_in_pap_payload(StgClosure *ptrs[], StgWord *nptrs
+ , StgClosure *fun, StgClosure **payload, StgWord size);
+
+StgWord heap_view_closureSize(StgClosure *closure);
+
+/*
+ * Collect the pointers of a closure into the given array. The given array should be
+ * large enough to hold all collected pointers e.g.
+ * `heap_view_closureSize(closure)`. Returns the number of pointers collected.
+ * The caller must ensure that `closure` is not modified (or moved by the GC)
+ * for the duration of the call to `collect_pointers`.
+ *
+ * In principle this is
+ * StgWord collect_pointers(StgClosure *closure, StgWord size, StgClosure *ptrs[size]);
+ * but we cannot write this and retain C++ compatibility.
+ */
+StgWord collect_pointers(StgClosure *closure, StgClosure *ptrs[]);
diff --git a/rts/include/rts/storage/InfoTables.h b/rts/include/rts/storage/InfoTables.h
new file mode 100644
index 0000000000..b97e12982b
--- /dev/null
+++ b/rts/include/rts/storage/InfoTables.h
@@ -0,0 +1,405 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2002
+ *
+ * Info Tables
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/* ----------------------------------------------------------------------------
+ Relative pointers
+
+ Several pointer fields in info tables are expressed as offsets
+ relative to the info pointer, so that we can generate
+ position-independent code.
+
+ Note [x86-64-relative]
+ There is a complication on the x86_64 platform, where pointers are
+ 64 bits, but the tools don't support 64-bit relative relocations.
+ However, the default memory model (small) ensures that all symbols
+ have values in the lower 2Gb of the address space, so offsets all
+ fit in 32 bits. Hence we can use 32-bit offset fields.
+
+ Somewhere between binutils-2.16.1 and binutils-2.16.91.0.6,
+ support for 64-bit PC-relative relocations was added, so maybe this
+ hackery can go away sometime.
+ ------------------------------------------------------------------------- */
+
+#if defined(x86_64_HOST_ARCH)
+#define OFFSET_FIELD(n) StgHalfInt n; StgHalfWord __pad_##n
+#else
+#define OFFSET_FIELD(n) StgInt n
+#endif
+
+/* -----------------------------------------------------------------------------
+ Profiling info
+ -------------------------------------------------------------------------- */
+
+typedef struct {
+#if !defined(TABLES_NEXT_TO_CODE)
+ char *closure_type;
+ char *closure_desc;
+#else
+ OFFSET_FIELD(closure_type_off);
+ OFFSET_FIELD(closure_desc_off);
+#endif
+} StgProfInfo;
+
+/* -----------------------------------------------------------------------------
+ Closure flags
+ -------------------------------------------------------------------------- */
+
+/* The type flags provide quick access to certain properties of a closure. */
+
+#define _HNF (1<<0) /* head normal form? */
+#define _BTM (1<<1) /* uses info->layout.bitmap */
+#define _NS (1<<2) /* non-sparkable */
+#define _THU (1<<3) /* thunk? */
+#define _MUT (1<<4) /* mutable? */
+#define _UPT (1<<5) /* unpointed? */
+#define _SRT (1<<6) /* has an SRT? */
+#define _IND (1<<7) /* is an indirection? */
+
+#define isMUTABLE(flags) ((flags) &_MUT)
+#define isBITMAP(flags) ((flags) &_BTM)
+#define isTHUNK(flags) ((flags) &_THU)
+#define isUNPOINTED(flags) ((flags) &_UPT)
+#define hasSRT(flags) ((flags) &_SRT)
+
+extern StgWord16 closure_flags[];
+
+#define closureFlags(c) (closure_flags[get_itbl \
+ (UNTAG_CONST_CLOSURE(c))->type])
+
+#define closure_HNF(c) ( closureFlags(c) & _HNF)
+#define closure_BITMAP(c) ( closureFlags(c) & _BTM)
+#define closure_NON_SPARK(c) ( (closureFlags(c) & _NS))
+#define closure_SHOULD_SPARK(c) (!(closureFlags(c) & _NS))
+#define closure_THUNK(c) ( closureFlags(c) & _THU)
+#define closure_MUTABLE(c) ( closureFlags(c) & _MUT)
+#define closure_UNPOINTED(c) ( closureFlags(c) & _UPT)
+#define closure_SRT(c) ( closureFlags(c) & _SRT)
+#define closure_IND(c) ( closureFlags(c) & _IND)
+
+/* same as above but for info-ptr rather than closure */
+#define ipFlags(ip) (closure_flags[ip->type])
+
+#define ip_HNF(ip) ( ipFlags(ip) & _HNF)
+#define ip_BITMAP(ip) ( ipFlags(ip) & _BTM)
+#define ip_SHOULD_SPARK(ip) (!(ipFlags(ip) & _NS))
+#define ip_THUNK(ip) ( ipFlags(ip) & _THU)
+#define ip_MUTABLE(ip) ( ipFlags(ip) & _MUT)
+#define ip_UNPOINTED(ip) ( ipFlags(ip) & _UPT)
+#define ip_SRT(ip) ( ipFlags(ip) & _SRT)
+#define ip_IND(ip) ( ipFlags(ip) & _IND)
+
+/* -----------------------------------------------------------------------------
+ Bitmaps
+
+ These are used to describe the pointerhood of a sequence of words
+ (usually on the stack) to the garbage collector. The two primary
+ uses are for stack frames, and functions (where we need to describe
+ the layout of a PAP to the GC).
+
+ In these bitmaps: 0 == ptr, 1 == non-ptr.
+ -------------------------------------------------------------------------- */
+
+/*
+ * Small bitmaps: for a small bitmap, we store the size and bitmap in
+ * the same word, using the following macros. If the bitmap doesn't
+ * fit in a single word, we use a pointer to an StgLargeBitmap below.
+ */
+#define MK_SMALL_BITMAP(size,bits) (((bits)<<BITMAP_BITS_SHIFT) | (size))
+
+#define BITMAP_SIZE(bitmap) ((bitmap) & BITMAP_SIZE_MASK)
+#define BITMAP_BITS(bitmap) ((bitmap) >> BITMAP_BITS_SHIFT)
+
+/*
+ * A large bitmap.
+ */
+typedef struct {
+ StgWord size;
+ StgWord bitmap[];
+} StgLargeBitmap;
+
+/* ----------------------------------------------------------------------------
+ Info Tables
+ ------------------------------------------------------------------------- */
+
+/*
+ * Stuff describing the closure layout. Well, actually, it might
+ * contain the selector index for a THUNK_SELECTOR. This union is one
+ * word long.
+ */
+typedef union {
+ struct { /* Heap closure payload layout: */
+ StgHalfWord ptrs; /* number of pointers */
+ StgHalfWord nptrs; /* number of non-pointers */
+ } payload;
+
+ StgWord bitmap; /* word-sized bit pattern describing */
+ /* a stack frame: see below */
+
+#if !defined(TABLES_NEXT_TO_CODE)
+ StgLargeBitmap* large_bitmap; /* pointer to large bitmap structure */
+#else
+ OFFSET_FIELD(large_bitmap_offset); /* offset from info table to large bitmap structure */
+#endif
+
+ StgWord selector_offset; /* used in THUNK_SELECTORs */
+
+} StgClosureInfo;
+
+
+#if defined(x86_64_HOST_ARCH) && defined(TABLES_NEXT_TO_CODE)
+// On x86_64 we can fit a pointer offset in half a word, so put the SRT offset
+// in the info->srt field directly.
+//
+// See the section "Referring to an SRT from the info table" in
+// Note [SRTs] in CmmBuildInfoTables.hs
+#define USE_INLINE_SRT_FIELD
+#endif
+
+#if defined(USE_INLINE_SRT_FIELD)
+// offset to the SRT / closure, or zero if there's no SRT
+typedef StgHalfInt StgSRTField;
+#else
+// non-zero if there is an SRT, the offset is in the optional srt field.
+typedef StgHalfWord StgSRTField;
+#endif
+
+
+/*
+ * The "standard" part of an info table. Every info table has this bit.
+ */
+typedef struct StgInfoTable_ {
+
+#if !defined(TABLES_NEXT_TO_CODE)
+ StgFunPtr entry; /* pointer to the entry code */
+#endif
+
+#if defined(PROFILING)
+ StgProfInfo prof;
+#endif
+
+ StgClosureInfo layout; /* closure layout info (one word) */
+
+ StgHalfWord type; /* closure type */
+ StgSRTField srt;
+ /* In a CONSTR:
+ - the zero-based constructor tag
+ In a FUN/THUNK
+ - if USE_INLINE_SRT_FIELD
+ - offset to the SRT (or zero if no SRT)
+ - otherwise
+ - non-zero if there is an SRT, offset is in srt_offset
+ */
+
+#if defined(TABLES_NEXT_TO_CODE)
+ StgCode code[];
+#endif
+} *StgInfoTablePtr; // StgInfoTable defined in rts/Types.h
+
+
+/* -----------------------------------------------------------------------------
+ Function info tables
+
+ This is the general form of function info tables. The compiler
+ will omit some of the fields in common cases:
+
+ - If fun_type is not ARG_GEN or ARG_GEN_BIG, then the slow_apply
+ and bitmap fields may be left out (they are at the end, so omitting
+ them doesn't affect the layout).
+
+ - If has_srt (in the std info table part) is zero, then the srt
+ field needn't be set. This only applies if the slow_apply and
+ bitmap fields have also been omitted.
+ -------------------------------------------------------------------------- */
+
+/*
+ Note [Encoding static reference tables]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ As static reference tables appear frequently in code, we use a special
+ compact encoding for the common case of a module defining only a few CAFs: We
+ produce one table containing a list of CAFs in the module and then include a
+ bitmap in each info table describing which entries of this table the closure
+ references.
+ */
+
+typedef struct StgFunInfoExtraRev_ {
+ OFFSET_FIELD(slow_apply_offset); /* apply to args on the stack */
+ union {
+ StgWord bitmap;
+ OFFSET_FIELD(bitmap_offset); /* arg ptr/nonptr bitmap */
+ } b;
+#if !defined(USE_INLINE_SRT_FIELD)
+ OFFSET_FIELD(srt_offset); /* pointer to the SRT closure */
+#endif
+ StgHalfWord fun_type; /* function type */
+ StgHalfWord arity; /* function arity */
+} StgFunInfoExtraRev;
+
+typedef struct StgFunInfoExtraFwd_ {
+ StgHalfWord fun_type; /* function type */
+ StgHalfWord arity; /* function arity */
+ StgClosure *srt; /* pointer to the SRT closure */
+ union { /* union for compat. with TABLES_NEXT_TO_CODE version */
+ StgWord bitmap; /* arg ptr/nonptr bitmap */
+ } b;
+ StgFun *slow_apply; /* apply to args on the stack */
+} StgFunInfoExtraFwd;
+
+typedef struct {
+#if defined(TABLES_NEXT_TO_CODE)
+ StgFunInfoExtraRev f;
+ StgInfoTable i;
+#else
+ StgInfoTable i;
+ StgFunInfoExtraFwd f;
+#endif
+} StgFunInfoTable;
+
+// canned bitmap for each arg type, indexed by constants in FunTypes.h
+extern const StgWord stg_arg_bitmaps[];
+
+/* -----------------------------------------------------------------------------
+ Return info tables
+ -------------------------------------------------------------------------- */
+
+/*
+ * When info tables are laid out backwards, we can omit the SRT
+ * pointer iff has_srt is zero.
+ */
+
+typedef struct {
+#if defined(TABLES_NEXT_TO_CODE)
+#if !defined(USE_INLINE_SRT_FIELD)
+ OFFSET_FIELD(srt_offset); /* offset to the SRT closure */
+#endif
+ StgInfoTable i;
+#else
+ StgInfoTable i;
+ StgClosure *srt; /* pointer to the SRT closure */
+#endif
+} StgRetInfoTable;
+
+/* -----------------------------------------------------------------------------
+ Thunk info tables
+ -------------------------------------------------------------------------- */
+
+/*
+ * When info tables are laid out backwards, we can omit the SRT
+ * pointer iff has_srt is zero.
+ */
+
+typedef struct StgThunkInfoTable_ {
+#if defined(TABLES_NEXT_TO_CODE)
+#if !defined(USE_INLINE_SRT_FIELD)
+ OFFSET_FIELD(srt_offset); /* offset to the SRT closure */
+#endif
+ StgInfoTable i;
+#else
+ StgInfoTable i;
+ StgClosure *srt; /* pointer to the SRT closure */
+#endif
+} StgThunkInfoTable;
+
+/* -----------------------------------------------------------------------------
+ Constructor info tables
+ -------------------------------------------------------------------------- */
+
+typedef struct StgConInfoTable_ {
+#if !defined(TABLES_NEXT_TO_CODE)
+ StgInfoTable i;
+#endif
+
+#if defined(TABLES_NEXT_TO_CODE)
+ OFFSET_FIELD(con_desc); // the name of the data constructor
+ // as: Package:Module.Name
+#else
+ char *con_desc;
+#endif
+
+#if defined(TABLES_NEXT_TO_CODE)
+ StgInfoTable i;
+#endif
+} StgConInfoTable;
+
+
+/* -----------------------------------------------------------------------------
+ Accessor macros for fields that might be offsets (C version)
+ -------------------------------------------------------------------------- */
+
+/*
+ * GET_SRT(info)
+ * info must be a Stg[Ret|Thunk]InfoTable* (an info table that has a SRT)
+ */
+#if defined(TABLES_NEXT_TO_CODE)
+#if defined(x86_64_HOST_ARCH)
+#define GET_SRT(info) \
+ ((StgClosure*) (((StgWord) ((info)+1)) + (info)->i.srt))
+#else
+#define GET_SRT(info) \
+ ((StgClosure*) (((StgWord) ((info)+1)) + (info)->srt_offset))
+#endif
+#else // !TABLES_NEXT_TO_CODE
+#define GET_SRT(info) ((info)->srt)
+#endif
+
+/*
+ * GET_CON_DESC(info)
+ * info must be a StgConInfoTable*.
+ */
+#if defined(TABLES_NEXT_TO_CODE)
+#define GET_CON_DESC(info) \
+ ((const char *)((StgWord)((info)+1) + ((info)->con_desc)))
+#else
+#define GET_CON_DESC(info) ((const char *)(info)->con_desc)
+#endif
+
+/*
+ * GET_FUN_SRT(info)
+ * info must be a StgFunInfoTable*
+ */
+#if defined(TABLES_NEXT_TO_CODE)
+#if defined(x86_64_HOST_ARCH)
+#define GET_FUN_SRT(info) \
+ ((StgClosure*) (((StgWord) ((info)+1)) + (info)->i.srt))
+#else
+#define GET_FUN_SRT(info) \
+ ((StgClosure*) (((StgWord) ((info)+1)) + (info)->f.srt_offset))
+#endif
+#else
+#define GET_FUN_SRT(info) ((info)->f.srt)
+#endif
+
+#if defined(TABLES_NEXT_TO_CODE)
+#define GET_LARGE_BITMAP(info) ((StgLargeBitmap*) (((StgWord) ((info)+1)) \
+ + (info)->layout.large_bitmap_offset))
+#else
+#define GET_LARGE_BITMAP(info) ((info)->layout.large_bitmap)
+#endif
+
+#if defined(TABLES_NEXT_TO_CODE)
+#define GET_FUN_LARGE_BITMAP(info) ((StgLargeBitmap*) (((StgWord) ((info)+1)) \
+ + (info)->f.b.bitmap_offset))
+#else
+#define GET_FUN_LARGE_BITMAP(info) ((StgLargeBitmap*) ((info)->f.b.bitmap))
+#endif
+
+/*
+ * GET_PROF_TYPE, GET_PROF_DESC
+ */
+#if defined(TABLES_NEXT_TO_CODE)
+#define GET_PROF_TYPE(info) ((char *)((StgWord)((info)+1) + (info->prof.closure_type_off)))
+#else
+#define GET_PROF_TYPE(info) ((info)->prof.closure_type)
+#endif
+#if defined(TABLES_NEXT_TO_CODE)
+#define GET_PROF_DESC(info) ((char *)((StgWord)((info)+1) + (info->prof.closure_desc_off)))
+#else
+#define GET_PROF_DESC(info) ((info)->prof.closure_desc)
+#endif
diff --git a/rts/include/rts/storage/MBlock.h b/rts/include/rts/storage/MBlock.h
new file mode 100644
index 0000000000..3acefda9a0
--- /dev/null
+++ b/rts/include/rts/storage/MBlock.h
@@ -0,0 +1,32 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2008
+ *
+ * MegaBlock Allocator interface.
+ *
+ * See wiki commentary at
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/heap-alloced
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+extern W_ peak_mblocks_allocated;
+extern W_ mblocks_allocated;
+
+extern void initMBlocks(void);
+extern void * getMBlock(void);
+extern void * getMBlocks(uint32_t n);
+extern void * getMBlockOnNode(uint32_t node);
+extern void * getMBlocksOnNode(uint32_t node, uint32_t n);
+extern void freeMBlocks(void *addr, uint32_t n);
+extern void releaseFreeMemory(void);
+extern void freeAllMBlocks(void);
+
+extern void *getFirstMBlock(void **state);
+extern void *getNextMBlock(void **state, void *mblock);
+
+#if defined(THREADED_RTS)
+// needed for HEAP_ALLOCED below
+extern SpinLock gc_alloc_block_sync;
+#endif
diff --git a/rts/include/rts/storage/TSO.h b/rts/include/rts/storage/TSO.h
new file mode 100644
index 0000000000..61215d9f38
--- /dev/null
+++ b/rts/include/rts/storage/TSO.h
@@ -0,0 +1,330 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * The definitions for Thread State Objects.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/*
+ * PROFILING info in a TSO
+ */
+typedef struct {
+ CostCentreStack *cccs; /* thread's current CCS */
+} StgTSOProfInfo;
+
+/*
+ * There is no TICKY info in a TSO at this time.
+ */
+
+/*
+ * Thread IDs are 64 bits.
+ */
+typedef StgWord64 StgThreadID;
+
+#define tsoLocked(tso) ((tso)->flags & TSO_LOCKED)
+
+/*
+ * Type returned after running a thread. Values of this type
+ * include HeapOverflow, StackOverflow etc. See Constants.h for the
+ * full list.
+ */
+typedef unsigned int StgThreadReturnCode;
+
+#if defined(mingw32_HOST_OS)
+/* results from an async I/O request + its request ID. */
+typedef struct {
+ unsigned int reqID;
+ int len;
+ int errCode;
+} StgAsyncIOResult;
+#endif
+
+/* Reason for thread being blocked. See comment above struct StgTSO_. */
+typedef union {
+ StgClosure *closure;
+ StgTSO *prev; // a back-link when the TSO is on the run queue (NotBlocked)
+ struct MessageBlackHole_ *bh;
+ struct MessageThrowTo_ *throwto;
+ struct MessageWakeup_ *wakeup;
+ StgInt fd; /* StgInt instead of int, so that it's the same size as the ptrs */
+#if defined(mingw32_HOST_OS)
+ StgAsyncIOResult *async_result;
+#endif
+#if !defined(THREADED_RTS)
+ StgWord target;
+ // Only for the non-threaded RTS: the target time for a thread
+ // blocked in threadDelay, in units of 1ms. This is a
+ // compromise: we don't want to take up much space in the TSO. If
+ // you want better resolution for threadDelay, use -threaded.
+#endif
+} StgTSOBlockInfo;
+
+
+/*
+ * TSOs live on the heap, and therefore look just like heap objects.
+ * Large TSOs will live in their own "block group" allocated by the
+ * storage manager, and won't be copied during garbage collection.
+ */
+
+/*
+ * Threads may be blocked for several reasons. A blocked thread will
+ * have the reason in the why_blocked field of the TSO, and some
+ * further info (such as the closure the thread is blocked on, or the
+ * file descriptor if the thread is waiting on I/O) in the block_info
+ * field.
+ */
+
+typedef struct StgTSO_ {
+ StgHeader header;
+
+ /* The link field, for linking threads together in lists (e.g. the
+ run queue on a Capability.
+ */
+ struct StgTSO_* _link;
+ /*
+ Currently used for linking TSOs on:
+ * cap->run_queue_{hd,tl}
+ * (non-THREADED_RTS); the blocked_queue
+ * and pointing to the next chunk for a ThreadOldStack
+
+ NOTE!!! do not modify _link directly, it is subject to
+ a write barrier for generational GC. Instead use the
+ setTSOLink() function. Exceptions to this rule are:
+
+ * setting the link field to END_TSO_QUEUE
+ * setting the link field of the currently running TSO, as it
+ will already be dirty.
+ */
+
+ struct StgTSO_* global_link; // Links threads on the
+ // generation->threads lists
+
+ /*
+ * The thread's stack
+ */
+ struct StgStack_ *stackobj;
+
+ /*
+ * The tso->dirty flag indicates that this TSO's stack should be
+ * scanned during garbage collection. It also indicates that this
+ * TSO is on the mutable list.
+ *
+ * NB. The dirty flag gets a word to itself, so that it can be set
+ * safely by multiple threads simultaneously (the flags field is
+ * not safe for this purpose; see #3429). It is harmless for the
+ * TSO to be on the mutable list multiple times.
+ *
+ * tso->dirty is set by dirty_TSO(), and unset by the garbage
+ * collector (only).
+ */
+
+ StgWord16 what_next; // Values defined in Constants.h
+ StgWord16 why_blocked; // Values defined in Constants.h
+ StgWord32 flags; // Values defined in Constants.h
+ StgTSOBlockInfo block_info;
+ StgThreadID id;
+ StgWord32 saved_errno;
+ StgWord32 dirty; /* non-zero => dirty */
+ struct InCall_* bound;
+ struct Capability_* cap;
+
+ struct StgTRecHeader_ * trec; /* STM transaction record */
+
+ /*
+ * A list of threads blocked on this TSO waiting to throw exceptions.
+ */
+ struct MessageThrowTo_ * blocked_exceptions;
+
+ /*
+ * A list of StgBlockingQueue objects, representing threads
+ * blocked on thunks that are under evaluation by this thread.
+ */
+ struct StgBlockingQueue_ *bq;
+
+ /*
+ * The allocation limit for this thread, which is updated as the
+ * thread allocates. If the value drops below zero, and
+ * TSO_ALLOC_LIMIT is set in flags, we raise an exception in the
+ * thread, and give the thread a little more space to handle the
+ * exception before we raise the exception again.
+ *
+ * This is an integer, because we might update it in a place where
+ * it isn't convenient to raise the exception, so we want it to
+ * stay negative until we get around to checking it.
+ *
+ * Use only PK_Int64/ASSIGN_Int64 macros to get/set the value of alloc_limit
+ * in C code otherwise you will cause alignment issues on SPARC
+ */
+ StgInt64 alloc_limit; /* in bytes */
+
+ /*
+ * sum of the sizes of all stack chunks (in words), used to decide
+ * whether to throw the StackOverflow exception when the stack
+ * overflows, or whether to just chain on another stack chunk.
+ *
+ * Note that this overestimates the real stack size, because each
+ * chunk will have a gap at the end, of +RTS -kb<size> words.
+ * This means stack overflows are not entirely accurate, because
+ * the more gaps there are, the sooner the stack will run into the
+ * hard +RTS -K<size> limit.
+ */
+ StgWord32 tot_stack_size;
+
+#if defined(TICKY_TICKY)
+ /* TICKY-specific stuff would go here. */
+#endif
+#if defined(PROFILING)
+ StgTSOProfInfo prof;
+#endif
+#if defined(mingw32_HOST_OS)
+ StgWord32 saved_winerror;
+#endif
+
+} *StgTSOPtr; // StgTSO defined in rts/Types.h
+
+/* Note [StgStack dirtiness flags and concurrent marking]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * Without concurrent collection by the nonmoving collector the stack dirtiness story
+ * is quite simple: The stack is either STACK_DIRTY (meaning it has been added to mut_list)
+ * or not.
+ *
+ * However, things are considerably more complicated with concurrent collection
+ * (namely, when nonmoving_write_barrier_enabled is set): In addition to adding
+ * the stack to mut_list and flagging it as STACK_DIRTY, we also must ensure
+ * that stacks are marked in accordance with the nonmoving collector's snapshot
+ * invariant. This is: every stack alive at the time the snapshot is taken must
+ * be marked at some point after the moment the snapshot is taken and before it
+ * is mutated or the commencement of the sweep phase.
+ *
+ * This marking may be done by the concurrent mark phase (in the case of a
+ * thread that never runs during the concurrent mark) or by the mutator when
+ * dirtying the stack. However, it is unsafe for the concurrent collector to
+ * traverse the stack while it is under mutation. Consequently, the following
+ * handshake is obeyed by the mutator's write barrier and the concurrent mark to
+ * ensure this doesn't happen:
+ *
+ * 1. The entity seeking to mark first checks that the stack lives in the nonmoving
+ * generation; if not then the stack was not alive at the time the snapshot
+ * was taken and therefore we need not mark it.
+ *
+ * 2. The entity seeking to mark checks the stack's mark bit. If it is set then
+ * no mark is necessary.
+ *
+ * 3. The entity seeking to mark tries to lock the stack for marking by
+ * atomically setting its `marking` field to the current non-moving mark
+ * epoch:
+ *
+ * a. If the mutator finds the concurrent collector has already locked the
+ * stack then it waits until it is finished (indicated by the mark bit
+ * being set) before proceeding with execution.
+ *
+ * b. If the concurrent collector finds that the mutator has locked the stack
+ * then it moves on, leaving the mutator to mark it. There is no need to wait;
+ * the mark is guaranteed to finish before sweep due to the post-mark
+ * synchronization with mutators.
+ *
+ * c. Whoever succeeds in locking the stack is responsible for marking it and
+ * setting the stack's mark bit (either the BF_MARKED bit for large objects
+ * or otherwise its bit in its segment's mark bitmap).
+ *
+ * To ensure that mutation does not proceed until the stack is fully marked the
+ * mark phase must not set the mark bit until it has finished tracing.
+ *
+ */
+
+#define STACK_DIRTY 1
+// used by sanity checker to verify that all dirty stacks are on the mutable list
+#define STACK_SANE 64
+
+typedef struct StgStack_ {
+ StgHeader header;
+
+ /* Size of the `stack` field in *words*. This is not affected by how much of
+ * the stack space is used, nor if more stack space is linked to by an
+ * UNDERFLOW_FRAME.
+ */
+ StgWord32 stack_size;
+
+ StgWord8 dirty; // non-zero => dirty
+ StgWord8 marking; // non-zero => someone is currently marking the stack
+
+ /* Pointer to the "top" of the stack i.e. the most recently written address.
+ * The stack is filled downwards, so the "top" of the stack starts with `sp
+ * = stack + stack_size` and is decremented as the stack fills with data.
+ * See comment on "Invariants" below.
+ */
+ StgPtr sp;
+ StgWord stack[];
+} StgStack;
+
+// Calculate SpLim from a TSO (reads tso->stackobj, but no fields from
+// the stackobj itself).
+INLINE_HEADER StgPtr tso_SpLim (StgTSO* tso)
+{
+ return tso->stackobj->stack + RESERVED_STACK_WORDS;
+}
+
+/* -----------------------------------------------------------------------------
+ functions
+ -------------------------------------------------------------------------- */
+
+void dirty_TSO (Capability *cap, StgTSO *tso);
+void setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target);
+void setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target);
+
+void dirty_STACK (Capability *cap, StgStack *stack);
+
+/* -----------------------------------------------------------------------------
+ Invariants:
+
+ An active thread has the following properties:
+
+ tso->stack < tso->sp < tso->stack+tso->stack_size
+ tso->stack_size <= tso->max_stack_size
+
+ RESERVED_STACK_WORDS is large enough for any heap-check or
+ stack-check failure.
+
+ The size of the TSO struct plus the stack is either
+ (a) smaller than a block, or
+ (b) a multiple of BLOCK_SIZE
+
+ tso->why_blocked tso->block_info location
+ ----------------------------------------------------------------------
+ NotBlocked END_TSO_QUEUE runnable_queue, or running
+
+ BlockedOnBlackHole MessageBlackHole * TSO->bq
+
+ BlockedOnMVar the MVAR the MVAR's queue
+ BlockedOnIOCompletion the PortEvent the IOCP's queue
+
+ BlockedOnSTM END_TSO_QUEUE STM wait queue(s)
+ BlockedOnSTM STM_AWOKEN run queue
+
+ BlockedOnMsgThrowTo MessageThrowTo * TSO->blocked_exception
+
+ BlockedOnRead NULL blocked_queue
+ BlockedOnWrite NULL blocked_queue
+ BlockedOnDelay NULL blocked_queue
+
+ tso->link == END_TSO_QUEUE, if the thread is currently running.
+
+ A zombie thread has the following properties:
+
+ tso->what_next == ThreadComplete or ThreadKilled
+ tso->link == (could be on some queue somewhere)
+ tso->sp == tso->stack + tso->stack_size - 1 (i.e. top stack word)
+ tso->sp[0] == return value of thread, if what_next == ThreadComplete,
+ exception , if what_next == ThreadKilled
+
+ (tso->sp is left pointing at the top word on the stack so that
+ the return value or exception will be retained by a GC).
+
+ ---------------------------------------------------------------------------- */
+
+/* this is the NIL ptr for a TSO queue (e.g. runnable queue) */
+#define END_TSO_QUEUE ((StgTSO *)(void*)&stg_END_TSO_QUEUE_closure)
diff --git a/rts/include/stg/DLL.h b/rts/include/stg/DLL.h
new file mode 100644
index 0000000000..11902fc239
--- /dev/null
+++ b/rts/include/stg/DLL.h
@@ -0,0 +1,72 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Support for Windows DLLs.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#if defined(COMPILING_WINDOWS_DLL)
+# if defined(x86_64_HOST_ARCH)
+# define DLL_IMPORT_DATA_REF(x) (__imp_##x)
+# define DLL_IMPORT_DATA_VARNAME(x) *__imp_##x
+# else
+# define DLL_IMPORT_DATA_REF(x) (_imp__##x)
+# define DLL_IMPORT_DATA_VARNAME(x) *_imp__##x
+# endif
+# if __GNUC__ && !defined(__declspec)
+# define DLLIMPORT
+# else
+# define DLLIMPORT __declspec(dllimport)
+# if defined(x86_64_HOST_ARCH)
+# define DLLIMPORT_DATA(x) __imp_##x
+# else
+# define DLLIMPORT_DATA(x) _imp__##x
+# endif
+# endif
+#else
+# define DLL_IMPORT_DATA_REF(x) (&(x))
+# define DLL_IMPORT_DATA_VARNAME(x) x
+# define DLLIMPORT
+#endif
+
+/* The view of the rts/include/ header files differ ever so
+ slightly depending on whether the RTS is being compiled
+ or not - so we're forced to distinguish between the two.
+ [oh, you want details :) : Data symbols defined by the RTS
+ have to be accessed through an extra level of indirection
+ when compiling generated .hc code compared to when the RTS
+ sources are being processed. This is only the case when
+ using Win32 DLLs. ]
+*/
+#if defined(COMPILING_RTS)
+#define DLL_IMPORT DLLIMPORT
+#define DLL_IMPORT_RTS
+#define DLL_IMPORT_DATA_VAR(x) x
+#else
+#define DLL_IMPORT
+#define DLL_IMPORT_RTS DLLIMPORT
+# if defined(COMPILING_WINDOWS_DLL)
+# if defined(x86_64_HOST_ARCH)
+# define DLL_IMPORT_DATA_VAR(x) __imp_##x
+# else
+# define DLL_IMPORT_DATA_VAR(x) _imp__##x
+# endif
+# else
+# define DLL_IMPORT_DATA_VAR(x) x
+# endif
+#endif
+
+
+#if defined(COMPILING_STDLIB)
+#define DLL_IMPORT_STDLIB
+#else
+#define DLL_IMPORT_STDLIB DLLIMPORT
+#endif
diff --git a/rts/include/stg/MachRegs.h b/rts/include/stg/MachRegs.h
new file mode 100644
index 0000000000..d50969b66a
--- /dev/null
+++ b/rts/include/stg/MachRegs.h
@@ -0,0 +1,854 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2014
+ *
+ * Registers used in STG code. Might or might not correspond to
+ * actual machine registers.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/* This file is #included into Haskell code in the compiler: #defines
+ * only in here please.
+ */
+
+/*
+ * Undefine these as a precaution: some of them were found to be
+ * defined by system headers on ARM/Linux.
+ */
+#undef REG_R1
+#undef REG_R2
+#undef REG_R3
+#undef REG_R4
+#undef REG_R5
+#undef REG_R6
+#undef REG_R7
+#undef REG_R8
+#undef REG_R9
+#undef REG_R10
+
+/*
+ * Defining MACHREGS_NO_REGS to 1 causes no global registers to be used.
+ * MACHREGS_NO_REGS is typically controlled by NO_REGS, which is
+ * typically defined by GHC, via a command-line option passed to gcc,
+ * when the -funregisterised flag is given.
+ *
+ * NB. When MACHREGS_NO_REGS is 1, calling & return conventions may be
+ * different. For example, all function arguments will be passed on
+ * the stack, and components of an unboxed tuple will be returned on
+ * the stack rather than in registers.
+ */
+#if MACHREGS_NO_REGS == 1
+
+/* Nothing */
+
+#elif MACHREGS_NO_REGS == 0
+
+/* ----------------------------------------------------------------------------
+ Caller saves and callee-saves regs.
+
+ Caller-saves regs have to be saved around C-calls made from STG
+ land, so this file defines CALLER_SAVES_<reg> for each <reg> that
+ is designated caller-saves in that machine's C calling convention.
+
+ As it stands, the only registers that are ever marked caller saves
+ are the RX, FX, DX and USER registers; as a result, if you
+ decide to caller save a system register (e.g. SP, HP, etc), note that
+ this code path is completely untested! -- EZY
+
+ See Note [Register parameter passing] for details.
+ -------------------------------------------------------------------------- */
+
+/* -----------------------------------------------------------------------------
+ The x86 register mapping
+
+ Ok, we've only got 6 general purpose registers, a frame pointer and a
+ stack pointer. \tr{%eax} and \tr{%edx} are return values from C functions,
+ hence they get trashed across ccalls and are caller saves. \tr{%ebx},
+ \tr{%esi}, \tr{%edi}, \tr{%ebp} are all callee-saves.
+
+ Reg STG-Reg
+ ---------------
+ ebx Base
+ ebp Sp
+ esi R1
+ edi Hp
+
+ Leaving SpLim out of the picture.
+ -------------------------------------------------------------------------- */
+
+#if defined(MACHREGS_i386)
+
+#define REG(x) __asm__("%" #x)
+
+#if !defined(not_doing_dynamic_linking)
+#define REG_Base ebx
+#endif
+#define REG_Sp ebp
+
+#if !defined(STOLEN_X86_REGS)
+#define STOLEN_X86_REGS 4
+#endif
+
+#if STOLEN_X86_REGS >= 3
+# define REG_R1 esi
+#endif
+
+#if STOLEN_X86_REGS >= 4
+# define REG_Hp edi
+#endif
+#define REG_MachSp esp
+
+#define REG_XMM1 xmm0
+#define REG_XMM2 xmm1
+#define REG_XMM3 xmm2
+#define REG_XMM4 xmm3
+
+#define REG_YMM1 ymm0
+#define REG_YMM2 ymm1
+#define REG_YMM3 ymm2
+#define REG_YMM4 ymm3
+
+#define REG_ZMM1 zmm0
+#define REG_ZMM2 zmm1
+#define REG_ZMM3 zmm2
+#define REG_ZMM4 zmm3
+
+#define MAX_REAL_VANILLA_REG 1 /* always, since it defines the entry conv */
+#define MAX_REAL_FLOAT_REG 0
+#define MAX_REAL_DOUBLE_REG 0
+#define MAX_REAL_LONG_REG 0
+#define MAX_REAL_XMM_REG 4
+#define MAX_REAL_YMM_REG 4
+#define MAX_REAL_ZMM_REG 4
+
+/* -----------------------------------------------------------------------------
+ The x86-64 register mapping
+
+ %rax caller-saves, don't steal this one
+ %rbx YES
+ %rcx arg reg, caller-saves
+ %rdx arg reg, caller-saves
+ %rsi arg reg, caller-saves
+ %rdi arg reg, caller-saves
+ %rbp YES (our *prime* register)
+ %rsp (unavailable - stack pointer)
+ %r8 arg reg, caller-saves
+ %r9 arg reg, caller-saves
+ %r10 caller-saves
+ %r11 caller-saves
+ %r12 YES
+ %r13 YES
+ %r14 YES
+ %r15 YES
+
+ %xmm0-7 arg regs, caller-saves
+ %xmm8-15 caller-saves
+
+ Use the caller-saves regs for Rn, because we don't always have to
+ save those (as opposed to Sp/Hp/SpLim etc. which always have to be
+ saved).
+
+ --------------------------------------------------------------------------- */
+
+#elif defined(MACHREGS_x86_64)
+
+#define REG(x) __asm__("%" #x)
+
+#define REG_Base r13
+#define REG_Sp rbp
+#define REG_Hp r12
+#define REG_R1 rbx
+#define REG_R2 r14
+#define REG_R3 rsi
+#define REG_R4 rdi
+#define REG_R5 r8
+#define REG_R6 r9
+#define REG_SpLim r15
+#define REG_MachSp rsp
+
+/*
+Map both Fn and Dn to register xmmn so that we can pass a function any
+combination of up to six Float# or Double# arguments without touching
+the stack. See Note [Overlapping global registers] for implications.
+*/
+
+#define REG_F1 xmm1
+#define REG_F2 xmm2
+#define REG_F3 xmm3
+#define REG_F4 xmm4
+#define REG_F5 xmm5
+#define REG_F6 xmm6
+
+#define REG_D1 xmm1
+#define REG_D2 xmm2
+#define REG_D3 xmm3
+#define REG_D4 xmm4
+#define REG_D5 xmm5
+#define REG_D6 xmm6
+
+#define REG_XMM1 xmm1
+#define REG_XMM2 xmm2
+#define REG_XMM3 xmm3
+#define REG_XMM4 xmm4
+#define REG_XMM5 xmm5
+#define REG_XMM6 xmm6
+
+#define REG_YMM1 ymm1
+#define REG_YMM2 ymm2
+#define REG_YMM3 ymm3
+#define REG_YMM4 ymm4
+#define REG_YMM5 ymm5
+#define REG_YMM6 ymm6
+
+#define REG_ZMM1 zmm1
+#define REG_ZMM2 zmm2
+#define REG_ZMM3 zmm3
+#define REG_ZMM4 zmm4
+#define REG_ZMM5 zmm5
+#define REG_ZMM6 zmm6
+
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_R3
+#define CALLER_SAVES_R4
+#endif
+#define CALLER_SAVES_R5
+#define CALLER_SAVES_R6
+
+#define CALLER_SAVES_F1
+#define CALLER_SAVES_F2
+#define CALLER_SAVES_F3
+#define CALLER_SAVES_F4
+#define CALLER_SAVES_F5
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_F6
+#endif
+
+#define CALLER_SAVES_D1
+#define CALLER_SAVES_D2
+#define CALLER_SAVES_D3
+#define CALLER_SAVES_D4
+#define CALLER_SAVES_D5
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_D6
+#endif
+
+#define CALLER_SAVES_XMM1
+#define CALLER_SAVES_XMM2
+#define CALLER_SAVES_XMM3
+#define CALLER_SAVES_XMM4
+#define CALLER_SAVES_XMM5
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_XMM6
+#endif
+
+#define CALLER_SAVES_YMM1
+#define CALLER_SAVES_YMM2
+#define CALLER_SAVES_YMM3
+#define CALLER_SAVES_YMM4
+#define CALLER_SAVES_YMM5
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_YMM6
+#endif
+
+#define CALLER_SAVES_ZMM1
+#define CALLER_SAVES_ZMM2
+#define CALLER_SAVES_ZMM3
+#define CALLER_SAVES_ZMM4
+#define CALLER_SAVES_ZMM5
+#if !defined(mingw32_HOST_OS)
+#define CALLER_SAVES_ZMM6
+#endif
+
+#define MAX_REAL_VANILLA_REG 6
+#define MAX_REAL_FLOAT_REG 6
+#define MAX_REAL_DOUBLE_REG 6
+#define MAX_REAL_LONG_REG 0
+#define MAX_REAL_XMM_REG 6
+#define MAX_REAL_YMM_REG 6
+#define MAX_REAL_ZMM_REG 6
+
+/* -----------------------------------------------------------------------------
+ The PowerPC register mapping
+
+ 0 system glue? (caller-save, volatile)
+ 1 SP (callee-save, non-volatile)
+ 2 AIX, powerpc64-linux:
+ RTOC (a strange special case)
+ powerpc32-linux:
+ reserved for use by system
+
+ 3-10 args/return (caller-save, volatile)
+ 11,12 system glue? (caller-save, volatile)
+ 13 on 64-bit: reserved for thread state pointer
+ on 32-bit: (callee-save, non-volatile)
+ 14-31 (callee-save, non-volatile)
+
+ f0 (caller-save, volatile)
+ f1-f13 args/return (caller-save, volatile)
+ f14-f31 (callee-save, non-volatile)
+
+ \tr{14}--\tr{31} are wonderful callee-save registers on all ppc OSes.
+ \tr{0}--\tr{12} are caller-save registers.
+
+ \tr{%f14}--\tr{%f31} are callee-save floating-point registers.
+
+ We can do the Whole Business with callee-save registers only!
+ -------------------------------------------------------------------------- */
+
+#elif defined(MACHREGS_powerpc)
+
+#define REG(x) __asm__(#x)
+
+#define REG_R1 r14
+#define REG_R2 r15
+#define REG_R3 r16
+#define REG_R4 r17
+#define REG_R5 r18
+#define REG_R6 r19
+#define REG_R7 r20
+#define REG_R8 r21
+#define REG_R9 r22
+#define REG_R10 r23
+
+#define REG_F1 fr14
+#define REG_F2 fr15
+#define REG_F3 fr16
+#define REG_F4 fr17
+#define REG_F5 fr18
+#define REG_F6 fr19
+
+#define REG_D1 fr20
+#define REG_D2 fr21
+#define REG_D3 fr22
+#define REG_D4 fr23
+#define REG_D5 fr24
+#define REG_D6 fr25
+
+#define REG_Sp r24
+#define REG_SpLim r25
+#define REG_Hp r26
+#define REG_Base r27
+
+#define MAX_REAL_FLOAT_REG 6
+#define MAX_REAL_DOUBLE_REG 6
+
+/* -----------------------------------------------------------------------------
+ The Sun SPARC register mapping
+
+ !! IMPORTANT: if you change this register mapping you must also update
+ compiler/GHC/CmmToAsm/SPARC/Regs.hs. That file handles the
+ mapping for the NCG. This one only affects via-c code.
+
+ The SPARC register (window) story: Remember, within the Haskell
+ Threaded World, we essentially ``shut down'' the register-window
+ mechanism---the window doesn't move at all while in this World. It
+ *does* move, of course, if we call out to arbitrary~C...
+
+ The %i, %l, and %o registers (8 each) are the input, local, and
+ output registers visible in one register window. The 8 %g (global)
+ registers are visible all the time.
+
+ zero: always zero
+ scratch: volatile across C-fn calls. used by linker.
+ app: usable by application
+ system: reserved for system
+
+ alloc: allocated to in the register allocator, intra-closure only
+
+ GHC usage v8 ABI v9 ABI
+ Global
+ %g0 zero zero zero
+       %g1     alloc           scratch         scratch
+ %g2 alloc app app
+ %g3 alloc app app
+ %g4 alloc app scratch
+ %g5 system scratch
+ %g6 system system
+ %g7 system system
+
+ Output: can be zapped by callee
+ %o0-o5 alloc caller saves
+ %o6 C stack ptr
+ %o7 C ret addr
+
+ Local: maintained by register windowing mechanism
+ %l0 alloc
+ %l1 R1
+ %l2 R2
+ %l3 R3
+ %l4 R4
+ %l5 R5
+ %l6 alloc
+ %l7 alloc
+
+ Input
+ %i0 Sp
+ %i1 Base
+ %i2 SpLim
+ %i3 Hp
+ %i4 alloc
+ %i5 R6
+ %i6 C frame ptr
+ %i7 C ret addr
+
+ The paired nature of the floating point registers causes complications for
+ the native code generator. For convenience, we pretend that the first 22
+ fp regs %f0 .. %f21 are actually 11 double regs, and the remaining 10 are
+ float (single) regs. The NCG acts accordingly. That means that the
+ following FP assignment is rather fragile, and should only be changed
+ with extreme care. The current scheme is:
+
+ %f0 /%f1 FP return from C
+ %f2 /%f3 D1
+ %f4 /%f5 D2
+ %f6 /%f7 ncg double spill tmp #1
+ %f8 /%f9 ncg double spill tmp #2
+ %f10/%f11 allocatable
+ %f12/%f13 allocatable
+ %f14/%f15 allocatable
+ %f16/%f17 allocatable
+ %f18/%f19 allocatable
+ %f20/%f21 allocatable
+
+ %f22 F1
+ %f23 F2
+ %f24 F3
+ %f25 F4
+ %f26 ncg single spill tmp #1
+ %f27 ncg single spill tmp #2
+ %f28 allocatable
+ %f29 allocatable
+ %f30 allocatable
+ %f31 allocatable
+
+ -------------------------------------------------------------------------- */
+
+#elif defined(MACHREGS_sparc)
+
+#define REG(x) __asm__("%" #x)
+
+#define CALLER_SAVES_USER
+
+#define CALLER_SAVES_F1
+#define CALLER_SAVES_F2
+#define CALLER_SAVES_F3
+#define CALLER_SAVES_F4
+#define CALLER_SAVES_D1
+#define CALLER_SAVES_D2
+
+#define REG_R1 l1
+#define REG_R2 l2
+#define REG_R3 l3
+#define REG_R4 l4
+#define REG_R5 l5
+#define REG_R6 i5
+
+#define REG_F1 f22
+#define REG_F2 f23
+#define REG_F3 f24
+#define REG_F4 f25
+
+/* for each of the double arg regs,
+ Dn_2 is the high half. */
+
+#define REG_D1 f2
+#define REG_D1_2 f3
+
+#define REG_D2 f4
+#define REG_D2_2 f5
+
+#define REG_Sp i0
+#define REG_SpLim i2
+
+#define REG_Hp i3
+
+#define REG_Base i1
+
+#define NCG_FirstFloatReg f22
+
+/* -----------------------------------------------------------------------------
+ The ARM EABI register mapping
+
+ Here we consider ARM mode (i.e. 32bit isns)
+ and also CPU with full VFPv3 implementation
+
+ ARM registers (see Chapter 5.1 in ARM IHI 0042D and
+ Section 9.2.2 in ARM Software Development Toolkit Reference Guide)
+
+ r15 PC The Program Counter.
+ r14 LR The Link Register.
+ r13 SP The Stack Pointer.
+ r12 IP The Intra-Procedure-call scratch register.
+ r11 v8/fp Variable-register 8.
+ r10 v7/sl Variable-register 7.
+ r9 v6/SB/TR Platform register. The meaning of this register is
+ defined by the platform standard.
+ r8 v5 Variable-register 5.
+ r7 v4 Variable register 4.
+ r6 v3 Variable register 3.
+ r5 v2 Variable register 2.
+ r4 v1 Variable register 1.
+ r3 a4 Argument / scratch register 4.
+ r2 a3 Argument / scratch register 3.
+ r1 a2 Argument / result / scratch register 2.
+ r0 a1 Argument / result / scratch register 1.
+
+ VFPv2/VFPv3/NEON registers
+ s0-s15/d0-d7/q0-q3 Argument / result/ scratch registers
+ s16-s31/d8-d15/q4-q7 callee-saved registers (must be preserved across
+ subroutine calls)
+
+ VFPv3/NEON registers (added to the VFPv2 registers set)
+ d16-d31/q8-q15 Argument / result/ scratch registers
+ ----------------------------------------------------------------------------- */
+
+#elif defined(MACHREGS_arm)
+
+#define REG(x) __asm__(#x)
+
+#define REG_Base r4
+#define REG_Sp r5
+#define REG_Hp r6
+#define REG_R1 r7
+#define REG_R2 r8
+#define REG_R3 r9
+#define REG_R4 r10
+#define REG_SpLim r11
+
+#if !defined(arm_HOST_ARCH_PRE_ARMv6)
+/* d8 */
+#define REG_F1 s16
+#define REG_F2 s17
+/* d9 */
+#define REG_F3 s18
+#define REG_F4 s19
+
+#define REG_D1 d10
+#define REG_D2 d11
+#endif
+
+/* -----------------------------------------------------------------------------
+ The ARMv8/AArch64 ABI register mapping
+
+ The AArch64 provides 31 64-bit general purpose registers
+ and 32 128-bit SIMD/floating point registers.
+
+ General purpose registers (see Chapter 5.1.1 in ARM IHI 0055B)
+
+ Register | Special | Role in the procedure call standard
+ ---------+---------+------------------------------------
+ SP | | The Stack Pointer
+ r30 | LR | The Link Register
+ r29 | FP | The Frame Pointer
+ r19-r28 | | Callee-saved registers
+ r18 | | The Platform Register, if needed;
+ | | or temporary register
+ r17 | IP1 | The second intra-procedure-call temporary register
+ r16 | IP0 | The first intra-procedure-call scratch register
+ r9-r15 | | Temporary registers
+ r8 | | Indirect result location register
+ r0-r7 | | Parameter/result registers
+
+
+ FPU/SIMD registers
+
+ s/d/q/v0-v7 Argument / result/ scratch registers
+ s/d/q/v8-v15 callee-saved registers (must be preserved across subroutine calls,
+ but only bottom 64-bit value needs to be preserved)
+ s/d/q/v16-v31 temporary registers
+
+ ----------------------------------------------------------------------------- */
+
+#elif defined(MACHREGS_aarch64)
+
+#define REG(x) __asm__(#x)
+
+#define REG_Base r19
+#define REG_Sp r20
+#define REG_Hp r21
+#define REG_R1 r22
+#define REG_R2 r23
+#define REG_R3 r24
+#define REG_R4 r25
+#define REG_R5 r26
+#define REG_R6 r27
+#define REG_SpLim r28
+
+#define REG_F1 s8
+#define REG_F2 s9
+#define REG_F3 s10
+#define REG_F4 s11
+
+#define REG_D1 d12
+#define REG_D2 d13
+#define REG_D3 d14
+#define REG_D4 d15
+
+/* -----------------------------------------------------------------------------
+ The s390x register mapping
+
+ Register | Role(s) | Call effect
+ ------------+-------------------------------------+-----------------
+ r0,r1 | - | caller-saved
+ r2 | Argument / return value | caller-saved
+ r3,r4,r5 | Arguments | caller-saved
+ r6 | Argument | callee-saved
+ r7...r11 | - | callee-saved
+ r12 | (Commonly used as GOT pointer) | callee-saved
+ r13 | (Commonly used as literal pool pointer) | callee-saved
+ r14 | Return address | caller-saved
+ r15 | Stack pointer | callee-saved
+ f0 | Argument / return value | caller-saved
+ f2,f4,f6 | Arguments | caller-saved
+ f1,f3,f5,f7 | - | caller-saved
+ f8...f15 | - | callee-saved
+ v0...v31 | - | caller-saved
+
+ Each general purpose register r0 through r15 as well as each floating-point
+ register f0 through f15 is 64 bits wide. Each vector register v0 through v31
+ is 128 bits wide.
+
+ Note, the vector registers v0 through v15 overlap with the floating-point
+ registers f0 through f15.
+
+ -------------------------------------------------------------------------- */
+
+#elif defined(MACHREGS_s390x)
+
+#define REG(x) __asm__("%" #x)
+
+#define REG_Base r7
+#define REG_Sp r8
+#define REG_Hp r10
+#define REG_R1 r11
+#define REG_R2 r12
+#define REG_R3 r13
+#define REG_R4 r6
+#define REG_R5 r2
+#define REG_R6 r3
+#define REG_R7 r4
+#define REG_R8 r5
+#define REG_SpLim r9
+#define REG_MachSp r15
+
+#define REG_F1 f8
+#define REG_F2 f9
+#define REG_F3 f10
+#define REG_F4 f11
+#define REG_F5 f0
+#define REG_F6 f1
+
+#define REG_D1 f12
+#define REG_D2 f13
+#define REG_D3 f14
+#define REG_D4 f15
+#define REG_D5 f2
+#define REG_D6 f3
+
+#define CALLER_SAVES_R5
+#define CALLER_SAVES_R6
+#define CALLER_SAVES_R7
+#define CALLER_SAVES_R8
+
+#define CALLER_SAVES_F5
+#define CALLER_SAVES_F6
+
+#define CALLER_SAVES_D5
+#define CALLER_SAVES_D6
+
+/* -----------------------------------------------------------------------------
+ The riscv64 register mapping
+
+ Register | Role(s) | Call effect
+ ------------+-----------------------------------------+-------------
+ zero | Hard-wired zero | -
+ ra | Return address | caller-saved
+ sp | Stack pointer | callee-saved
+ gp | Global pointer | callee-saved
+ tp | Thread pointer | callee-saved
+ t0,t1,t2 | - | caller-saved
+ s0 | Frame pointer | callee-saved
+ s1 | - | callee-saved
+ a0,a1 | Arguments / return values | caller-saved
+ a2..a7 | Arguments | caller-saved
+ s2..s11 | - | callee-saved
+ t3..t6 | - | caller-saved
+ ft0..ft7 | - | caller-saved
+ fs0,fs1 | - | callee-saved
+ fa0,fa1 | Arguments / return values | caller-saved
+ fa2..fa7 | Arguments | caller-saved
+ fs2..fs11 | - | callee-saved
+ ft8..ft11 | - | caller-saved
+
+ Each general purpose register as well as each floating-point
+ register is 64 bits wide.
+
+ -------------------------------------------------------------------------- */
+
+#elif defined(MACHREGS_riscv64)
+
+#define REG(x) __asm__(#x)
+
+#define REG_Base s1
+#define REG_Sp s2
+#define REG_Hp s3
+#define REG_R1 s4
+#define REG_R2 s5
+#define REG_R3 s6
+#define REG_R4 s7
+#define REG_R5 s8
+#define REG_R6 s9
+#define REG_R7 s10
+#define REG_SpLim s11
+
+#define REG_F1 fs0
+#define REG_F2 fs1
+#define REG_F3 fs2
+#define REG_F4 fs3
+#define REG_F5 fs4
+#define REG_F6 fs5
+
+#define REG_D1 fs6
+#define REG_D2 fs7
+#define REG_D3 fs8
+#define REG_D4 fs9
+#define REG_D5 fs10
+#define REG_D6 fs11
+
+#define MAX_REAL_FLOAT_REG 6
+#define MAX_REAL_DOUBLE_REG 6
+
+#else
+
+#error Cannot find platform to give register info for
+
+#endif
+
+#else
+
+#error Bad MACHREGS_NO_REGS value
+
+#endif
+
+/* -----------------------------------------------------------------------------
+ * These constants define how many stg registers will be used for
+ * passing arguments (and results, in the case of an unboxed-tuple
+ * return).
+ *
+ * We usually set MAX_REAL_VANILLA_REG and co. to be the number of the
+ * highest STG register to occupy a real machine register, otherwise
+ * the calling conventions will needlessly shuffle data between the
+ * stack and memory-resident STG registers. We might occasionally
+ * set these macros to other values for testing, though.
+ *
+ * Registers above these values might still be used, for instance to
+ * communicate with PrimOps and RTS functions.
+ */
+
+#if !defined(MAX_REAL_VANILLA_REG)
+# if defined(REG_R10)
+# define MAX_REAL_VANILLA_REG 10
+# elif defined(REG_R9)
+# define MAX_REAL_VANILLA_REG 9
+# elif defined(REG_R8)
+# define MAX_REAL_VANILLA_REG 8
+# elif defined(REG_R7)
+# define MAX_REAL_VANILLA_REG 7
+# elif defined(REG_R6)
+# define MAX_REAL_VANILLA_REG 6
+# elif defined(REG_R5)
+# define MAX_REAL_VANILLA_REG 5
+# elif defined(REG_R4)
+# define MAX_REAL_VANILLA_REG 4
+# elif defined(REG_R3)
+# define MAX_REAL_VANILLA_REG 3
+# elif defined(REG_R2)
+# define MAX_REAL_VANILLA_REG 2
+# elif defined(REG_R1)
+# define MAX_REAL_VANILLA_REG 1
+# else
+# define MAX_REAL_VANILLA_REG 0
+# endif
+#endif
+
+#if !defined(MAX_REAL_FLOAT_REG)
+# if defined(REG_F7)
+# error Please manually define MAX_REAL_FLOAT_REG for this architecture
+# elif defined(REG_F6)
+# define MAX_REAL_FLOAT_REG 6
+# elif defined(REG_F5)
+# define MAX_REAL_FLOAT_REG 5
+# elif defined(REG_F4)
+# define MAX_REAL_FLOAT_REG 4
+# elif defined(REG_F3)
+# define MAX_REAL_FLOAT_REG 3
+# elif defined(REG_F2)
+# define MAX_REAL_FLOAT_REG 2
+# elif defined(REG_F1)
+# define MAX_REAL_FLOAT_REG 1
+# else
+# define MAX_REAL_FLOAT_REG 0
+# endif
+#endif
+
+#if !defined(MAX_REAL_DOUBLE_REG)
+# if defined(REG_D7)
+# error Please manually define MAX_REAL_DOUBLE_REG for this architecture
+# elif defined(REG_D6)
+# define MAX_REAL_DOUBLE_REG 6
+# elif defined(REG_D5)
+# define MAX_REAL_DOUBLE_REG 5
+# elif defined(REG_D4)
+# define MAX_REAL_DOUBLE_REG 4
+# elif defined(REG_D3)
+# define MAX_REAL_DOUBLE_REG 3
+# elif defined(REG_D2)
+# define MAX_REAL_DOUBLE_REG 2
+# elif defined(REG_D1)
+# define MAX_REAL_DOUBLE_REG 1
+# else
+# define MAX_REAL_DOUBLE_REG 0
+# endif
+#endif
+
+#if !defined(MAX_REAL_LONG_REG)
+# if defined(REG_L1)
+# define MAX_REAL_LONG_REG 1
+# else
+# define MAX_REAL_LONG_REG 0
+# endif
+#endif
+
+#if !defined(MAX_REAL_XMM_REG)
+# if defined(REG_XMM6)
+# define MAX_REAL_XMM_REG 6
+# elif defined(REG_XMM5)
+# define MAX_REAL_XMM_REG 5
+# elif defined(REG_XMM4)
+# define MAX_REAL_XMM_REG 4
+# elif defined(REG_XMM3)
+# define MAX_REAL_XMM_REG 3
+# elif defined(REG_XMM2)
+# define MAX_REAL_XMM_REG 2
+# elif defined(REG_XMM1)
+# define MAX_REAL_XMM_REG 1
+# else
+# define MAX_REAL_XMM_REG 0
+# endif
+#endif
+
+/* define NO_ARG_REGS if we have no argument registers at all (we can
+ * optimise certain code paths using this predicate).
+ */
+#if MAX_REAL_VANILLA_REG < 2
+#define NO_ARG_REGS
+#else
+#undef NO_ARG_REGS
+#endif
diff --git a/rts/include/stg/MachRegsForHost.h b/rts/include/stg/MachRegsForHost.h
new file mode 100644
index 0000000000..e902d528f6
--- /dev/null
+++ b/rts/include/stg/MachRegsForHost.h
@@ -0,0 +1,80 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2011
+ *
+ * This header includes MachRegs.h "selecting" regs for the current host
+ * platform.
+ *
+ * Don't #include this in the RTS directly, instead include "Rts.h".
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#if defined(UnregisterisedCompiler)
+#if !defined(NO_REGS)
+#define NO_REGS
+#endif
+#endif
+
+/*
+ * Defining NO_REGS causes no global registers to be used. NO_REGS is
+ * typically defined by GHC, via a command-line option passed to gcc,
+ * when the -funregisterised flag is given.
+ *
+ * NB. When NO_REGS is on, calling & return conventions may be
+ * different. For example, all function arguments will be passed on
+ * the stack, and components of an unboxed tuple will be returned on
+ * the stack rather than in registers.
+ */
+#if defined(NO_REGS)
+
+#define MACHREGS_NO_REGS 1
+
+#else
+
+#define MACHREGS_NO_REGS 0
+
+#if defined(i386_HOST_ARCH)
+#define MACHREGS_i386 1
+#endif
+
+#if defined(x86_64_HOST_ARCH)
+#define MACHREGS_x86_64 1
+#endif
+
+#if defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \
+ || defined(powerpc64le_HOST_ARCH) || defined(rs6000_HOST_ARCH)
+#define MACHREGS_powerpc 1
+#endif
+
+#if defined(sparc_HOST_ARCH)
+#define MACHREGS_sparc 1
+#endif
+
+#if defined(arm_HOST_ARCH)
+#define MACHREGS_arm 1
+#endif
+
+#if defined(aarch64_HOST_ARCH)
+#define MACHREGS_aarch64 1
+#endif
+
+#if defined(darwin_HOST_OS)
+#define MACHREGS_darwin 1
+#endif
+
+#if defined(s390x_HOST_ARCH)
+#define MACHREGS_s390x 1
+#endif
+
+#if defined(riscv64_HOST_ARCH)
+#define MACHREGS_riscv64 1
+#endif
+
+#endif
+
+#include "MachRegs.h"
diff --git a/rts/include/stg/MiscClosures.h b/rts/include/stg/MiscClosures.h
new file mode 100644
index 0000000000..c4cb7d45a4
--- /dev/null
+++ b/rts/include/stg/MiscClosures.h
@@ -0,0 +1,639 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2009
+ *
+ * Declarations for various symbols exported by the RTS.
+ *
+ * ToDo: many of the symbols in here don't need to be exported, but
+ * our Cmm code generator doesn't know how to generate local symbols
+ * for the RTS bits (it assumes all RTS symbols are external).
+ *
+ * See wiki:commentary/compiler/backends/ppr-c#prototypes
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * --------------------------------------------------------------------------*/
+
+#pragma once
+
+#if IN_STG_CODE
+# define RTS_RET_INFO(i) extern const W_(i)[]
+# define RTS_FUN_INFO(i) extern const W_(i)[]
+# define RTS_THUNK_INFO(i) extern const W_(i)[]
+# define RTS_INFO(i) extern const W_(i)[]
+# define RTS_CLOSURE(i) extern W_(i)[]
+# define RTS_FUN_DECL(f) extern DLL_IMPORT_RTS StgFunPtr f(void)
+#else
+# define RTS_RET_INFO(i) extern DLL_IMPORT_RTS const StgRetInfoTable i
+# define RTS_FUN_INFO(i) extern DLL_IMPORT_RTS const StgFunInfoTable i
+# define RTS_THUNK_INFO(i) extern DLL_IMPORT_RTS const StgThunkInfoTable i
+# define RTS_INFO(i) extern DLL_IMPORT_RTS const StgInfoTable i
+# define RTS_CLOSURE(i) extern DLL_IMPORT_RTS StgClosure i
+# define RTS_FUN_DECL(f) extern DLL_IMPORT_RTS StgFunPtr f(void)
+#endif
+
+#if defined(TABLES_NEXT_TO_CODE)
+# define RTS_RET(f) RTS_INFO(f##_info)
+# define RTS_ENTRY(f) RTS_INFO(f##_info)
+# define RTS_FUN(f) RTS_FUN_INFO(f##_info)
+# define RTS_THUNK(f) RTS_THUNK_INFO(f##_info)
+#else
+# define RTS_RET(f) RTS_INFO(f##_info); RTS_FUN_DECL(f##_ret)
+# define RTS_ENTRY(f) RTS_INFO(f##_info); RTS_FUN_DECL(f##_entry)
+# define RTS_FUN(f) RTS_FUN_INFO(f##_info); RTS_FUN_DECL(f##_entry)
+# define RTS_THUNK(f) RTS_THUNK_INFO(f##_info); RTS_FUN_DECL(f##_entry)
+#endif
+
+/* Stack frames */
+RTS_RET(stg_upd_frame);
+RTS_RET(stg_bh_upd_frame);
+RTS_RET(stg_marked_upd_frame);
+RTS_RET(stg_noupd_frame);
+RTS_RET(stg_catch_frame);
+RTS_RET(stg_catch_retry_frame);
+RTS_RET(stg_atomically_frame);
+RTS_RET(stg_atomically_waiting_frame);
+RTS_RET(stg_catch_stm_frame);
+RTS_RET(stg_unmaskAsyncExceptionszh_ret);
+RTS_RET(stg_maskUninterruptiblezh_ret);
+RTS_RET(stg_maskAsyncExceptionszh_ret);
+RTS_RET(stg_stack_underflow_frame);
+RTS_RET(stg_restore_cccs);
+RTS_RET(stg_restore_cccs_eval);
+
+// RTS_FUN(stg_interp_constr1_entry);
+// RTS_FUN(stg_interp_constr2_entry);
+// RTS_FUN(stg_interp_constr3_entry);
+// RTS_FUN(stg_interp_constr4_entry);
+// RTS_FUN(stg_interp_constr5_entry);
+// RTS_FUN(stg_interp_constr6_entry);
+// RTS_FUN(stg_interp_constr7_entry);
+//
+// This is referenced using the FFI in the compiler (GHC.ByteCode.InfoTable),
+// so we can't give it the correct type here because the prototypes
+// would clash (FFI references are always declared with type StgWord[]
+// in the generated C code).
+
+/* Magic glue code for when compiled code returns a value in R1/F1/D1
+ or a VoidRep to the interpreter. */
+RTS_RET(stg_ctoi_R1p);
+RTS_RET(stg_ctoi_R1unpt);
+RTS_RET(stg_ctoi_R1n);
+RTS_RET(stg_ctoi_F1);
+RTS_RET(stg_ctoi_D1);
+RTS_RET(stg_ctoi_L1);
+RTS_RET(stg_ctoi_V);
+
+RTS_FUN_DECL(stg_ctoi_t);
+RTS_RET(stg_ctoi_t0);
+RTS_RET(stg_ctoi_t1);
+RTS_RET(stg_ctoi_t2);
+RTS_RET(stg_ctoi_t3);
+RTS_RET(stg_ctoi_t4);
+RTS_RET(stg_ctoi_t5);
+RTS_RET(stg_ctoi_t6);
+RTS_RET(stg_ctoi_t7);
+RTS_RET(stg_ctoi_t8);
+RTS_RET(stg_ctoi_t9);
+
+RTS_RET(stg_ctoi_t10);
+RTS_RET(stg_ctoi_t11);
+RTS_RET(stg_ctoi_t12);
+RTS_RET(stg_ctoi_t13);
+RTS_RET(stg_ctoi_t14);
+RTS_RET(stg_ctoi_t15);
+RTS_RET(stg_ctoi_t16);
+RTS_RET(stg_ctoi_t17);
+RTS_RET(stg_ctoi_t18);
+RTS_RET(stg_ctoi_t19);
+
+RTS_RET(stg_ctoi_t20);
+RTS_RET(stg_ctoi_t21);
+RTS_RET(stg_ctoi_t22);
+RTS_RET(stg_ctoi_t23);
+RTS_RET(stg_ctoi_t24);
+RTS_RET(stg_ctoi_t25);
+RTS_RET(stg_ctoi_t26);
+RTS_RET(stg_ctoi_t27);
+RTS_RET(stg_ctoi_t28);
+RTS_RET(stg_ctoi_t29);
+
+RTS_RET(stg_ctoi_t30);
+RTS_RET(stg_ctoi_t31);
+RTS_RET(stg_ctoi_t32);
+RTS_RET(stg_ctoi_t33);
+RTS_RET(stg_ctoi_t34);
+RTS_RET(stg_ctoi_t35);
+RTS_RET(stg_ctoi_t36);
+RTS_RET(stg_ctoi_t37);
+RTS_RET(stg_ctoi_t38);
+RTS_RET(stg_ctoi_t39);
+
+RTS_RET(stg_ctoi_t40);
+RTS_RET(stg_ctoi_t41);
+RTS_RET(stg_ctoi_t42);
+RTS_RET(stg_ctoi_t43);
+RTS_RET(stg_ctoi_t44);
+RTS_RET(stg_ctoi_t45);
+RTS_RET(stg_ctoi_t46);
+RTS_RET(stg_ctoi_t47);
+RTS_RET(stg_ctoi_t48);
+RTS_RET(stg_ctoi_t49);
+
+RTS_RET(stg_ctoi_t50);
+RTS_RET(stg_ctoi_t51);
+RTS_RET(stg_ctoi_t52);
+RTS_RET(stg_ctoi_t53);
+RTS_RET(stg_ctoi_t54);
+RTS_RET(stg_ctoi_t55);
+RTS_RET(stg_ctoi_t56);
+RTS_RET(stg_ctoi_t57);
+RTS_RET(stg_ctoi_t58);
+RTS_RET(stg_ctoi_t59);
+
+RTS_RET(stg_ctoi_t60);
+RTS_RET(stg_ctoi_t61);
+RTS_RET(stg_ctoi_t62);
+
+RTS_RET(stg_apply_interp);
+
+RTS_ENTRY(stg_IND);
+RTS_ENTRY(stg_IND_STATIC);
+RTS_ENTRY(stg_BLACKHOLE);
+RTS_ENTRY(stg_CAF_BLACKHOLE);
+RTS_ENTRY(__stg_EAGER_BLACKHOLE);
+RTS_ENTRY(stg_WHITEHOLE);
+RTS_ENTRY(stg_BLOCKING_QUEUE_CLEAN);
+RTS_ENTRY(stg_BLOCKING_QUEUE_DIRTY);
+
+RTS_FUN(stg_BCO);
+RTS_ENTRY(stg_EVACUATED);
+RTS_ENTRY(stg_WEAK);
+RTS_ENTRY(stg_DEAD_WEAK);
+RTS_ENTRY(stg_C_FINALIZER_LIST);
+RTS_ENTRY(stg_STABLE_NAME);
+RTS_ENTRY(stg_MVAR_CLEAN);
+RTS_ENTRY(stg_MVAR_DIRTY);
+RTS_ENTRY(stg_TVAR_CLEAN);
+RTS_ENTRY(stg_TVAR_DIRTY);
+RTS_ENTRY(stg_TSO);
+RTS_ENTRY(stg_STACK);
+RTS_ENTRY(stg_RUBBISH_ENTRY);
+RTS_ENTRY(stg_ARR_WORDS);
+RTS_ENTRY(stg_MUT_ARR_WORDS);
+RTS_ENTRY(stg_MUT_ARR_PTRS_CLEAN);
+RTS_ENTRY(stg_MUT_ARR_PTRS_DIRTY);
+RTS_ENTRY(stg_MUT_ARR_PTRS_FROZEN_CLEAN);
+RTS_ENTRY(stg_MUT_ARR_PTRS_FROZEN_DIRTY);
+RTS_ENTRY(stg_SMALL_MUT_ARR_PTRS_CLEAN);
+RTS_ENTRY(stg_SMALL_MUT_ARR_PTRS_DIRTY);
+RTS_ENTRY(stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN);
+RTS_ENTRY(stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY);
+RTS_ENTRY(stg_MUT_VAR_CLEAN);
+RTS_ENTRY(stg_MUT_VAR_DIRTY);
+RTS_ENTRY(stg_END_TSO_QUEUE);
+RTS_ENTRY(stg_GCD_CAF);
+RTS_ENTRY(stg_STM_AWOKEN);
+RTS_ENTRY(stg_MSG_TRY_WAKEUP);
+RTS_ENTRY(stg_MSG_THROWTO);
+RTS_ENTRY(stg_MSG_BLACKHOLE);
+RTS_ENTRY(stg_MSG_NULL);
+RTS_ENTRY(stg_MVAR_TSO_QUEUE);
+RTS_ENTRY(stg_catch);
+RTS_ENTRY(stg_PAP);
+RTS_ENTRY(stg_AP);
+RTS_ENTRY(stg_AP_NOUPD);
+RTS_ENTRY(stg_AP_STACK);
+RTS_ENTRY(stg_AP_STACK_NOUPD);
+RTS_ENTRY(stg_dummy_ret);
+RTS_ENTRY(stg_raise);
+RTS_ENTRY(stg_raise_ret);
+RTS_ENTRY(stg_atomically);
+RTS_ENTRY(stg_TVAR_WATCH_QUEUE);
+RTS_ENTRY(stg_TREC_CHUNK);
+RTS_ENTRY(stg_TREC_HEADER);
+RTS_ENTRY(stg_END_STM_WATCH_QUEUE);
+RTS_ENTRY(stg_END_STM_CHUNK_LIST);
+RTS_ENTRY(stg_NO_TREC);
+RTS_ENTRY(stg_COMPACT_NFDATA_CLEAN);
+RTS_ENTRY(stg_COMPACT_NFDATA_DIRTY);
+RTS_ENTRY(stg_SRT_1);
+RTS_ENTRY(stg_SRT_2);
+RTS_ENTRY(stg_SRT_3);
+RTS_ENTRY(stg_SRT_4);
+RTS_ENTRY(stg_SRT_5);
+RTS_ENTRY(stg_SRT_6);
+RTS_ENTRY(stg_SRT_7);
+RTS_ENTRY(stg_SRT_8);
+RTS_ENTRY(stg_SRT_9);
+RTS_ENTRY(stg_SRT_10);
+RTS_ENTRY(stg_SRT_11);
+RTS_ENTRY(stg_SRT_12);
+RTS_ENTRY(stg_SRT_13);
+RTS_ENTRY(stg_SRT_14);
+RTS_ENTRY(stg_SRT_15);
+RTS_ENTRY(stg_SRT_16);
+
+/* closures */
+
+RTS_CLOSURE(stg_END_TSO_QUEUE_closure);
+RTS_CLOSURE(stg_STM_AWOKEN_closure);
+RTS_CLOSURE(stg_NO_FINALIZER_closure);
+RTS_CLOSURE(stg_dummy_ret_closure);
+RTS_CLOSURE(stg_forceIO_closure);
+
+RTS_CLOSURE(stg_END_STM_WATCH_QUEUE_closure);
+RTS_CLOSURE(stg_END_STM_CHUNK_LIST_closure);
+RTS_CLOSURE(stg_NO_TREC_closure);
+
+RTS_ENTRY(stg_NO_FINALIZER);
+
+#if IN_STG_CODE
+extern DLL_IMPORT_RTS StgWordArray stg_CHARLIKE_closure;
+extern DLL_IMPORT_RTS StgWordArray stg_INTLIKE_closure;
+#else
+extern DLL_IMPORT_RTS StgIntCharlikeClosure stg_CHARLIKE_closure[];
+extern DLL_IMPORT_RTS StgIntCharlikeClosure stg_INTLIKE_closure[];
+#endif
+
+/* StgStartup */
+
+RTS_RET(stg_forceIO);
+RTS_RET(stg_noforceIO);
+
+/* standard entry points */
+
+/* standard selector thunks */
+
+RTS_ENTRY(stg_sel_0_upd);
+RTS_ENTRY(stg_sel_1_upd);
+RTS_ENTRY(stg_sel_2_upd);
+RTS_ENTRY(stg_sel_3_upd);
+RTS_ENTRY(stg_sel_4_upd);
+RTS_ENTRY(stg_sel_5_upd);
+RTS_ENTRY(stg_sel_6_upd);
+RTS_ENTRY(stg_sel_7_upd);
+RTS_ENTRY(stg_sel_8_upd);
+RTS_ENTRY(stg_sel_9_upd);
+RTS_ENTRY(stg_sel_10_upd);
+RTS_ENTRY(stg_sel_11_upd);
+RTS_ENTRY(stg_sel_12_upd);
+RTS_ENTRY(stg_sel_13_upd);
+RTS_ENTRY(stg_sel_14_upd);
+RTS_ENTRY(stg_sel_15_upd);
+
+RTS_ENTRY(stg_sel_0_noupd);
+RTS_ENTRY(stg_sel_1_noupd);
+RTS_ENTRY(stg_sel_2_noupd);
+RTS_ENTRY(stg_sel_3_noupd);
+RTS_ENTRY(stg_sel_4_noupd);
+RTS_ENTRY(stg_sel_5_noupd);
+RTS_ENTRY(stg_sel_6_noupd);
+RTS_ENTRY(stg_sel_7_noupd);
+RTS_ENTRY(stg_sel_8_noupd);
+RTS_ENTRY(stg_sel_9_noupd);
+RTS_ENTRY(stg_sel_10_noupd);
+RTS_ENTRY(stg_sel_11_noupd);
+RTS_ENTRY(stg_sel_12_noupd);
+RTS_ENTRY(stg_sel_13_noupd);
+RTS_ENTRY(stg_sel_14_noupd);
+RTS_ENTRY(stg_sel_15_noupd);
+
+/* standard ap thunks */
+
+RTS_THUNK(stg_ap_1_upd);
+RTS_THUNK(stg_ap_2_upd);
+RTS_THUNK(stg_ap_3_upd);
+RTS_THUNK(stg_ap_4_upd);
+RTS_THUNK(stg_ap_5_upd);
+RTS_THUNK(stg_ap_6_upd);
+RTS_THUNK(stg_ap_7_upd);
+
+/* standard application routines (see also utils/genapply,
+ * and GHC.StgToCmm.ArgRep).
+ */
+RTS_RET(stg_ap_v);
+RTS_RET(stg_ap_f);
+RTS_RET(stg_ap_d);
+RTS_RET(stg_ap_l);
+RTS_RET(stg_ap_v16);
+RTS_RET(stg_ap_v32);
+RTS_RET(stg_ap_v64);
+RTS_RET(stg_ap_n);
+RTS_RET(stg_ap_p);
+RTS_RET(stg_ap_pv);
+RTS_RET(stg_ap_pp);
+RTS_RET(stg_ap_ppv);
+RTS_RET(stg_ap_ppp);
+RTS_RET(stg_ap_pppv);
+RTS_RET(stg_ap_pppp);
+RTS_RET(stg_ap_ppppp);
+RTS_RET(stg_ap_pppppp);
+
+RTS_FUN_DECL(stg_ap_0_fast);
+RTS_FUN_DECL(stg_ap_v_fast);
+RTS_FUN_DECL(stg_ap_f_fast);
+RTS_FUN_DECL(stg_ap_d_fast);
+RTS_FUN_DECL(stg_ap_l_fast);
+RTS_FUN_DECL(stg_ap_v16_fast);
+RTS_FUN_DECL(stg_ap_v32_fast);
+RTS_FUN_DECL(stg_ap_v64_fast);
+RTS_FUN_DECL(stg_ap_n_fast);
+RTS_FUN_DECL(stg_ap_p_fast);
+RTS_FUN_DECL(stg_ap_pv_fast);
+RTS_FUN_DECL(stg_ap_pp_fast);
+RTS_FUN_DECL(stg_ap_ppv_fast);
+RTS_FUN_DECL(stg_ap_ppp_fast);
+RTS_FUN_DECL(stg_ap_pppv_fast);
+RTS_FUN_DECL(stg_ap_pppp_fast);
+RTS_FUN_DECL(stg_ap_ppppp_fast);
+RTS_FUN_DECL(stg_ap_pppppp_fast);
+RTS_FUN_DECL(stg_PAP_apply);
+
+/* standard GC & stack check entry points, all defined in HeapStackCheck.cmm */
+
+RTS_FUN_DECL(stg_gc_noregs);
+
+RTS_RET(stg_ret_v);
+RTS_RET(stg_ret_p);
+RTS_RET(stg_ret_n);
+RTS_RET(stg_ret_f);
+RTS_RET(stg_ret_d);
+RTS_RET(stg_ret_l);
+RTS_RET(stg_ret_t);
+
+RTS_FUN_DECL(stg_gc_prim);
+RTS_FUN_DECL(stg_gc_prim_p);
+RTS_FUN_DECL(stg_gc_prim_pp);
+RTS_FUN_DECL(stg_gc_prim_n);
+
+RTS_RET(stg_gc_prim_p_ll_ret);
+RTS_FUN_DECL(stg_gc_prim_p_ll);
+
+RTS_RET(stg_enter);
+RTS_FUN_DECL(__stg_gc_enter_1);
+
+RTS_FUN_DECL(stg_gc_unpt_r1);
+RTS_FUN_DECL(stg_gc_unbx_r1);
+RTS_FUN_DECL(stg_gc_f1);
+RTS_FUN_DECL(stg_gc_d1);
+RTS_FUN_DECL(stg_gc_l1);
+RTS_FUN_DECL(stg_gc_pp);
+RTS_FUN_DECL(stg_gc_ppp);
+RTS_FUN_DECL(stg_gc_pppp);
+
+RTS_RET(stg_gc_fun);
+RTS_FUN_DECL(__stg_gc_fun);
+
+RTS_FUN_DECL(stg_yield_noregs);
+RTS_FUN_DECL(stg_yield_to_interpreter);
+RTS_FUN_DECL(stg_block_noregs);
+RTS_FUN_DECL(stg_block_blackhole);
+RTS_FUN_DECL(stg_block_blackhole_finally);
+RTS_FUN_DECL(stg_block_takemvar);
+RTS_FUN_DECL(stg_block_readmvar);
+RTS_RET(stg_block_takemvar);
+RTS_RET(stg_block_readmvar);
+RTS_FUN_DECL(stg_block_putmvar);
+RTS_RET(stg_block_putmvar);
+#if defined(mingw32_HOST_OS)
+RTS_FUN_DECL(stg_block_async);
+RTS_RET(stg_block_async);
+RTS_FUN_DECL(stg_block_async_void);
+RTS_RET(stg_block_async_void);
+#endif
+RTS_FUN_DECL(stg_block_stmwait);
+RTS_FUN_DECL(stg_block_throwto);
+RTS_RET(stg_block_throwto);
+
+RTS_FUN_DECL(stg_readIOPortzh);
+RTS_FUN_DECL(stg_writeIOPortzh);
+RTS_FUN_DECL(stg_newIOPortzh);
+
+/* Entry/exit points from StgStartup.cmm */
+
+RTS_RET(stg_stop_thread);
+
+RTS_FUN_DECL(stg_returnToStackTop);
+RTS_FUN_DECL(stg_returnToSched);
+RTS_FUN_DECL(stg_returnToSchedNotPaused);
+RTS_FUN_DECL(stg_returnToSchedButFirst);
+RTS_FUN_DECL(stg_threadFinished);
+
+RTS_FUN_DECL(StgReturn);
+
+/* -----------------------------------------------------------------------------
+ PrimOps
+ -------------------------------------------------------------------------- */
+
+RTS_FUN_DECL(stg_decodeFloatzuIntzh);
+RTS_FUN_DECL(stg_decodeDoublezu2Intzh);
+RTS_FUN_DECL(stg_decodeDoublezuInt64zh);
+
+RTS_FUN_DECL(stg_unsafeThawArrayzh);
+RTS_FUN_DECL(stg_casArrayzh);
+RTS_FUN_DECL(stg_newByteArrayzh);
+RTS_FUN_DECL(stg_newPinnedByteArrayzh);
+RTS_FUN_DECL(stg_newAlignedPinnedByteArrayzh);
+RTS_FUN_DECL(stg_isByteArrayPinnedzh);
+RTS_FUN_DECL(stg_isMutableByteArrayPinnedzh);
+RTS_FUN_DECL(stg_shrinkMutableByteArrayzh);
+RTS_FUN_DECL(stg_resizzeMutableByteArrayzh);
+RTS_FUN_DECL(stg_shrinkSmallMutableArrayzh);
+RTS_FUN_DECL(stg_casIntArrayzh);
+RTS_FUN_DECL(stg_casInt8Arrayzh);
+RTS_FUN_DECL(stg_casInt16Arrayzh);
+RTS_FUN_DECL(stg_casInt32Arrayzh);
+RTS_FUN_DECL(stg_casInt64Arrayzh);
+RTS_FUN_DECL(stg_newArrayzh);
+RTS_FUN_DECL(stg_newArrayArrayzh);
+RTS_FUN_DECL(stg_copyArrayzh);
+RTS_FUN_DECL(stg_copyMutableArrayzh);
+RTS_FUN_DECL(stg_copyArrayArrayzh);
+RTS_FUN_DECL(stg_copyMutableArrayArrayzh);
+RTS_FUN_DECL(stg_cloneArrayzh);
+RTS_FUN_DECL(stg_cloneMutableArrayzh);
+RTS_FUN_DECL(stg_freezzeArrayzh);
+RTS_FUN_DECL(stg_thawArrayzh);
+
+RTS_FUN_DECL(stg_newSmallArrayzh);
+RTS_FUN_DECL(stg_unsafeThawSmallArrayzh);
+RTS_FUN_DECL(stg_cloneSmallArrayzh);
+RTS_FUN_DECL(stg_cloneSmallMutableArrayzh);
+RTS_FUN_DECL(stg_freezzeSmallArrayzh);
+RTS_FUN_DECL(stg_thawSmallArrayzh);
+RTS_FUN_DECL(stg_copySmallArrayzh);
+RTS_FUN_DECL(stg_copySmallMutableArrayzh);
+RTS_FUN_DECL(stg_casSmallArrayzh);
+
+RTS_FUN_DECL(stg_newMutVarzh);
+RTS_FUN_DECL(stg_atomicModifyMutVar2zh);
+RTS_FUN_DECL(stg_atomicModifyMutVarzuzh);
+RTS_FUN_DECL(stg_casMutVarzh);
+
+RTS_FUN_DECL(stg_isEmptyMVarzh);
+RTS_FUN_DECL(stg_newMVarzh);
+RTS_FUN_DECL(stg_takeMVarzh);
+RTS_FUN_DECL(stg_putMVarzh);
+RTS_FUN_DECL(stg_readMVarzh);
+RTS_FUN_DECL(stg_tryTakeMVarzh);
+RTS_FUN_DECL(stg_tryPutMVarzh);
+RTS_FUN_DECL(stg_tryReadMVarzh);
+
+RTS_FUN_DECL(stg_waitReadzh);
+RTS_FUN_DECL(stg_waitWritezh);
+RTS_FUN_DECL(stg_delayzh);
+#if defined(mingw32_HOST_OS)
+RTS_FUN_DECL(stg_asyncReadzh);
+RTS_FUN_DECL(stg_asyncWritezh);
+RTS_FUN_DECL(stg_asyncDoProczh);
+#endif
+
+RTS_FUN_DECL(stg_catchzh);
+RTS_FUN_DECL(stg_raisezh);
+RTS_FUN_DECL(stg_raiseDivZZerozh);
+RTS_FUN_DECL(stg_raiseUnderflowzh);
+RTS_FUN_DECL(stg_raiseOverflowzh);
+RTS_FUN_DECL(stg_raiseIOzh);
+RTS_FUN_DECL(stg_paniczh);
+RTS_FUN_DECL(stg_absentErrorzh);
+
+RTS_FUN_DECL(stg_makeStableNamezh);
+RTS_FUN_DECL(stg_makeStablePtrzh);
+RTS_FUN_DECL(stg_deRefStablePtrzh);
+
+RTS_FUN_DECL(stg_compactAddzh);
+RTS_FUN_DECL(stg_compactAddWithSharingzh);
+RTS_FUN_DECL(stg_compactNewzh);
+RTS_FUN_DECL(stg_compactAppendzh);
+RTS_FUN_DECL(stg_compactResizzezh);
+RTS_FUN_DECL(stg_compactGetRootzh);
+RTS_FUN_DECL(stg_compactContainszh);
+RTS_FUN_DECL(stg_compactContainsAnyzh);
+RTS_FUN_DECL(stg_compactGetFirstBlockzh);
+RTS_FUN_DECL(stg_compactGetNextBlockzh);
+RTS_FUN_DECL(stg_compactAllocateBlockzh);
+RTS_FUN_DECL(stg_compactFixupPointerszh);
+RTS_FUN_DECL(stg_compactSizzezh);
+
+RTS_FUN_DECL(stg_forkzh);
+RTS_FUN_DECL(stg_forkOnzh);
+RTS_FUN_DECL(stg_yieldzh);
+RTS_FUN_DECL(stg_killMyself);
+RTS_FUN_DECL(stg_killThreadzh);
+RTS_FUN_DECL(stg_getMaskingStatezh);
+RTS_FUN_DECL(stg_maskAsyncExceptionszh);
+RTS_FUN_DECL(stg_maskUninterruptiblezh);
+RTS_FUN_DECL(stg_unmaskAsyncExceptionszh);
+RTS_FUN_DECL(stg_myThreadIdzh);
+RTS_FUN_DECL(stg_labelThreadzh);
+RTS_FUN_DECL(stg_isCurrentThreadBoundzh);
+RTS_FUN_DECL(stg_threadStatuszh);
+
+RTS_FUN_DECL(stg_mkWeakzh);
+RTS_FUN_DECL(stg_mkWeakNoFinalizzerzh);
+RTS_FUN_DECL(stg_mkWeakForeignzh);
+RTS_FUN_DECL(stg_addCFinalizzerToWeakzh);
+RTS_FUN_DECL(stg_finalizzeWeakzh);
+RTS_FUN_DECL(stg_deRefWeakzh);
+
+RTS_FUN_DECL(stg_runRWzh);
+
+RTS_FUN_DECL(stg_newBCOzh);
+RTS_FUN_DECL(stg_mkApUpd0zh);
+
+RTS_FUN_DECL(stg_retryzh);
+RTS_FUN_DECL(stg_catchRetryzh);
+RTS_FUN_DECL(stg_catchSTMzh);
+RTS_FUN_DECL(stg_atomicallyzh);
+RTS_FUN_DECL(stg_newTVarzh);
+RTS_FUN_DECL(stg_readTVarzh);
+RTS_FUN_DECL(stg_readTVarIOzh);
+RTS_FUN_DECL(stg_writeTVarzh);
+
+RTS_FUN_DECL(stg_unpackClosurezh);
+RTS_FUN_DECL(stg_closureSizzezh);
+RTS_FUN_DECL(stg_whereFromzh);
+RTS_FUN_DECL(stg_getApStackValzh);
+RTS_FUN_DECL(stg_getSparkzh);
+RTS_FUN_DECL(stg_numSparkszh);
+
+RTS_FUN_DECL(stg_noDuplicatezh);
+
+RTS_FUN_DECL(stg_traceCcszh);
+RTS_FUN_DECL(stg_clearCCSzh);
+RTS_FUN_DECL(stg_traceEventzh);
+RTS_FUN_DECL(stg_traceBinaryEventzh);
+RTS_FUN_DECL(stg_traceMarkerzh);
+RTS_FUN_DECL(stg_getThreadAllocationCounterzh);
+RTS_FUN_DECL(stg_setThreadAllocationCounterzh);
+
+
+/* Other misc stuff */
+// See wiki:commentary/compiler/backends/ppr-c#prototypes
+
+#if IN_STG_CODE && !IN_STGCRUN
+
+// Interpreter.c
+extern StgWord rts_stop_next_breakpoint[];
+extern StgWord rts_stop_on_exception[];
+extern StgWord rts_breakpoint_io_action[];
+
+// Schedule.c
+extern StgWord RTS_VAR(blocked_queue_hd), RTS_VAR(blocked_queue_tl);
+extern StgWord RTS_VAR(sleeping_queue);
+extern StgWord RTS_VAR(sched_mutex);
+
+// Apply.cmm
+// canned bitmap for each arg type
+extern const StgWord stg_arg_bitmaps[];
+extern const StgWord stg_ap_stack_entries[];
+extern const StgWord stg_stack_save_entries[];
+
+// Storage.c
+extern unsigned int RTS_VAR(g0);
+extern unsigned int RTS_VAR(large_alloc_lim);
+extern StgWord RTS_VAR(atomic_modify_mutvar_mutex);
+
+// RtsFlags
+extern StgWord RTS_VAR(RtsFlags); // bogus type
+
+// StablePtr.c
+extern StgWord RTS_VAR(stable_ptr_table);
+
+// StableName.c
+extern StgWord RTS_VAR(stable_name_table);
+
+// Profiling.c
+extern unsigned int RTS_VAR(era);
+extern unsigned int RTS_VAR(entering_PAP);
+extern StgWord CCS_OVERHEAD[];
+extern StgWord CCS_SYSTEM[];
+
+// Calls to these rts functions are generated directly
+// by codegen (see GHC.StgToCmm.Prof)
+// and don't require (don't emit) forward declarations.
+//
+// In unregisterised mode (when building via .hc files)
+// the calls are ordinary C calls. Functions must be in
+// scope and must match prototype assumed by
+// 'GHC.StgToCmm.Prof'
+// as opposed to real prototype declared in
+// 'rts/include/rts/prof/CCS.h'
+void enterFunCCS (void *reg, void *ccsfn);
+void * pushCostCentre (void *ccs, void *cc);
+
+// Capability.c
+extern unsigned int n_capabilities;
+
+/* -----------------------------------------------------------------------------
+ Nonmoving GC write barrier
+ -------------------------------------------------------------------------- */
+
+#include <rts/NonMoving.h>
+
+
+#endif
diff --git a/rts/include/stg/Prim.h b/rts/include/stg/Prim.h
new file mode 100644
index 0000000000..20b3d45bee
--- /dev/null
+++ b/rts/include/stg/Prim.h
@@ -0,0 +1,139 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2014-2014
+ *
+ * Declarations for C fallback primitives implemented by 'ghc-prim' package.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/* libraries/ghc-prim/cbits/atomic.c */
+StgWord hs_atomic_add8(StgWord x, StgWord val);
+StgWord hs_atomic_add16(StgWord x, StgWord val);
+StgWord hs_atomic_add32(StgWord x, StgWord val);
+StgWord64 hs_atomic_add64(StgWord x, StgWord64 val);
+StgWord hs_atomic_sub8(StgWord x, StgWord val);
+StgWord hs_atomic_sub16(StgWord x, StgWord val);
+StgWord hs_atomic_sub32(StgWord x, StgWord val);
+StgWord64 hs_atomic_sub64(StgWord x, StgWord64 val);
+StgWord hs_atomic_and8(StgWord x, StgWord val);
+StgWord hs_atomic_and16(StgWord x, StgWord val);
+StgWord hs_atomic_and32(StgWord x, StgWord val);
+StgWord64 hs_atomic_and64(StgWord x, StgWord64 val);
+StgWord hs_atomic_nand8(StgWord x, StgWord val);
+StgWord hs_atomic_nand16(StgWord x, StgWord val);
+StgWord hs_atomic_nand32(StgWord x, StgWord val);
+StgWord64 hs_atomic_nand64(StgWord x, StgWord64 val);
+StgWord hs_atomic_or8(StgWord x, StgWord val);
+StgWord hs_atomic_or16(StgWord x, StgWord val);
+StgWord hs_atomic_or32(StgWord x, StgWord val);
+StgWord64 hs_atomic_or64(StgWord x, StgWord64 val);
+StgWord hs_atomic_xor8(StgWord x, StgWord val);
+StgWord hs_atomic_xor16(StgWord x, StgWord val);
+StgWord hs_atomic_xor32(StgWord x, StgWord val);
+StgWord64 hs_atomic_xor64(StgWord x, StgWord64 val);
+StgWord hs_cmpxchg8(StgWord x, StgWord old, StgWord new_);
+StgWord hs_cmpxchg16(StgWord x, StgWord old, StgWord new_);
+StgWord hs_cmpxchg32(StgWord x, StgWord old, StgWord new_);
+StgWord hs_cmpxchg64(StgWord x, StgWord64 old, StgWord64 new_);
+StgWord hs_atomicread8(StgWord x);
+StgWord hs_atomicread16(StgWord x);
+StgWord hs_atomicread32(StgWord x);
+StgWord64 hs_atomicread64(StgWord x);
+void hs_atomicwrite8(StgWord x, StgWord val);
+void hs_atomicwrite16(StgWord x, StgWord val);
+void hs_atomicwrite32(StgWord x, StgWord val);
+void hs_atomicwrite64(StgWord x, StgWord64 val);
+StgWord hs_xchg8(StgWord x, StgWord val);
+StgWord hs_xchg16(StgWord x, StgWord val);
+StgWord hs_xchg32(StgWord x, StgWord val);
+StgWord hs_xchg64(StgWord x, StgWord val);
+
+/* libraries/ghc-prim/cbits/bswap.c */
+StgWord16 hs_bswap16(StgWord16 x);
+StgWord32 hs_bswap32(StgWord32 x);
+StgWord64 hs_bswap64(StgWord64 x);
+
+/* libraries/ghc-prim/cbits/bitrev.c
+This was done as part of issue #16164.
+See Note [Bit reversal primop] for more details about the implementation.*/
+StgWord hs_bitrev8(StgWord x);
+StgWord16 hs_bitrev16(StgWord16 x);
+StgWord32 hs_bitrev32(StgWord32 x);
+StgWord64 hs_bitrev64(StgWord64 x);
+
+/* libraries/ghc-prim/cbits/longlong.c */
+#if WORD_SIZE_IN_BITS < 64
+StgInt hs_eq64 (StgWord64 a, StgWord64 b);
+StgInt hs_ne64 (StgWord64 a, StgWord64 b);
+StgInt hs_gtWord64 (StgWord64 a, StgWord64 b);
+StgInt hs_geWord64 (StgWord64 a, StgWord64 b);
+StgInt hs_ltWord64 (StgWord64 a, StgWord64 b);
+StgInt hs_leWord64 (StgWord64 a, StgWord64 b);
+StgInt hs_gtInt64 (StgInt64 a, StgInt64 b);
+StgInt hs_geInt64 (StgInt64 a, StgInt64 b);
+StgInt hs_ltInt64 (StgInt64 a, StgInt64 b);
+StgInt hs_leInt64 (StgInt64 a, StgInt64 b);
+StgInt64 hs_neg64 (StgInt64 a);
+StgWord64 hs_add64 (StgWord64 a, StgWord64 b);
+StgWord64 hs_sub64 (StgWord64 a, StgWord64 b);
+StgWord64 hs_mul64 (StgWord64 a, StgWord64 b);
+StgWord64 hs_remWord64 (StgWord64 a, StgWord64 b);
+StgWord64 hs_quotWord64 (StgWord64 a, StgWord64 b);
+StgInt64 hs_remInt64 (StgInt64 a, StgInt64 b);
+StgInt64 hs_quotInt64 (StgInt64 a, StgInt64 b);
+StgWord64 hs_and64 (StgWord64 a, StgWord64 b);
+StgWord64 hs_or64 (StgWord64 a, StgWord64 b);
+StgWord64 hs_xor64 (StgWord64 a, StgWord64 b);
+StgWord64 hs_not64 (StgWord64 a);
+StgWord64 hs_uncheckedShiftL64 (StgWord64 a, StgInt b);
+StgWord64 hs_uncheckedShiftRL64 (StgWord64 a, StgInt b);
+StgInt64 hs_uncheckedIShiftRA64 (StgInt64 a, StgInt b);
+StgInt64 hs_intToInt64 (StgInt i);
+StgInt hs_int64ToInt (StgInt64 i);
+StgWord64 hs_int64ToWord64 (StgInt64 i);
+StgWord64 hs_wordToWord64 (StgWord w);
+StgWord hs_word64ToWord (StgWord64 w);
+StgInt64 hs_word64ToInt64 (StgWord64 w);
+#endif
+
+/* libraries/ghc-prim/cbits/pdep.c */
+StgWord64 hs_pdep64(StgWord64 src, StgWord64 mask);
+StgWord hs_pdep32(StgWord src, StgWord mask);
+StgWord hs_pdep16(StgWord src, StgWord mask);
+StgWord hs_pdep8(StgWord src, StgWord mask);
+
+/* libraries/ghc-prim/cbits/pext.c */
+StgWord64 hs_pext64(StgWord64 src, StgWord64 mask);
+StgWord hs_pext32(StgWord src, StgWord mask);
+StgWord hs_pext16(StgWord src, StgWord mask);
+StgWord hs_pext8(StgWord src, StgWord mask);
+
+/* libraries/ghc-prim/cbits/popcnt.c */
+StgWord hs_popcnt8(StgWord x);
+StgWord hs_popcnt16(StgWord x);
+StgWord hs_popcnt32(StgWord x);
+StgWord hs_popcnt64(StgWord64 x);
+StgWord hs_popcnt(StgWord x);
+
+/* libraries/ghc-prim/cbits/word2float.c */
+StgFloat hs_word2float32(StgWord x);
+StgDouble hs_word2float64(StgWord x);
+
+/* libraries/ghc-prim/cbits/clz.c */
+StgWord hs_clz8(StgWord x);
+StgWord hs_clz16(StgWord x);
+StgWord hs_clz32(StgWord x);
+StgWord hs_clz64(StgWord64 x);
+
+/* libraries/ghc-prim/cbits/ctz.c */
+StgWord hs_ctz8(StgWord x);
+StgWord hs_ctz16(StgWord x);
+StgWord hs_ctz32(StgWord x);
+StgWord hs_ctz64(StgWord64 x);
diff --git a/rts/include/stg/Regs.h b/rts/include/stg/Regs.h
new file mode 100644
index 0000000000..e37ce3eba4
--- /dev/null
+++ b/rts/include/stg/Regs.h
@@ -0,0 +1,528 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2012
+ *
+ * Registers in the STG machine.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+/*
+ * The STG machine has a collection of "registers", each one of which
+ * may or may not correspond to an actual machine register when
+ * running code.
+ *
+ * The register set is backed by a table in memory (struct
+ * StgRegTable). If a particular STG register is not mapped to a
+ * machine register, then the appropriate slot in this table is used
+ * instead.
+ *
+ * This table is itself pointed to by another register, BaseReg. If
+ * BaseReg is not in a machine register, then the register table is
+ * used from an absolute location (MainCapability).
+ *
+ */
+
+typedef struct {
+ StgWord stgEagerBlackholeInfo;
+ StgFunPtr stgGCEnter1;
+ StgFunPtr stgGCFun;
+} StgFunTable;
+
+/*
+ * Vanilla registers are given this union type, which is purely so
+ * that we can cast the vanilla reg to a variety of types with the
+ * minimum of syntax. eg. R1.w instead of (StgWord)R1.
+ */
+typedef union {
+ StgWord w;
+ StgAddr a;
+ StgChar c;
+ StgFloat f;
+ StgInt i;
+ StgPtr p;
+} StgUnion;
+
+/*
+ * This is the table that holds shadow-locations for all the STG
+ * registers. The shadow locations are used when:
+ *
+ * 1) the particular register isn't mapped to a real machine
+ * register, probably because there's a shortage of real registers.
+ * 2) caller-saves registers are saved across a CCall
+ */
+typedef struct {
+ StgUnion rR1;
+ StgUnion rR2;
+ StgUnion rR3;
+ StgUnion rR4;
+ StgUnion rR5;
+ StgUnion rR6;
+ StgUnion rR7;
+ StgUnion rR8;
+ StgUnion rR9; /* used occasionally by heap/stack checks */
+ StgUnion rR10; /* used occasionally by heap/stack checks */
+ StgFloat rF1;
+ StgFloat rF2;
+ StgFloat rF3;
+ StgFloat rF4;
+ StgFloat rF5;
+ StgFloat rF6;
+ StgDouble rD1;
+ StgDouble rD2;
+ StgDouble rD3;
+ StgDouble rD4;
+ StgDouble rD5;
+ StgDouble rD6;
+ StgWord128 rXMM1;
+ StgWord128 rXMM2;
+ StgWord128 rXMM3;
+ StgWord128 rXMM4;
+ StgWord128 rXMM5;
+ StgWord128 rXMM6;
+ StgWord256 rYMM1;
+ StgWord256 rYMM2;
+ StgWord256 rYMM3;
+ StgWord256 rYMM4;
+ StgWord256 rYMM5;
+ StgWord256 rYMM6;
+ StgWord512 rZMM1;
+ StgWord512 rZMM2;
+ StgWord512 rZMM3;
+ StgWord512 rZMM4;
+ StgWord512 rZMM5;
+ StgWord512 rZMM6;
+ StgWord64 rL1;
+ StgPtr rSp;
+ StgPtr rSpLim;
+ StgPtr rHp;
+ StgPtr rHpLim;
+ struct CostCentreStack_ * rCCCS; /* current cost-centre-stack */
+ struct StgTSO_ * rCurrentTSO;
+ struct nursery_ * rNursery;
+ struct bdescr_ * rCurrentNursery; /* Hp/HpLim point into this block */
+ struct bdescr_ * rCurrentAlloc; /* for allocation using allocate() */
+ StgWord rHpAlloc; /* number of *bytes* being allocated in heap */
+ StgWord rRet; /* holds the return code of the thread */
+} StgRegTable;
+
+#if IN_STG_CODE
+
+/*
+ * Registers Hp and HpLim are global across the entire system, and are
+ * copied into the RegTable or registers before executing a thread.
+ *
+ * Registers Sp and SpLim are saved in the TSO for the thread, but are
+ * copied into the RegTable or registers before executing a thread.
+ *
+ * All other registers are "general purpose", and are used for passing
+ * arguments to functions, and returning values. The code generator
+ * knows how many of these are in real registers, and avoids
+ * generating code that uses non-real registers. General purpose
+ * registers are never saved when returning to the scheduler, instead
+ * we save whatever is live at the time on the stack, and restore it
+ * later. This should reduce the context switch time, amongst other
+ * things.
+ *
+ * For argument passing, the stack will be used in preference to
+ * pseudo-registers if the architecture has too few general purpose
+ * registers.
+ *
+ * Some special RTS functions like newArray and the Integer primitives
+ * expect their arguments to be in registers R1-Rn, so we use these
+ * (pseudo-)registers in those cases.
+ */
+
+/* -----------------------------------------------------------------------------
+ * Emit the GCC-specific register declarations for each machine
+ * register being used. If any STG register isn't mapped to a machine
+ * register, then map it to an offset from BaseReg.
+ *
+ * First, the general purpose registers. The idea is, if a particular
+ * general-purpose STG register can't be mapped to a real machine
+ * register, it won't be used at all. Instead, we'll use the stack.
+ */
+
+/* define NO_REGS to omit register declarations - used in RTS C code
+ * that needs all the STG definitions but not the global register
+ * settings.
+ */
+#define GLOBAL_REG_DECL(type,name,reg) register type name REG(reg);
+
+#if defined(REG_R1) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R1,REG_R1)
+#else
+# define R1 (BaseReg->rR1)
+#endif
+
+#if defined(REG_R2) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R2,REG_R2)
+#else
+# define R2 (BaseReg->rR2)
+#endif
+
+#if defined(REG_R3) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R3,REG_R3)
+#else
+# define R3 (BaseReg->rR3)
+#endif
+
+#if defined(REG_R4) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R4,REG_R4)
+#else
+# define R4 (BaseReg->rR4)
+#endif
+
+#if defined(REG_R5) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R5,REG_R5)
+#else
+# define R5 (BaseReg->rR5)
+#endif
+
+#if defined(REG_R6) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R6,REG_R6)
+#else
+# define R6 (BaseReg->rR6)
+#endif
+
+#if defined(REG_R7) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R7,REG_R7)
+#else
+# define R7 (BaseReg->rR7)
+#endif
+
+#if defined(REG_R8) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R8,REG_R8)
+#else
+# define R8 (BaseReg->rR8)
+#endif
+
+#if defined(REG_R9) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R9,REG_R9)
+#else
+# define R9 (BaseReg->rR9)
+#endif
+
+#if defined(REG_R10) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgUnion,R10,REG_R10)
+#else
+# define R10 (BaseReg->rR10)
+#endif
+
+#if defined(REG_F1) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgFloat,F1,REG_F1)
+#else
+#define F1 (BaseReg->rF1)
+#endif
+
+#if defined(REG_F2) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgFloat,F2,REG_F2)
+#else
+#define F2 (BaseReg->rF2)
+#endif
+
+#if defined(REG_F3) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgFloat,F3,REG_F3)
+#else
+#define F3 (BaseReg->rF3)
+#endif
+
+#if defined(REG_F4) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgFloat,F4,REG_F4)
+#else
+#define F4 (BaseReg->rF4)
+#endif
+
+#if defined(REG_F5) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgFloat,F5,REG_F5)
+#else
+#define F5 (BaseReg->rF5)
+#endif
+
+#if defined(REG_F6) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgFloat,F6,REG_F6)
+#else
+#define F6 (BaseReg->rF6)
+#endif
+
+#if defined(REG_D1) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgDouble,D1,REG_D1)
+#else
+#define D1 (BaseReg->rD1)
+#endif
+
+#if defined(REG_D2) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgDouble,D2,REG_D2)
+#else
+#define D2 (BaseReg->rD2)
+#endif
+
+#if defined(REG_D3) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgDouble,D3,REG_D3)
+#else
+#define D3 (BaseReg->rD3)
+#endif
+
+#if defined(REG_D4) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgDouble,D4,REG_D4)
+#else
+#define D4 (BaseReg->rD4)
+#endif
+
+#if defined(REG_D5) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgDouble,D5,REG_D5)
+#else
+#define D5 (BaseReg->rD5)
+#endif
+
+#if defined(REG_D6) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgDouble,D6,REG_D6)
+#else
+#define D6 (BaseReg->rD6)
+#endif
+
+#if defined(REG_XMM1) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord128,XMM1,REG_XMM1)
+#else
+#define XMM1 (BaseReg->rXMM1)
+#endif
+
+#if defined(REG_XMM2) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord128,XMM2,REG_XMM2)
+#else
+#define XMM2 (BaseReg->rXMM2)
+#endif
+
+#if defined(REG_XMM3) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord128,XMM3,REG_XMM3)
+#else
+#define XMM3 (BaseReg->rXMM3)
+#endif
+
+#if defined(REG_XMM4) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord128,XMM4,REG_XMM4)
+#else
+#define XMM4 (BaseReg->rXMM4)
+#endif
+
+#if defined(REG_XMM5) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord128,XMM5,REG_XMM5)
+#else
+#define XMM5 (BaseReg->rXMM5)
+#endif
+
+#if defined(REG_XMM6) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord128,XMM6,REG_XMM6)
+#else
+#define XMM6 (BaseReg->rXMM6)
+#endif
+
+#if defined(REG_YMM1) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord256,YMM1,REG_YMM1)
+#else
+#define YMM1 (BaseReg->rYMM1)
+#endif
+
+#if defined(REG_YMM2) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord256,YMM2,REG_YMM2)
+#else
+#define YMM2 (BaseReg->rYMM2)
+#endif
+
+#if defined(REG_YMM3) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord256,YMM3,REG_YMM3)
+#else
+#define YMM3 (BaseReg->rYMM3)
+#endif
+
+#if defined(REG_YMM4) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord256,YMM4,REG_YMM4)
+#else
+#define YMM4 (BaseReg->rYMM4)
+#endif
+
+#if defined(REG_YMM5) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord256,YMM5,REG_YMM5)
+#else
+#define YMM5 (BaseReg->rYMM5)
+#endif
+
+#if defined(REG_YMM6) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord256,YMM6,REG_YMM6)
+#else
+#define YMM6 (BaseReg->rYMM6)
+#endif
+
+#if defined(REG_ZMM1) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord512,ZMM1,REG_ZMM1)
+#else
+#define ZMM1 (BaseReg->rZMM1)
+#endif
+
+#if defined(REG_ZMM2) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord512,ZMM2,REG_ZMM2)
+#else
+#define ZMM2 (BaseReg->rZMM2)
+#endif
+
+#if defined(REG_ZMM3) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord512,ZMM3,REG_ZMM3)
+#else
+#define ZMM3 (BaseReg->rZMM3)
+#endif
+
+#if defined(REG_ZMM4) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord512,ZMM4,REG_ZMM4)
+#else
+#define ZMM4 (BaseReg->rZMM4)
+#endif
+
+#if defined(REG_ZMM5) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord512,ZMM5,REG_ZMM5)
+#else
+#define ZMM5 (BaseReg->rZMM5)
+#endif
+
+#if defined(REG_ZMM6) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord512,ZMM6,REG_ZMM6)
+#else
+#define ZMM6 (BaseReg->rZMM6)
+#endif
+
+#if defined(REG_L1) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord64,L1,REG_L1)
+#else
+#define L1 (BaseReg->rL1)
+#endif
+
+/*
+ * If BaseReg isn't mapped to a machine register, just use the global
+ * address of the current register table (CurrentRegTable in
+ * concurrent Haskell, MainRegTable otherwise).
+ */
+
+/* A capability is a combination of a FunTable and a RegTable. In STG
+ * code, BaseReg normally points to the RegTable portion of this
+ * structure, so that we can index both forwards and backwards to take
+ * advantage of shorter instruction forms on some archs (eg. x86).
+ * This is a cut-down version of the Capability structure; the full
+ * version is defined in Capability.h.
+ */
+struct PartCapability_ {
+ StgFunTable f;
+ StgRegTable r;
+};
+
+/* No such thing as a MainCapability under THREADED_RTS - each thread must have
+ * its own Capability.
+ */
+#if IN_STG_CODE && !(defined(THREADED_RTS) && !defined(NOSMP))
+extern W_ MainCapability[];
+#endif
+
+/*
+ * Assigning to BaseReg (the ASSIGN_BaseReg macro): this happens on
+ * return from a "safe" foreign call, when the thread might be running
+ * on a new Capability. Obviously if BaseReg is not a register, then
+ * we are restricted to a single Capability (this invariant is enforced
+ * in Capability.c:initCapabilities), and assigning to BaseReg can be omitted.
+ */
+
+#if defined(REG_Base) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgRegTable *,BaseReg,REG_Base)
+#define ASSIGN_BaseReg(e) (BaseReg = (e))
+#else
+#if defined(THREADED_RTS) && !defined(NOSMP)
+#error BaseReg must be in a register for THREADED_RTS
+#endif
+#define BaseReg (&((struct PartCapability_ *)MainCapability)->r)
+#define ASSIGN_BaseReg(e) (e)
+#endif
+
+#if defined(REG_Sp) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(P_,Sp,REG_Sp)
+#else
+#define Sp (BaseReg->rSp)
+#endif
+
+#if defined(REG_SpLim) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(P_,SpLim,REG_SpLim)
+#else
+#define SpLim (BaseReg->rSpLim)
+#endif
+
+#if defined(REG_Hp) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(P_,Hp,REG_Hp)
+#else
+#define Hp (BaseReg->rHp)
+#endif
+
+#if defined(REG_HpLim) && !defined(NO_GLOBAL_REG_DECLS)
+#error HpLim cannot be in a register
+#else
+#define HpLim (BaseReg->rHpLim)
+#endif
+
+#if defined(REG_CCCS) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(struct CostCentreStack_ *,CCCS,REG_CCCS)
+#else
+#define CCCS (BaseReg->rCCCS)
+#endif
+
+#if defined(REG_CurrentTSO) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(struct _StgTSO *,CurrentTSO,REG_CurrentTSO)
+#else
+#define CurrentTSO (BaseReg->rCurrentTSO)
+#endif
+
+#if defined(REG_CurrentNursery) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(bdescr *,CurrentNursery,REG_CurrentNursery)
+#else
+#define CurrentNursery (BaseReg->rCurrentNursery)
+#endif
+
+#if defined(REG_HpAlloc) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(bdescr *,HpAlloc,REG_HpAlloc)
+#else
+#define HpAlloc (BaseReg->rHpAlloc)
+#endif
+
+/* -----------------------------------------------------------------------------
+ Get absolute function pointers from the register table, to save
+ code space. On x86,
+
+ jmp *-12(%ebx)
+
+ is shorter than
+
+ jmp absolute_address
+
+ as long as the offset is within the range of a signed byte
+ (-128..+127). So we pick some common absolute_addresses and put
+ them in the register table. As a bonus, linking time should also
+ be reduced.
+
+ Other possible candidates in order of importance:
+
+ stg_upd_frame_info
+ stg_CAF_BLACKHOLE_info
+ stg_IND_STATIC_info
+
+ anything else probably isn't worth the effort.
+
+ -------------------------------------------------------------------------- */
+
+
+#define FunReg ((StgFunTable *)((void *)BaseReg - STG_FIELD_OFFSET(struct PartCapability_, r)))
+
+#define stg_EAGER_BLACKHOLE_info (FunReg->stgEagerBlackholeInfo)
+#define stg_gc_enter_1 (FunReg->stgGCEnter1)
+#define stg_gc_fun (FunReg->stgGCFun)
+
+#endif /* IN_STG_CODE */
diff --git a/rts/include/stg/SMP.h b/rts/include/stg/SMP.h
new file mode 100644
index 0000000000..de706da290
--- /dev/null
+++ b/rts/include/stg/SMP.h
@@ -0,0 +1,579 @@
+/* ----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2005-2011
+ *
+ * Macros for multi-CPU support
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+#if defined(arm_HOST_ARCH) && defined(arm_HOST_ARCH_PRE_ARMv6)
+void arm_atomic_spin_lock(void);
+void arm_atomic_spin_unlock(void);
+#endif
+
+#if defined(THREADED_RTS)
+
+/* ----------------------------------------------------------------------------
+ Atomic operations
+ ------------------------------------------------------------------------- */
+
+#if !IN_STG_CODE || IN_STGCRUN
+// We only want the barriers, e.g. write_barrier(), declared in .hc
+// files. Defining the other inline functions here causes type
+// mismatch errors from gcc, because the generated C code is assuming
+// that there are no prototypes in scope.
+
+/*
+ * The atomic exchange operation: xchg(p,w) exchanges the value
+ * pointed to by p with the value w, returning the old value.
+ *
+ * Used for locking closures during updates (see lockClosure()
+ * in rts/include/rts/storage/SMPClosureOps.h) and the MVar primops.
+ */
+EXTERN_INLINE StgWord xchg(StgPtr p, StgWord w);
+
+/*
+ * Compare-and-swap. Atomically does this:
+ *
+ * cas(p,o,n) {
+ * r = *p;
+ * if (r == o) { *p = n };
+ * return r;
+ * }
+ */
+EXTERN_INLINE StgWord cas(StgVolatilePtr p, StgWord o, StgWord n);
+EXTERN_INLINE StgWord8 cas_word8(StgWord8 *volatile p, StgWord8 o, StgWord8 n);
+
+/*
+ * Compare-and-swap
+ * this uses a seq_cst success memory order and a relaxed failure memory order
+ */
+EXTERN_INLINE StgWord cas_seq_cst_relaxed(StgVolatilePtr p, StgWord o, StgWord n);
+
+
+/*
+ * Atomic addition by the provided quantity
+ *
+ * atomic_inc(p, n) {
+ * return ((*p) += n);
+ * }
+ */
+EXTERN_INLINE StgWord atomic_inc(StgVolatilePtr p, StgWord n);
+
+
+/*
+ * Atomic decrement
+ *
+ * atomic_dec(p) {
+ * return --(*p);
+ * }
+ */
+EXTERN_INLINE StgWord atomic_dec(StgVolatilePtr p);
+
+/*
+ * Busy-wait nop: this is a hint to the CPU that we are currently in a
+ * busy-wait loop waiting for another CPU to change something. On a
+ * hyperthreaded CPU it should yield to another thread, for example.
+ */
+EXTERN_INLINE void busy_wait_nop(void);
+
+#endif // !IN_STG_CODE
+
+/*
+ * Various kinds of memory barrier.
+ * write_barrier: prevents future stores occurring before preceding stores.
+ * store_load_barrier: prevents future loads occurring before preceding stores.
+ * load_load_barrier: prevents future loads occurring before earlier loads.
+ *
+ * Reference for these: "The JSR-133 Cookbook for Compiler Writers"
+ * http://gee.cs.oswego.edu/dl/jmm/cookbook.html
+ *
+ * To check whether you got these right, try the test in
+ * testsuite/tests/rts/testwsdeque.c
+ * This tests the work-stealing deque implementation, which relies on
+ * properly working store_load and load_load memory barriers.
+ */
+EXTERN_INLINE void write_barrier(void);
+EXTERN_INLINE void store_load_barrier(void);
+EXTERN_INLINE void load_load_barrier(void);
+
+/*
+ * Note [Heap memory barriers]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * Machines with weak memory ordering semantics have consequences for how
+ * closures are observed and mutated. For example, consider a thunk that needs
+ * to be updated to an indirection. In order for the indirection to be safe for
+ * concurrent observers to enter, said observers must read the indirection's
+ * info table before they read the indirectee. Furthermore, the indirectee must
+ * be set before the info table pointer. This ensures that if the observer sees
+ * an IND info table then the indirectee is valid.
+ *
+ * When a closure is updated with an indirection, both its info table and its
+ * indirectee must be written. With weak memory ordering, these two writes can
+ * be arbitrarily reordered, and perhaps even interleaved with other threads'
+ * reads and writes (in the absence of memory barrier instructions). Consider
+ * this example of a bad reordering:
+ *
+ * - An updater writes to a closure's info table (INFO_TYPE is now IND).
+ * - A concurrent observer branches upon reading the closure's INFO_TYPE as IND.
+ * - A concurrent observer reads the closure's indirectee and enters it.
+ * - An updater writes the closure's indirectee.
+ *
+ * Here the update to the indirectee comes too late and the concurrent observer
+ * has jumped off into the abyss. Speculative execution can also cause us
+ * issues, consider:
+ *
+ * - an observer is about to case on a value in closure's info table.
+ * - the observer speculatively reads one or more of closure's fields.
+ * - an updater writes to closure's info table.
+ * - the observer takes a branch based on the new info table value, but with the
+ * old closure fields!
+ * - the updater writes to the closure's other fields, but it's too late.
+ *
+ * Because of these effects, reads and writes to a closure's info table must be
+ * ordered carefully with respect to reads and writes to the closure's other
+ * fields, and memory barriers must be placed to ensure that reads and writes
+ * occur in program order. Specifically, updates to an already existing closure
+ * must follow the following pattern:
+ *
+ * - Update the closure's (non-info table) fields.
+ * - Write barrier.
+ * - Update the closure's info table.
+ *
+ * Observing the fields of an updateable closure (e.g. a THUNK) must follow the
+ * following pattern:
+ *
+ * - Read the closure's info pointer.
+ * - Read barrier.
+ * - Read the closure's (non-info table) fields.
+ *
+ * We must also take care when we expose a newly-allocated closure to other cores
+ * by writing a pointer to it to some shared data structure (e.g. an MVar#, a Message,
+ * or MutVar#). Specifically, we need to ensure that all writes constructing the
+ * closure are visible *before* the write exposing the new closure is made visible:
+ *
+ * - Allocate memory for the closure
+ * - Write the closure's info pointer and fields (ordering between this doesn't
+ * matter since the closure isn't yet visible to anyone else).
+ * - Write barrier
+ * - Make closure visible to other cores
+ *
+ * Note that thread stacks are inherently thread-local and consequently allocating an
+ * object and introducing a reference to it to our stack needs no barrier.
+ *
+ * There are several ways in which the mutator may make a newly-allocated
+ * closure visible to other cores:
+ *
+ * - Eager blackholing a THUNK:
+ * This is protected by an explicit write barrier in the eager blackholing
+ * code produced by the codegen. See GHC.StgToCmm.Bind.emitBlackHoleCode.
+ *
+ * - Lazy blackholing a THUNK:
+ *   This is protected by an explicit write barrier in the thread suspension
+ * code. See ThreadPaused.c:threadPaused.
+ *
+ * - Updating a BLACKHOLE:
+ * This case is protected by explicit write barriers in the update frame
+ * entry code (see rts/Updates.h).
+ *
+ * - Blocking on an MVar# (e.g. takeMVar#):
+ *   In this case the appropriate MVar primops (e.g. stg_takeMVarzh) include
+ * explicit memory barriers to ensure that the newly-allocated
+ * MVAR_TSO_QUEUE is visible to other cores.
+ *
+ * - Write to an MVar# (e.g. putMVar#):
+ *   This is protected by the full barrier implied by the CAS in putMVar#.
+ *
+ * - Write to a TVar#:
+ * This is protected by the full barrier implied by the CAS in STM.c:lock_stm.
+ *
+ * - Write to an Array#, ArrayArray#, or SmallArray#:
+ * This case is protected by an explicit write barrier in the code produced
+ * for this primop by the codegen. See GHC.StgToCmm.Prim.doWritePtrArrayOp and
+ * GHC.StgToCmm.Prim.doWriteSmallPtrArrayOp. Relevant issue: #12469.
+ *
+ * - Write to MutVar# via writeMutVar#:
+ * This case is protected by an explicit write barrier in the code produced
+ * for this primop by the codegen.
+ *
+ * - Write to MutVar# via atomicModifyMutVar# or casMutVar#:
+ * This is protected by the full barrier implied by the cmpxchg operations
+ *   in these primops.
+ *
+ * - Sending a Message to another capability:
+ *   This is protected by the acquisition and release of the target capability's
+ * lock in Messages.c:sendMessage.
+ *
+ * Finally, we must ensure that we flush all cores' store buffers before
+ * entering and leaving GC, since stacks may be read by other cores. This
+ * happens as a side-effect of taking and releasing mutexes (which imply
+ * acquire and release barriers, respectively).
+ *
+ * N.B. recordClosureMutated places a reference to the mutated object on
+ * the capability-local mut_list. Consequently this does not require any memory
+ * barrier.
+ *
+ * During parallel GC we need to be careful during evacuation: before replacing
+ * a closure with a forwarding pointer we must commit a write barrier to ensure
+ * that the copy we made in to-space is visible to other cores.
+ *
+ * However, we can be a bit lax when *reading* during GC. Specifically, the GC
+ * can only make a very limited set of changes to existing closures:
+ *
+ * - it can replace a closure's info table with stg_WHITEHOLE.
+ * - it can replace a previously-whitehole'd closure's info table with a
+ * forwarding pointer
+ * - it can replace a previously-whitehole'd closure's info table with a
+ * valid info table pointer (done in eval_thunk_selector)
+ * - it can update the value of a pointer field after evacuating it
+ *
+ * This is quite nice since we don't need to worry about an interleaving
+ * of writes producing an invalid state: a closure's fields remain valid after
+ * an update of its info table pointer and vice-versa.
+ *
+ * After a round of parallel scavenging we must also ensure that any writes the
+ * GC thread workers made are visible to the main GC thread. This is ensured by
+ * the full barrier implied by the atomic decrement in
+ * GC.c:scavenge_until_all_done.
+ *
+ * The work-stealing queue (WSDeque) also requires barriers; these are
+ * documented in WSDeque.c.
+ *
+ */
+
+/* ----------------------------------------------------------------------------
+ Implementations
+ ------------------------------------------------------------------------- */
+
+#if !IN_STG_CODE || IN_STGCRUN
+
+/*
+ * Exchange the value pointed to by p with w and return the former. This
+ * function is used to acquire a lock. An acquire memory barrier is sufficient
+ * for a lock operation because the corresponding unlock operation issues a
+ * store-store barrier (write_barrier()) immediately before releasing the lock.
+ */
+EXTERN_INLINE StgWord
+xchg(StgPtr p, StgWord w)
+{
+#if defined(HAVE_C11_ATOMICS)
+ return __atomic_exchange_n(p, w, __ATOMIC_SEQ_CST);
+#else
+ // When porting GHC to a new platform check that
+ // __sync_lock_test_and_set() actually stores w in *p.
+ // Use test rts/atomicxchg to verify that the correct value is stored.
+ // From the gcc manual:
+ // (https://gcc.gnu.org/onlinedocs/gcc-4.4.3/gcc/Atomic-Builtins.html)
+ // This built-in function, as described by Intel, is not
+ // a traditional test-and-set operation, but rather an atomic
+ // exchange operation.
+ // [...]
+ // Many targets have only minimal support for such locks,
+ // and do not support a full exchange operation. In this case,
+ // a target may support reduced functionality here by which the
+ // only valid value to store is the immediate constant 1. The
+ // exact value actually stored in *ptr is implementation defined.
+ return __sync_lock_test_and_set(p, w);
+#endif
+}
+
+/*
+ * CMPXCHG - the single-word atomic compare-and-exchange instruction. Used
+ * in the STM implementation.
+ */
+EXTERN_INLINE StgWord
+cas(StgVolatilePtr p, StgWord o, StgWord n)
+{
+#if defined(HAVE_C11_ATOMICS)
+ __atomic_compare_exchange_n(p, &o, n, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+ return o;
+#else
+ return __sync_val_compare_and_swap(p, o, n);
+#endif
+}
+
+EXTERN_INLINE StgWord8
+cas_word8(StgWord8 *volatile p, StgWord8 o, StgWord8 n)
+{
+#if defined(HAVE_C11_ATOMICS)
+ __atomic_compare_exchange_n(p, &o, n, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+ return o;
+#else
+ return __sync_val_compare_and_swap(p, o, n);
+#endif
+}
+
+EXTERN_INLINE StgWord
+cas_seq_cst_relaxed(StgVolatilePtr p, StgWord o, StgWord n) {
+#if defined(HAVE_C11_ATOMICS)
+ __atomic_compare_exchange_n(p, &o, n, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
+ return o;
+#else
+ return __sync_val_compare_and_swap(p, o, n);
+#endif
+
+}
+
+// RRN: Generalized to arbitrary increments to enable fetch-and-add in
+// Haskell code (fetchAddIntArray#).
+// PT: add-and-fetch, returns new value
+EXTERN_INLINE StgWord
+atomic_inc(StgVolatilePtr p, StgWord incr)
+{
+#if defined(HAVE_C11_ATOMICS)
+ return __atomic_add_fetch(p, incr, __ATOMIC_SEQ_CST);
+#else
+ return __sync_add_and_fetch(p, incr);
+#endif
+}
+
+EXTERN_INLINE StgWord
+atomic_dec(StgVolatilePtr p)
+{
+#if defined(HAVE_C11_ATOMICS)
+ return __atomic_sub_fetch(p, 1, __ATOMIC_SEQ_CST);
+#else
+ return __sync_sub_and_fetch(p, (StgWord) 1);
+#endif
+}
+
+/*
+ * Some architectures have a way to tell the CPU that we're in a
+ * busy-wait loop, and the processor should look for something else to
+ * do (such as run another hardware thread).
+ */
+EXTERN_INLINE void
+busy_wait_nop(void)
+{
+#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
+ // On Intel, the busy-wait-nop instruction is called "pause",
+ // which is actually represented as a nop with the rep prefix.
+ // On processors before the P4 this behaves as a nop; on P4 and
+ // later it might do something clever like yield to another
+ // hyperthread. In any case, Intel recommends putting one
+ // of these in a spin lock loop.
+ __asm__ __volatile__ ("rep; nop");
+#else
+ // nothing
+#endif
+}
+
+#endif // !IN_STG_CODE
+
+/*
+ * We need to tell both the compiler AND the CPU about the barriers.
+ * It's no good preventing the CPU from reordering the operations if
+ * the compiler has already done so - hence the "memory" restriction
+ * on each of the barriers below.
+ */
+EXTERN_INLINE void
+write_barrier(void) {
+#if defined(NOSMP)
+ return;
+#elif defined(TSAN_ENABLED)
+ // RELEASE is a bit stronger than the store-store barrier provided by
+ // write_barrier, consequently we only use this case as a conservative
+ // approximation when using ThreadSanitizer. See Note [ThreadSanitizer].
+ __atomic_thread_fence(__ATOMIC_RELEASE);
+#elif defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH)
+ __asm__ __volatile__ ("" : : : "memory");
+#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \
+ || defined(powerpc64le_HOST_ARCH)
+ __asm__ __volatile__ ("lwsync" : : : "memory");
+#elif defined(s390x_HOST_ARCH)
+ __asm__ __volatile__ ("" : : : "memory");
+#elif defined(sparc_HOST_ARCH)
+ /* Sparc in TSO mode does not require store/store barriers. */
+ __asm__ __volatile__ ("" : : : "memory");
+#elif defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)
+ __asm__ __volatile__ ("dmb st" : : : "memory");
+#elif defined(riscv64_HOST_ARCH)
+ __asm__ __volatile__ ("fence w,w" : : : "memory");
+#else
+#error memory barriers unimplemented on this architecture
+#endif
+}
+
+EXTERN_INLINE void
+store_load_barrier(void) {
+#if defined(NOSMP)
+ return;
+#elif defined(i386_HOST_ARCH)
+ __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory");
+#elif defined(x86_64_HOST_ARCH)
+ __asm__ __volatile__ ("lock; addq $0,0(%%rsp)" : : : "memory");
+#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \
+ || defined(powerpc64le_HOST_ARCH)
+ __asm__ __volatile__ ("sync" : : : "memory");
+#elif defined(s390x_HOST_ARCH)
+ __asm__ __volatile__ ("bcr 14,0" : : : "memory");
+#elif defined(sparc_HOST_ARCH)
+ __asm__ __volatile__ ("membar #StoreLoad" : : : "memory");
+#elif defined(arm_HOST_ARCH)
+ __asm__ __volatile__ ("dmb" : : : "memory");
+#elif defined(aarch64_HOST_ARCH)
+ __asm__ __volatile__ ("dmb sy" : : : "memory");
+#elif defined(riscv64_HOST_ARCH)
+ __asm__ __volatile__ ("fence w,r" : : : "memory");
+#else
+#error memory barriers unimplemented on this architecture
+#endif
+}
+
+EXTERN_INLINE void
+load_load_barrier(void) {
+#if defined(NOSMP)
+  return;
+#elif defined(i386_HOST_ARCH)
+  __asm__ __volatile__ ("" : : : "memory");
+#elif defined(x86_64_HOST_ARCH)
+  __asm__ __volatile__ ("" : : : "memory");
+#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \
+    || defined(powerpc64le_HOST_ARCH)
+  __asm__ __volatile__ ("lwsync" : : : "memory");
+#elif defined(s390x_HOST_ARCH)
+  __asm__ __volatile__ ("" : : : "memory");
+#elif defined(sparc_HOST_ARCH)
+  /* Sparc in TSO mode does not require load/load barriers. */
+  __asm__ __volatile__ ("" : : : "memory");
+#elif defined(arm_HOST_ARCH)
+  __asm__ __volatile__ ("dmb" : : : "memory");
+#elif defined(aarch64_HOST_ARCH)
+  __asm__ __volatile__ ("dmb sy" : : : "memory");
+#elif defined(riscv64_HOST_ARCH)
+  __asm__ __volatile__ ("fence r,r" : : : "memory"); /* load/load, not w,r */
+#else
+#error memory barriers unimplemented on this architecture
+#endif
+}
+
+// Load a pointer from a memory location that might be being modified
+// concurrently. This prevents the compiler from optimising away
+// multiple loads of the memory location, as it might otherwise do in
+// a busy wait loop for example.
+#define VOLATILE_LOAD(p) (*((StgVolatilePtr)(p)))
+
+// Relaxed atomic operations.
+#define RELAXED_LOAD(ptr) __atomic_load_n(ptr, __ATOMIC_RELAXED)
+#define RELAXED_STORE(ptr,val) __atomic_store_n(ptr, val, __ATOMIC_RELAXED)
+#define RELAXED_ADD(ptr,val) __atomic_add_fetch(ptr, val, __ATOMIC_RELAXED)
+
+// Acquire/release atomic operations
+#define ACQUIRE_LOAD(ptr) __atomic_load_n(ptr, __ATOMIC_ACQUIRE)
+#define RELEASE_STORE(ptr,val) __atomic_store_n(ptr, val, __ATOMIC_RELEASE)
+
+// Sequentially consistent atomic operations
+#define SEQ_CST_LOAD(ptr) __atomic_load_n(ptr, __ATOMIC_SEQ_CST)
+#define SEQ_CST_STORE(ptr,val) __atomic_store_n(ptr, val, __ATOMIC_SEQ_CST)
+#define SEQ_CST_ADD(ptr,val) __atomic_add_fetch(ptr, val, __ATOMIC_SEQ_CST)
+
+// Non-atomic addition for "approximate" counters that can be lossy
+#define NONATOMIC_ADD(ptr,val) RELAXED_STORE(ptr, RELAXED_LOAD(ptr) + val)
+
+// compare-and-swap atomic operations
+#define SEQ_CST_RELAXED_CAS(p,o,n) cas_seq_cst_relaxed(p,o,n)
+
+// Explicit fences
+//
+// These are typically necessary only in very specific cases (e.g. WSDeque)
+// where the ordered operations aren't expressive enough to capture the desired
+// ordering.
+#define RELEASE_FENCE() __atomic_thread_fence(__ATOMIC_RELEASE)
+#define SEQ_CST_FENCE() __atomic_thread_fence(__ATOMIC_SEQ_CST)
+
+/* ---------------------------------------------------------------------- */
+#else /* !THREADED_RTS */
+
+EXTERN_INLINE void write_barrier(void);
+EXTERN_INLINE void store_load_barrier(void);
+EXTERN_INLINE void load_load_barrier(void);
+EXTERN_INLINE void write_barrier () {} /* nothing */
+EXTERN_INLINE void store_load_barrier() {} /* nothing */
+EXTERN_INLINE void load_load_barrier () {} /* nothing */
+
+// Relaxed atomic operations
+#define RELAXED_LOAD(ptr) *ptr
+#define RELAXED_STORE(ptr,val) *ptr = val
+#define RELAXED_ADD(ptr,val) *ptr += val
+
+// Acquire/release atomic operations
+#define ACQUIRE_LOAD(ptr) *ptr
+#define RELEASE_STORE(ptr,val) *ptr = val
+
+// Sequentially consistent atomic operations
+#define SEQ_CST_LOAD(ptr) *ptr
+#define SEQ_CST_STORE(ptr,val) *ptr = val
+#define SEQ_CST_ADD(ptr,val) *ptr += val
+
+// Non-atomic addition for "approximate" counters that can be lossy
+#define NONATOMIC_ADD(ptr,val) *ptr += val
+
+// compare-and-swap atomic operations
+#define SEQ_CST_RELAXED_CAS(p,o,n) cas(p,o,n)
+
+// Fences
+#define RELEASE_FENCE()
+#define SEQ_CST_FENCE()
+
+#if !IN_STG_CODE || IN_STGCRUN
+INLINE_HEADER StgWord
+xchg(StgPtr p, StgWord w)
+{
+ StgWord old = *p;
+ *p = w;
+ return old;
+}
+
+EXTERN_INLINE StgWord cas(StgVolatilePtr p, StgWord o, StgWord n);
+EXTERN_INLINE StgWord
+cas(StgVolatilePtr p, StgWord o, StgWord n)
+{
+ StgWord result;
+ result = *p;
+ if (result == o) {
+ *p = n;
+ }
+ return result;
+}
+
+EXTERN_INLINE StgWord8 cas_word8(StgWord8 *volatile p, StgWord8 o, StgWord8 n);
+EXTERN_INLINE StgWord8
+cas_word8(StgWord8 *volatile p, StgWord8 o, StgWord8 n)
+{
+ StgWord8 result;
+ result = *p;
+ if (result == o) {
+ *p = n;
+ }
+ return result;
+}
+
+EXTERN_INLINE StgWord atomic_inc(StgVolatilePtr p, StgWord incr);
+EXTERN_INLINE StgWord
+atomic_inc(StgVolatilePtr p, StgWord incr)
+{
+ return ((*p) += incr);
+}
+
+
+INLINE_HEADER StgWord
+atomic_dec(StgVolatilePtr p)
+{
+ return --(*p);
+}
+#endif
+
+/* An alias for the C11 declspec */
+#define ATOMIC
+
+#define VOLATILE_LOAD(p) ((StgWord)*((StgWord*)(p)))
+
+#endif /* !THREADED_RTS */
diff --git a/rts/include/stg/Ticky.h b/rts/include/stg/Ticky.h
new file mode 100644
index 0000000000..3d9d4aee4f
--- /dev/null
+++ b/rts/include/stg/Ticky.h
@@ -0,0 +1,221 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2009
+ *
+ * Declarations for counters used by ticky-ticky profiling.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * -------------------------------------------------------------------------- */
+
+#pragma once
+
+/* These should probably be automatically generated in order to
+ keep them consistent with the macros that use them (which are
+ defined in Cmm.h) */
+
+/* Here are all the counter declarations: */
+/* If you change this list, make the corresponding change
+ in RTS_TICKY_SYMBOLS in rts/RtsSymbols.c */
+
+/* These two are explicitly declared in rts/Ticky.c, and
+ hence should not be extern'd except when using this header
+ file from STG code; hence IN_STG_CODE */
+
+#if IN_STG_CODE
+extern W_ ticky_entry_ctrs[];
+extern W_ top_ct[];
+#endif
+
+/* The rest are not explicitly declared in rts/Ticky.c. Instead
+ we use the same trick as in the former StgTicky.h: recycle the
+ same declarations for both extern decls (which are included everywhere)
+ and initializations (which only happen once)
+ TICKY_C is defined only in rts/Ticky.c */
+#if defined(TICKY_C)
+#define INIT(ializer) = ializer
+#define EXTERN
+#else
+#define INIT(ializer)
+#define EXTERN extern
+#endif
+
+EXTERN StgInt ENT_VIA_NODE_ctr INIT(0);
+EXTERN StgInt ENT_STATIC_THK_SINGLE_ctr INIT(0);
+EXTERN StgInt ENT_DYN_THK_SINGLE_ctr INIT(0);
+EXTERN StgInt ENT_STATIC_THK_MANY_ctr INIT(0);
+EXTERN StgInt ENT_DYN_THK_MANY_ctr INIT(0);
+EXTERN StgInt ENT_STATIC_FUN_DIRECT_ctr INIT(0);
+EXTERN StgInt ENT_DYN_FUN_DIRECT_ctr INIT(0);
+EXTERN StgInt ENT_STATIC_CON_ctr INIT(0);
+EXTERN StgInt ENT_DYN_CON_ctr INIT(0);
+EXTERN StgInt ENT_STATIC_IND_ctr INIT(0);
+EXTERN StgInt ENT_DYN_IND_ctr INIT(0);
+EXTERN StgInt ENT_PERM_IND_ctr INIT(0);
+EXTERN StgInt ENT_PAP_ctr INIT(0);
+EXTERN StgInt ENT_AP_ctr INIT(0);
+EXTERN StgInt ENT_AP_STACK_ctr INIT(0);
+EXTERN StgInt ENT_BH_ctr INIT(0);
+EXTERN StgInt ENT_LNE_ctr INIT(0);
+
+EXTERN StgInt UNKNOWN_CALL_ctr INIT(0);
+
+EXTERN StgInt SLOW_CALL_fast_v16_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_fast_v_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_fast_f_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_fast_d_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_fast_l_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_fast_n_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_fast_p_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_fast_pv_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_fast_pp_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_fast_ppv_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_fast_ppp_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_fast_pppv_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_fast_pppp_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_fast_ppppp_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_fast_pppppp_ctr INIT(0);
+EXTERN StgInt VERY_SLOW_CALL_ctr INIT(0);
+
+EXTERN StgInt ticky_slow_call_unevald;
+EXTERN StgInt SLOW_CALL_ctr INIT(0);
+EXTERN StgInt MULTI_CHUNK_SLOW_CALL_ctr INIT(0);
+EXTERN StgInt MULTI_CHUNK_SLOW_CALL_CHUNKS_ctr INIT(0);
+EXTERN StgInt KNOWN_CALL_ctr INIT(0);
+EXTERN StgInt KNOWN_CALL_TOO_FEW_ARGS_ctr INIT(0);
+EXTERN StgInt KNOWN_CALL_EXTRA_ARGS_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_FUN_TOO_FEW_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_FUN_CORRECT_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_FUN_TOO_MANY_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_PAP_TOO_FEW_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_PAP_CORRECT_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_PAP_TOO_MANY_ctr INIT(0);
+EXTERN StgInt SLOW_CALL_UNEVALD_ctr INIT(0);
+
+
+EXTERN StgInt UPDF_OMITTED_ctr INIT(0);
+EXTERN StgInt UPDF_PUSHED_ctr INIT(0);
+EXTERN StgInt CATCHF_PUSHED_ctr INIT(0);
+EXTERN StgInt UPDF_RCC_PUSHED_ctr INIT(0);
+EXTERN StgInt UPDF_RCC_OMITTED_ctr INIT(0);
+
+EXTERN StgInt UPD_SQUEEZED_ctr INIT(0);
+EXTERN StgInt UPD_CON_IN_NEW_ctr INIT(0);
+EXTERN StgInt UPD_CON_IN_PLACE_ctr INIT(0);
+EXTERN StgInt UPD_PAP_IN_NEW_ctr INIT(0);
+EXTERN StgInt UPD_PAP_IN_PLACE_ctr INIT(0);
+
+EXTERN StgInt ALLOC_HEAP_ctr INIT(0);
+EXTERN StgInt ALLOC_HEAP_tot INIT(0);
+
+EXTERN StgInt HEAP_CHK_ctr INIT(0);
+EXTERN StgInt STK_CHK_ctr INIT(0);
+
+EXTERN StgInt ALLOC_RTS_ctr INIT(0);
+EXTERN StgInt ALLOC_RTS_tot INIT(0);
+
+EXTERN StgInt ALLOC_FUN_ctr INIT(0);
+EXTERN StgInt ALLOC_FUN_adm INIT(0);
+EXTERN StgInt ALLOC_FUN_gds INIT(0);
+EXTERN StgInt ALLOC_FUN_slp INIT(0);
+
+EXTERN StgInt UPD_NEW_IND_ctr INIT(0);
+EXTERN StgInt UPD_NEW_PERM_IND_ctr INIT(0);
+EXTERN StgInt UPD_OLD_IND_ctr INIT(0);
+EXTERN StgInt UPD_OLD_PERM_IND_ctr INIT(0);
+
+EXTERN StgInt UPD_BH_UPDATABLE_ctr INIT(0);
+EXTERN StgInt UPD_CAF_BH_UPDATABLE_ctr INIT(0);
+EXTERN StgInt UPD_CAF_BH_SINGLE_ENTRY_ctr INIT(0);
+
+EXTERN StgInt GC_SEL_ABANDONED_ctr INIT(0);
+EXTERN StgInt GC_SEL_MINOR_ctr INIT(0);
+EXTERN StgInt GC_SEL_MAJOR_ctr INIT(0);
+
+EXTERN StgInt GC_FAILED_PROMOTION_ctr INIT(0);
+
+EXTERN StgInt ALLOC_UP_THK_ctr INIT(0);
+EXTERN StgInt ALLOC_SE_THK_ctr INIT(0);
+EXTERN StgInt ALLOC_THK_adm INIT(0);
+EXTERN StgInt ALLOC_THK_gds INIT(0);
+EXTERN StgInt ALLOC_THK_slp INIT(0);
+
+EXTERN StgInt ALLOC_CON_ctr INIT(0);
+EXTERN StgInt ALLOC_CON_adm INIT(0);
+EXTERN StgInt ALLOC_CON_gds INIT(0);
+EXTERN StgInt ALLOC_CON_slp INIT(0);
+
+EXTERN StgInt ALLOC_TUP_ctr INIT(0);
+EXTERN StgInt ALLOC_TUP_adm INIT(0);
+EXTERN StgInt ALLOC_TUP_gds INIT(0);
+EXTERN StgInt ALLOC_TUP_slp INIT(0);
+
+EXTERN StgInt ALLOC_BH_ctr INIT(0);
+EXTERN StgInt ALLOC_BH_adm INIT(0);
+EXTERN StgInt ALLOC_BH_gds INIT(0);
+EXTERN StgInt ALLOC_BH_slp INIT(0);
+
+EXTERN StgInt ALLOC_PRIM_ctr INIT(0);
+EXTERN StgInt ALLOC_PRIM_adm INIT(0);
+EXTERN StgInt ALLOC_PRIM_gds INIT(0);
+EXTERN StgInt ALLOC_PRIM_slp INIT(0);
+
+EXTERN StgInt ALLOC_PAP_ctr INIT(0);
+EXTERN StgInt ALLOC_PAP_adm INIT(0);
+EXTERN StgInt ALLOC_PAP_gds INIT(0);
+EXTERN StgInt ALLOC_PAP_slp INIT(0);
+
+EXTERN StgInt ALLOC_TSO_ctr INIT(0);
+EXTERN StgInt ALLOC_TSO_adm INIT(0);
+EXTERN StgInt ALLOC_TSO_gds INIT(0);
+EXTERN StgInt ALLOC_TSO_slp INIT(0);
+
+EXTERN StgInt RET_NEW_ctr INIT(0);
+EXTERN StgInt RET_OLD_ctr INIT(0);
+EXTERN StgInt RET_UNBOXED_TUP_ctr INIT(0);
+
+EXTERN StgInt RET_SEMI_loads_avoided INIT(0);
+/* End of counter declarations. */
+
+/* How many bins in ticky's histograms */
+#define TICKY_BIN_COUNT 9
+
+/* Histogram declarations */
+EXTERN StgInt RET_NEW_hst[TICKY_BIN_COUNT] INIT({0});
+EXTERN StgInt RET_OLD_hst[TICKY_BIN_COUNT] INIT({0});
+EXTERN StgInt RET_UNBOXED_TUP_hst[TICKY_BIN_COUNT] INIT({0});
+/* End of histogram declarations */
+
+/* This is ugly, but the story is:
+ We got rid of StgTicky.h, which was previously
+ defining these macros for the benefit of C code
+ so, we define them here instead (to be no-ops).
+ (since those macros are only defined in Cmm.h)
+
+ Note that these macros must be defined whether
+ TICKY_TICKY is defined or not. */
+
+#if !defined(CMINUSMINUS)
+#if defined(TICKY_TICKY)
+#define TICK_BUMP_BY(ctr,n) ctr = (StgInt) ctr + n
+#else
+#define TICK_BUMP_BY(ctr,n) /* nothing */
+#endif
+
+#define TICK_BUMP(ctr) TICK_BUMP_BY(ctr,1)
+
+#define TICK_ALLOC_PRIM(x,y,z) // FIXME: update counter
+#define TICK_UPD_OLD_IND() TICK_BUMP(UPD_OLD_IND_ctr)
+#define TICK_UPD_NEW_IND() TICK_BUMP(UPD_NEW_IND_ctr)
+#define TICK_UPD_SQUEEZED() TICK_BUMP(UPD_SQUEEZED_ctr)
+#define TICK_ALLOC_HEAP_NOCTR(bytes) // FIXME: update counter
+#define TICK_GC_FAILED_PROMOTION() // FIXME: update counter
+#define TICK_ALLOC_TSO() // FIXME: update counter
+#define TICK_ALLOC_STACK(g) // FIXME: update counter
+#define TICK_ALLOC_UP_THK(g,s) // FIXME: update counter
+#define TICK_ALLOC_SE_THK(g,s) // FIXME: update counter
+
+#endif
diff --git a/rts/include/stg/Types.h b/rts/include/stg/Types.h
new file mode 100644
index 0000000000..696df3e206
--- /dev/null
+++ b/rts/include/stg/Types.h
@@ -0,0 +1,205 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 1998-2004
+ *
+ * Various C datatypes used in the run-time system. This is the
+ * lowest-level include file, after ghcconfig.h and RtsConfig.h.
+ *
+ * Do not #include this file directly: #include "Rts.h" instead.
+ *
+ * To understand the structure of the RTS headers, see the wiki:
+ * https://gitlab.haskell.org/ghc/ghc/wikis/commentary/source-tree/includes
+ *
+ * NOTE: assumes #include "ghcconfig.h"
+ *
+ * Works with or without _POSIX_SOURCE.
+ *
+ * WARNING: Keep this file, MachDeps.h, and HsFFI.h in synch!
+ *
+ * ---------------------------------------------------------------------------*/
+
+#pragma once
+
+#if defined(mingw32_HOST_OS)
+# if defined(__USE_MINGW_ANSI_STDIO)
+# if __USE_MINGW_ANSI_STDIO != 1
+# warning "Mismatch between __USE_MINGW_ANSI_STDIO definitions. \
+If using Rts.h make sure it is the first header included."
+# endif
+# else
+/* Inform mingw we want the ISO rather than Windows printf format specifiers. */
+# define __USE_MINGW_ANSI_STDIO 1
+#endif
+#endif
+
+/* ISO C 99 says:
+ * "C++ implementations should define these macros only when
+ * __STDC_LIMIT_MACROS is defined before <stdint.h> is included."
+ *
+ * So we need to define it for now to compile with C++ compilers.
+ * However, C++11 does not require it anymore so we can remove this once we
+ * upgrade to requiring C++11 or newer.
+ */
+#define __STDC_LIMIT_MACROS
+#include <inttypes.h>
+
+
+/*
+ * This module should define types *only*, all beginning with "Stg".
+ *
+ * Specifically:
+
+ StgInt8, 16, 32, 64
+ StgWord8, 16, 32, 64
+ StgChar, StgFloat, StgDouble
+
+ ***** All the same size (i.e. sizeof(void *)): *****
+ StgPtr Basic pointer type
+ StgWord Unit of heap allocation
+ StgInt Signed version of StgWord
+ StgAddr Generic address type
+
+ StgBool, StgVoid, StgPtr, StgOffset,
+ StgCode, StgStablePtr, StgFunPtr,
+ StgUnion.
+ */
+
+/*
+ * First, platform-dependent definitions of size-specific integers.
+ */
+
+typedef int8_t StgInt8;
+typedef uint8_t StgWord8;
+
+#define STG_INT8_MIN INT8_MIN
+#define STG_INT8_MAX INT8_MAX
+#define STG_WORD8_MAX UINT8_MAX
+
+#define FMT_Word8 PRIu8
+#define FMT_HexWord8 PRIx8
+
+typedef int16_t StgInt16;
+typedef uint16_t StgWord16;
+
+#define STG_INT16_MIN INT16_MIN
+#define STG_INT16_MAX INT16_MAX
+#define STG_WORD16_MAX UINT16_MAX
+
+#define FMT_Word16 PRIu16
+#define FMT_HexWord16 PRIx16
+
+typedef int32_t StgInt32;
+typedef uint32_t StgWord32;
+
+#define STG_INT32_MIN INT32_MIN
+#define STG_INT32_MAX INT32_MAX
+#define STG_WORD32_MAX UINT32_MAX
+
+#define FMT_Word32 PRIu32
+#define FMT_HexWord32 PRIx32
+#define FMT_Int32 PRId32
+
+typedef int64_t StgInt64;
+typedef uint64_t StgWord64;
+
+#define STG_INT64_MIN INT64_MIN
+#define STG_INT64_MAX INT64_MAX
+#define STG_WORD64_MAX UINT64_MAX
+
+#define FMT_Word64 PRIu64
+#define FMT_HexWord64 PRIx64
+#define FMT_Int64 PRId64
+
+typedef struct { StgWord64 h; StgWord64 l; } StgWord128;
+
+typedef struct { StgWord128 h; StgWord128 l; } StgWord256;
+
+typedef struct { StgWord256 h; StgWord256 l; } StgWord512;
+
+/*
+ * Stg{Int,Word} are defined such that they have the exact same size as a
+ * void pointer.
+ */
+
+#if SIZEOF_VOID_P == 8
+typedef int64_t StgInt;
+typedef uint64_t StgWord;
+
+typedef int32_t StgHalfInt;
+typedef uint32_t StgHalfWord;
+
+#define STG_INT_MIN INT64_MIN
+#define STG_INT_MAX INT64_MAX
+#define STG_WORD_MAX UINT64_MAX
+
+#define FMT_Word FMT_Word64
+#define FMT_HexWord FMT_HexWord64
+#define FMT_Int FMT_Int64
+
+#define strToStgWord strtoull
+
+#elif SIZEOF_VOID_P == 4
+typedef int32_t StgInt;
+typedef uint32_t StgWord;
+
+typedef int16_t StgHalfInt;
+typedef uint16_t StgHalfWord;
+
+#define STG_INT_MIN INT32_MIN
+#define STG_INT_MAX INT32_MAX
+#define STG_WORD_MAX UINT32_MAX
+
+#define FMT_Word FMT_Word32
+#define FMT_HexWord FMT_HexWord32
+#define FMT_Int FMT_Int32
+
+#define strToStgWord strtoul
+
+#else
+#error GHC untested on this architecture: sizeof(void *) != 4 or 8
+#endif
+
+#define W_MASK (sizeof(W_)-1)
+
+/*
+ * Other commonly-used STG datatypes.
+ */
+
+typedef void* StgAddr;
+typedef StgWord32 StgChar;
+typedef int StgBool;
+typedef float StgFloat;
+typedef double StgDouble;
+typedef StgWord* StgPtr; /* heap or stack pointer */
+typedef StgWord volatile* StgVolatilePtr; /* pointer to volatile word */
+typedef StgWord StgOffset; /* byte offset within closure */
+typedef StgWord8 StgCode; /* close enough */
+typedef void* StgStablePtr;
+typedef StgWord8* StgByteArray;
+
+/*
+ Types for generated C functions when compiling via C.
+
+ The C functions take no arguments, and return a pointer to the next
+ function to be called use: Ptr to Fun that returns a Ptr to Fun
+ which returns Ptr to void
+
+ Note: Neither StgFunPtr not StgFun is quite right (that is,
+ StgFunPtr != StgFun*). So, the functions we define all have type
+ StgFun but we always have to cast them to StgFunPtr when we assign
+ them to something.
+ The only way round this would be to write a recursive type but
+ C only allows that if you're defining a struct or union.
+*/
+
+typedef void *(*(*StgFunPtr)(void))(void);
+typedef StgFunPtr StgFun(void);
+
+/*
+ * Forward declarations for the unregisterised backend, which
+ * only depends upon Stg.h and not the entirety of Rts.h, which
+ * is where these are defined.
+ */
+struct StgClosure_;
+struct StgThunk_;
+struct Capability_;
diff --git a/rts/package.conf.in b/rts/package.conf.in
index b0796595ff..50a372b2f0 100644
--- a/rts/package.conf.in
+++ b/rts/package.conf.in
@@ -68,11 +68,11 @@ extra-libraries:
include-dirs: INCLUDE_DIR FFI_INCLUDE_DIR
#else /* !INSTALLING */
include-dirs: TOP"/rts/dist/build"
- TOP"/includes"
- TOP"/includes/dist-derivedconstants/header"
+ TOP"/rts/include"
+ TOP"/rts/include/dist-derivedconstants/header"
FFI_INCLUDE_DIR
LIBDW_INCLUDE_DIR
- TOP"/includes/dist-install/build"
+ TOP"/rts/include/dist-install/build"
#endif
includes: Stg.h
diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in
index a08e007c2a..6e535a777c 100644
--- a/rts/rts.cabal.in
+++ b/rts/rts.cabal.in
@@ -51,7 +51,7 @@ flag thread-sanitizer
description:
Enable checking for data races using the ThreadSanitizer (TSAN)
mechanism supported by GCC and Clang. See Note [ThreadSanitizer]
- in @includes/rts/TSANUtils.h@.
+ in @rts/include/rts/TSANUtils.h@.
default: False
library
@@ -141,13 +141,13 @@ library
if !flag(smp)
cpp-options: -DNOSMP
- include-dirs: build ../includes includes
- includes/dist-derivedconstants/header @FFIIncludeDir@
+ include-dirs: build include
+ include/dist-derivedconstants/header @FFIIncludeDir@
@LibdwIncludeDir@
includes: Stg.h
install-includes: Cmm.h HsFFI.h MachDeps.h Rts.h RtsAPI.h Stg.h
ghcautoconf.h ghcconfig.h ghcplatform.h ghcversion.h
- -- ^ from ../includes
+ -- ^ from include
DerivedConstants.h ffi.h ffitarget.h
-- ^ generated
rts/Adjustor.h
@@ -176,6 +176,7 @@ library
rts/PrimFloat.h
rts/Profiling.h
rts/IPE.h
+ rts/PosixSource.h
rts/Signals.h
rts/SpinLock.h
rts/StableName.h
diff --git a/rts/sm/Compact.h b/rts/sm/Compact.h
index be9a09d4ab..9f4b396005 100644
--- a/rts/sm/Compact.h
+++ b/rts/sm/Compact.h
@@ -48,7 +48,7 @@
where we allocate single-word heap objects (e.g. a non-top-level FUN with
empty payload) we add one non-pointer field to the payload so that the object
will have two words. The minimum amount of words in the payload is defined in
- includes/rts/Constants.h as MIN_PAYLOAD_SIZE.
+ rts/include/rts/Constants.h as MIN_PAYLOAD_SIZE.
(See also !1701 where we discussed lifting this restriction and allocating
two bits per object)