diff options
Diffstat (limited to 'includes/stg')
-rw-r--r-- | includes/stg/DLL.h | 48 | ||||
-rw-r--r-- | includes/stg/MachRegs.h | 768 | ||||
-rw-r--r-- | includes/stg/MiscClosures.h | 642 | ||||
-rw-r--r-- | includes/stg/Regs.h | 667 | ||||
-rw-r--r-- | includes/stg/SMP.h | 313 | ||||
-rw-r--r-- | includes/stg/TailCalls.h | 304 | ||||
-rw-r--r-- | includes/stg/Ticky.h | 188 | ||||
-rw-r--r-- | includes/stg/Types.h | 135 |
8 files changed, 3065 insertions, 0 deletions
diff --git a/includes/stg/DLL.h b/includes/stg/DLL.h new file mode 100644 index 0000000000..5e824271bf --- /dev/null +++ b/includes/stg/DLL.h @@ -0,0 +1,48 @@ +#ifndef __STGDLL_H__ +#define __STGDLL_H__ 1 + +#if defined(__PIC__) && defined(mingw32_TARGET_OS) +# define DLL_IMPORT_DATA_REF(x) (_imp__##x) +# define DLL_IMPORT_DATA_VARNAME(x) *_imp__##x +# if __GNUC__ && !defined(__declspec) +# define DLLIMPORT +# else +# define DLLIMPORT __declspec(dllimport) +# define DLLIMPORT_DATA(x) _imp__##x +# endif +#else +# define DLL_IMPORT_DATA_REF(x) (&(x)) +# define DLL_IMPORT_DATA_VARNAME(x) x +# define DLLIMPORT +#endif + +/* The view of the ghc/includes/ header files differ ever so + slightly depending on whether the RTS is being compiled + or not - so we're forced to distinguish between two. + [oh, you want details :) : Data symbols defined by the RTS + have to be accessed through an extra level of indirection + when compiling generated .hc code compared to when the RTS + sources are being processed. This is only the case when + using Win32 DLLs. ] +*/ +#ifdef COMPILING_RTS +#define DLL_IMPORT DLLIMPORT +#define DLL_IMPORT_RTS +#define DLL_IMPORT_DATA_VAR(x) x +#else +#define DLL_IMPORT +#define DLL_IMPORT_RTS DLLIMPORT +# if defined(__PIC__) && defined(mingw32_TARGET_OS) +# define DLL_IMPORT_DATA_VAR(x) _imp__##x +# else +# define DLL_IMPORT_DATA_VAR(x) x +# endif +#endif + +#ifdef COMPILING_STDLIB +#define DLL_IMPORT_STDLIB +#else +#define DLL_IMPORT_STDLIB DLLIMPORT +#endif + +#endif /* __STGDLL_H__ */ diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h new file mode 100644 index 0000000000..d6075326db --- /dev/null +++ b/includes/stg/MachRegs.h @@ -0,0 +1,768 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-1999 + * + * Registers used in STG code. Might or might not correspond to + * actual machine registers. 
+ * + * ---------------------------------------------------------------------------*/ + +#ifndef MACHREGS_H +#define MACHREGS_H + +/* This file is #included into Haskell code in the compiler: #defines + * only in here please. + */ + +/* + * Defining NO_REGS causes no global registers to be used. NO_REGS is + * typically defined by GHC, via a command-line option passed to gcc, + * when the -funregisterised flag is given. + * + * NB. When NO_REGS is on, calling & return conventions may be + * different. For example, all function arguments will be passed on + * the stack, and components of an unboxed tuple will be returned on + * the stack rather than in registers. + */ +#ifndef NO_REGS + +/* NOTE: when testing the platform in this file we must test either + * *_HOST_ARCH and *_TARGET_ARCH, depending on whether COMPILING_GHC + * is set. This is because when we're compiling the RTS and HC code, + * the platform we're running on is the HOST, but when compiling GHC + * we want to know about the register mapping on the TARGET platform. 
+ */ +#ifdef COMPILING_GHC +#define alpha_REGS alpha_TARGET_ARCH +#define hppa1_1_REGS hppa1_1_TARGET_ARCH +#define i386_REGS i386_TARGET_ARCH +#define x86_64_REGS x86_64_TARGET_ARCH +#define m68k_REGS m68k_TARGET_ARCH +#define mips_REGS (mipsel_TARGET_ARCH || mipseb_TARGET_ARCH) +#define powerpc_REGS (powerpc_TARGET_ARCH || powerpc64_TARGET_ARCH || rs6000_TARGET_ARCH) +#define ia64_REGS ia64_TARGET_ARCH +#define sparc_REGS sparc_TARGET_ARCH +#define darwin_REGS darwin_TARGET_OS +#else +#define alpha_REGS alpha_HOST_ARCH +#define hppa1_1_REGS hppa1_1_HOST_ARCH +#define i386_REGS i386_HOST_ARCH +#define x86_64_REGS x86_64_HOST_ARCH +#define m68k_REGS m68k_HOST_ARCH +#define mips_REGS (mipsel_HOST_ARCH || mipseb_HOST_ARCH) +#define powerpc_REGS (powerpc_HOST_ARCH || powerpc64_HOST_ARCH || rs6000_HOST_ARCH) +#define ia64_REGS ia64_HOST_ARCH +#define sparc_REGS sparc_HOST_ARCH +#define darwin_REGS darwin_HOST_OS +#endif + +/* ---------------------------------------------------------------------------- + Caller saves and callee-saves regs. + + Caller-saves regs have to be saved around C-calls made from STG + land, so this file defines CALLER_SAVES_<reg> for each <reg> that + is designated caller-saves in that machine's C calling convention. + -------------------------------------------------------------------------- */ + +/* ----------------------------------------------------------------------------- + The DEC Alpha register mapping + + Alpha registers + \tr{$9}--\tr{$14} are our ``prize'' callee-save registers. + \tr{$15} is the frame pointer, and \tr{$16}--\tr{$21} are argument + registers. (These are off-limits.) We can steal some of the \tr{$22}-and-up + caller-save registers provided we do the appropriate save/restore stuff. + + \tr{$f2}--\tr{$f9} are some callee-save floating-point registers. 
+ + We cannot use \tr{$23} (aka t9), \tr{$24} (aka t10), \tr{$25} (aka + t11), \tr{$27} (aka pv), or \tr{$28} (aka at), because they are + occasionally required by the assembler to handle non-primitive + instructions (e.g. ldb, remq). Sigh! + + Cheat sheet for GDB: + + GDB here Main map + === ==== ======== + s5 $14 R1 + t1 $2 R2 + t2 $3 R3 + t3 $4 R4 + t4 $5 R5 + t5 $6 R6 + t6 $7 R7 + t7 $8 R8 + s0 $9 Sp + s2 $11 SpLim + s3 $12 Hp + t8 $22 NCG_reserved + t12 $27 NCG_reserved + -------------------------------------------------------------------------- */ + +#if alpha_REGS +# define REG(x) __asm__("$" #x) + +# define CALLER_SAVES_R2 +# define CALLER_SAVES_R3 +# define CALLER_SAVES_R4 +# define CALLER_SAVES_R5 +# define CALLER_SAVES_R6 +# define CALLER_SAVES_R7 +# define CALLER_SAVES_R8 + +# define CALLER_SAVES_USER + +# define REG_R1 14 +# define REG_R2 2 +# define REG_R3 3 +# define REG_R4 4 +# define REG_R5 5 +# define REG_R6 6 +# define REG_R7 7 +# define REG_R8 8 + +# define REG_F1 f2 +# define REG_F2 f3 +# define REG_F3 f4 +# define REG_F4 f5 + +# define REG_D1 f6 +# define REG_D2 f7 + +# define REG_Sp 9 +# define REG_SpLim 11 + +# define REG_Hp 12 + +# define NCG_Reserved_I1 22 +# define NCG_Reserved_I2 27 +# define NCG_Reserved_F1 f29 +# define NCG_Reserved_F2 f30 + +#endif /* alpha_REGS */ + +/* ----------------------------------------------------------------------------- + The HP-PA register mapping + + We cater for HP-PA 1.1. + + \tr{%r0}--\tr{%r1} are special. + \tr{%r2} is the return pointer. + \tr{%r3} is the frame pointer. + \tr{%r4}--\tr{%r18} are callee-save registers. + \tr{%r19} is a linkage table register for HPUX 8.0 shared libraries. + \tr{%r20}--\tr{%r22} are caller-save registers. + \tr{%r23}--\tr{%r26} are parameter registers. + \tr{%r27} is a global data pointer. + \tr{%r28}--\tr{%r29} are temporaries. + \tr{%r30} is the stack pointer. + \tr{%r31} is a temporary. + + \tr{%fr12}--\tr{%fr15} are some callee-save floating-point registers. 
+ \tr{%fr8}--\tr{%fr11} are some available caller-save fl-pt registers. + -------------------------------------------------------------------------- */ + +#if hppa1_1_REGS + +#define REG(x) __asm__("%" #x) + +#define REG_R1 r11 +#define REG_R2 r12 +#define REG_R3 r13 +#define REG_R4 r14 +#define REG_R5 r15 +#define REG_R6 r16 +#define REG_R7 r17 +#define REG_R8 r18 + +#define REG_F1 fr12 +#define REG_F2 fr12R +#define REG_F3 fr13 +#define REG_F4 fr13R + +#define REG_D1 fr20 /* L & R */ +#define REG_D2 fr21 /* L & R */ + +#define REG_Sp r4 +#define REG_SpLim r6 + +#define REG_Hp r7 + +#define NCG_Reserved_I1 r28 +#define NCG_Reserved_I2 r29 +#define NCG_Reserved_F1 fr8 +#define NCG_Reserved_F2 fr8R +#define NCG_Reserved_D1 fr10 +#define NCG_Reserved_D2 fr11 + +#endif /* hppa */ + +/* ----------------------------------------------------------------------------- + The x86 register mapping + + Ok, we've only got 6 general purpose registers, a frame pointer and a + stack pointer. \tr{%eax} and \tr{%edx} are return values from C functions, + hence they get trashed across ccalls and are caller saves. \tr{%ebx}, + \tr{%esi}, \tr{%edi}, \tr{%ebp} are all callee-saves. + + Reg STG-Reg + --------------- + ebx Base + ebp Sp + esi R1 + edi Hp + + Leaving SpLim out of the picture. 
+ -------------------------------------------------------------------------- */ + + +#if i386_REGS + +#define REG(x) __asm__("%" #x) + +#ifndef not_doing_dynamic_linking +#define REG_Base ebx +#endif +#define REG_Sp ebp + +#ifndef STOLEN_X86_REGS +#define STOLEN_X86_REGS 4 +#endif + +#if STOLEN_X86_REGS >= 3 +# define REG_R1 esi +#endif + +#if STOLEN_X86_REGS >= 4 +# define REG_Hp edi +#endif + +#define MAX_REAL_VANILLA_REG 1 /* always, since it defines the entry conv */ +#define MAX_REAL_FLOAT_REG 0 +#define MAX_REAL_DOUBLE_REG 0 +#define MAX_REAL_LONG_REG 0 + +#endif /* iX86 */ + +/* ----------------------------------------------------------------------------- + The x86-64 register mapping + + %rax caller-saves, don't steal this one + %rbx YES + %rcx arg reg, caller-saves + %rdx arg reg, caller-saves + %rsi arg reg, caller-saves + %rdi arg reg, caller-saves + %rbp YES (our *prime* register) + %rsp (unavailable - stack pointer) + %r8 arg reg, caller-saves + %r9 arg reg, caller-saves + %r10 caller-saves + %r11 caller-saves + %r12 YES + %r13 YES + %r14 YES + %r15 YES + + %xmm0-7 arg regs, caller-saves + %xmm8-15 caller-saves + + Use the caller-saves regs for Rn, because we don't always have to + save those (as opposed to Sp/Hp/SpLim etc. which always have to be + saved). 
+ + --------------------------------------------------------------------------- */ + +#if x86_64_REGS + +#define REG(x) __asm__("%" #x) + +#define REG_Base r13 +#define REG_Sp rbp +#define REG_Hp r12 +#define REG_R1 rbx +#define REG_R2 r14 +#define REG_R3 rsi +#define REG_R4 rdi +#define REG_R5 r8 +#define REG_R6 r9 +#define REG_SpLim r15 + +#define REG_F1 xmm1 +#define REG_F2 xmm2 +#define REG_F3 xmm3 +#define REG_F4 xmm4 + +#define REG_D1 xmm5 +#define REG_D2 xmm6 + +#define CALLER_SAVES_R3 +#define CALLER_SAVES_R4 +#define CALLER_SAVES_R5 +#define CALLER_SAVES_R6 + +#define CALLER_SAVES_F1 +#define CALLER_SAVES_F2 +#define CALLER_SAVES_F3 +#define CALLER_SAVES_F4 + +#define CALLER_SAVES_D1 +#define CALLER_SAVES_D2 + +#define MAX_REAL_VANILLA_REG 6 +#define MAX_REAL_FLOAT_REG 4 +#define MAX_REAL_DOUBLE_REG 2 +#define MAX_REAL_LONG_REG 0 + +#endif /* x86_64 */ + +/* ----------------------------------------------------------------------------- + The Motorola 680x0 register mapping + + A Sun3 (mc680x0) has eight address registers, \tr{a0} to \tr{a7}, and + eight data registers, \tr{d0} to \tr{d7}. Address operations have to + be done through address registers; data registers are used for + comparison values and data. + + Here's the register-usage picture for m68k boxes with GCC. + + \begin{tabular}{ll} + a0 & used directly by GCC \\ + a1 & used directly by GCC \\ + \\ + a2..a5 & callee-saved: available for STG registers \\ + & (a5 may be special, ``global'' register for PIC?) 
\\ + \\ + a6 & C-stack frame pointer \\ + a7 & C-stack pointer \\ + \\ + d0 & used directly by GCC \\ + d1 & used directly by GCC \\ + d2 & really needed for local optimisation by GCC \\ + \\ + d3..d7 & callee-saved: available for STG registers + \\ + fp0 & call-clobbered \\ + fp1 & call-clobbered \\ + fp2..fp7 & callee-saved: available for STG registers + \end{tabular} + -------------------------------------------------------------------------- */ + +#if m68k_REGS + +#define REG(x) __asm__(#x) + +#define REG_Base a2 + +#define REG_Sp a3 +#define REG_SpLim d3 + +#define REG_Hp d4 + +#define REG_R1 a5 +#define REG_R2 d6 +#define MAX_REAL_VANILLA_REG 2 + +#define REG_Ret d7 + +#define REG_F1 fp2 +#define REG_F2 fp3 +#define REG_F3 fp4 +#define REG_F4 fp5 + +#define REG_D1 fp6 +#define REG_D2 fp7 + +#endif /* m68k */ + +/* ----------------------------------------------------------------------------- + The DECstation (MIPS) register mapping + + Here's at least some simple stuff about registers on a MIPS. + + \tr{s0}--\tr{s7} are callee-save integer registers; they are our + ``prize'' stolen registers. There is also a wad of callee-save + floating-point registers, \tr{$f20}--\tr{$f31}; we'll use some of + those. + + \tr{t0}--\tr{t9} are caller-save (``temporary?'') integer registers. + We can steal some, but we might have to save/restore around ccalls. 
+ -------------------------------------------------------------------------- */ + +#if mips_REGS + +#define REG(x) __asm__("$" #x) + +#define CALLER_SAVES_R5 +#define CALLER_SAVES_R6 +#define CALLER_SAVES_R7 +#define CALLER_SAVES_R8 + +#define CALLER_SAVES_USER + +#define REG_R1 16 +#define REG_R2 17 +#define REG_R3 18 +#define REG_R4 19 +#define REG_R5 12 +#define REG_R6 13 +#define REG_R7 14 +#define REG_R8 15 + +#define REG_F1 f20 +#define REG_F2 f22 +#define REG_F3 f24 +#define REG_F4 f26 + +#define REG_D1 f28 +#define REG_D2 f30 + +#define REG_Sp 20 +#define REG_SpLim 21 + +#define REG_Hp 22 + +#define REG_Base 30 + +#endif /* mipse[lb] */ + +/* ----------------------------------------------------------------------------- + The PowerPC register mapping + + 0 system glue? (caller-save, volatile) + 1 SP (callee-save, non-volatile) + 2 AIX, powerpc64-linux: + RTOC (a strange special case) + darwin: + (caller-save, volatile) + powerpc32-linux: + reserved for use by system + + 3-10 args/return (caller-save, volatile) + 11,12 system glue? (caller-save, volatile) + 13 on 64-bit: reserved for thread state pointer + on 32-bit: (callee-save, non-volatile) + 14-31 (callee-save, non-volatile) + + f0 (caller-save, volatile) + f1-f13 args/return (caller-save, volatile) + f14-f31 (callee-save, non-volatile) + + \tr{14}--\tr{31} are wonderful callee-save registers on all ppc OSes. + \tr{0}--\tr{12} are caller-save registers. + + \tr{%f14}--\tr{%f31} are callee-save floating-point registers. + + We can do the Whole Business with callee-save registers only! 
+ -------------------------------------------------------------------------- */ + +#if powerpc_REGS + +#define REG(x) __asm__(#x) + +#define REG_R1 r14 +#define REG_R2 r15 +#define REG_R3 r16 +#define REG_R4 r17 +#define REG_R5 r18 +#define REG_R6 r19 +#define REG_R7 r20 +#define REG_R8 r21 + +#if darwin_REGS + +#define REG_F1 f14 +#define REG_F2 f15 +#define REG_F3 f16 +#define REG_F4 f17 + +#define REG_D1 f18 +#define REG_D2 f19 + +#else + +#define REG_F1 fr14 +#define REG_F2 fr15 +#define REG_F3 fr16 +#define REG_F4 fr17 + +#define REG_D1 fr18 +#define REG_D2 fr19 + +#endif + +#define REG_Sp r22 +#define REG_SpLim r24 + +#define REG_Hp r25 + +#define REG_Base r27 + +#endif /* powerpc */ + +/* ----------------------------------------------------------------------------- + The IA64 register mapping + + We place the general registers in the locals area of the register stack, + so that the call mechanism takes care of saving them for us. We reserve + the first 16 for gcc's use - since gcc uses the highest used register to + determine the register stack frame size, this gives us a constant size + register stack frame. + + \tr{f16-f32} are the callee-saved floating point registers. + -------------------------------------------------------------------------- */ + +#if ia64_REGS + +#define REG(x) __asm__(#x) + +#define REG_R1 loc16 +#define REG_R2 loc17 +#define REG_R3 loc18 +#define REG_R4 loc19 +#define REG_R5 loc20 +#define REG_R6 loc21 +#define REG_R7 loc22 +#define REG_R8 loc23 + +#define REG_F1 f16 +#define REG_F2 f17 +#define REG_F3 f18 +#define REG_F4 f19 + +#define REG_D1 f20 +#define REG_D2 f21 + +#define REG_Sp loc24 +#define REG_SpLim loc26 + +#define REG_Hp loc27 + +#endif /* ia64 */ + +/* ----------------------------------------------------------------------------- + The Sun SPARC register mapping + + !! IMPORTANT: if you change this register mapping you must also update + compiler/nativeGen/SPARC/Regs.hs. That file handles the + mapping for the NCG. 
This one only affects via-c code. + + The SPARC register (window) story: Remember, within the Haskell + Threaded World, we essentially ``shut down'' the register-window + mechanism---the window doesn't move at all while in this World. It + *does* move, of course, if we call out to arbitrary~C... + + The %i, %l, and %o registers (8 each) are the input, local, and + output registers visible in one register window. The 8 %g (global) + registers are visible all the time. + + zero: always zero + scratch: volatile across C-fn calls. used by linker. + app: usable by application + system: reserved for system + + alloc: allocated to in the register allocator, intra-closure only + + GHC usage v8 ABI v9 ABI + Global + %g0 zero zero zero + %g1 alloc scratch scratch + %g2 alloc app app + %g3 alloc app app + %g4 alloc app scratch + %g5 system scratch + %g6 system system + %g7 system system + + Output: can be zapped by callee + %o0-o5 alloc caller saves + %o6 C stack ptr + %o7 C ret addr + + Local: maintained by register windowing mechanism + %l0 alloc + %l1 R1 + %l2 R2 + %l3 R3 + %l4 R4 + %l5 R5 + %l6 alloc + %l7 alloc + + Input + %i0 Sp + %i1 Base + %i2 SpLim + %i3 Hp + %i4 alloc + %i5 R6 + %i6 C frame ptr + %i7 C ret addr + + The paired nature of the floating point registers causes complications for + the native code generator. For convenience, we pretend that the first 22 + fp regs %f0 .. %f21 are actually 11 double regs, and the remaining 10 are + float (single) regs. The NCG acts accordingly. That means that the + following FP assignment is rather fragile, and should only be changed + with extreme care. 
The current scheme is: + + %f0 /%f1 FP return from C + %f2 /%f3 D1 + %f4 /%f5 D2 + %f6 /%f7 ncg double spill tmp #1 + %f8 /%f9 ncg double spill tmp #2 + %f10/%f11 allocatable + %f12/%f13 allocatable + %f14/%f15 allocatable + %f16/%f17 allocatable + %f18/%f19 allocatable + %f20/%f21 allocatable + + %f22 F1 + %f23 F2 + %f24 F3 + %f25 F4 + %f26 ncg single spill tmp #1 + %f27 ncg single spill tmp #2 + %f28 allocatable + %f29 allocatable + %f30 allocatable + %f31 allocatable + + -------------------------------------------------------------------------- */ + +#if sparc_REGS + +#define REG(x) __asm__("%" #x) + +#define CALLER_SAVES_USER + +#define CALLER_SAVES_F1 +#define CALLER_SAVES_F2 +#define CALLER_SAVES_F3 +#define CALLER_SAVES_F4 +#define CALLER_SAVES_D1 +#define CALLER_SAVES_D2 + +#define REG_R1 l1 +#define REG_R2 l2 +#define REG_R3 l3 +#define REG_R4 l4 +#define REG_R5 l5 +#define REG_R6 i5 + +#define REG_F1 f22 +#define REG_F2 f23 +#define REG_F3 f24 +#define REG_F4 f25 + +/* for each of the double arg regs, + Dn_2 is the high half. */ + +#define REG_D1 f2 +#define REG_D1_2 f3 + +#define REG_D2 f4 +#define REG_D2_2 f5 + +#define REG_Sp i0 +#define REG_SpLim i2 + +#define REG_Hp i3 + +#define REG_Base i1 + +/* +#define NCG_SpillTmp_I1 g1 +#define NCG_SpillTmp_I2 g2 +#define NCG_SpillTmp_F1 f26 +#define NCG_SpillTmp_F2 f27 +#define NCG_SpillTmp_D1 f6 +#define NCG_SpillTmp_D2 f8 +*/ + +#define NCG_FirstFloatReg f22 + +#endif /* sparc */ + +#endif /* NO_REGS */ + +/* ----------------------------------------------------------------------------- + * These constants define how many stg registers will be used for + * passing arguments (and results, in the case of an unboxed-tuple + * return). + * + * We usually set MAX_REAL_VANILLA_REG and co. to be the number of the + * highest STG register to occupy a real machine register, otherwise + * the calling conventions will needlessly shuffle data between the + * stack and memory-resident STG registers. 
We might occasionally + * set these macros to other values for testing, though. + * + * Registers above these values might still be used, for instance to + * communicate with PrimOps and RTS functions. + */ + +#ifndef MAX_REAL_VANILLA_REG +# if defined(REG_R8) +# define MAX_REAL_VANILLA_REG 8 +# elif defined(REG_R7) +# define MAX_REAL_VANILLA_REG 7 +# elif defined(REG_R6) +# define MAX_REAL_VANILLA_REG 6 +# elif defined(REG_R5) +# define MAX_REAL_VANILLA_REG 5 +# elif defined(REG_R4) +# define MAX_REAL_VANILLA_REG 4 +# elif defined(REG_R3) +# define MAX_REAL_VANILLA_REG 3 +# elif defined(REG_R2) +# define MAX_REAL_VANILLA_REG 2 +# elif defined(REG_R1) +# define MAX_REAL_VANILLA_REG 1 +# else +# define MAX_REAL_VANILLA_REG 0 +# endif +#endif + +#ifndef MAX_REAL_FLOAT_REG +# if defined(REG_F4) +# define MAX_REAL_FLOAT_REG 4 +# elif defined(REG_F3) +# define MAX_REAL_FLOAT_REG 3 +# elif defined(REG_F2) +# define MAX_REAL_FLOAT_REG 2 +# elif defined(REG_F1) +# define MAX_REAL_FLOAT_REG 1 +# else +# define MAX_REAL_FLOAT_REG 0 +# endif +#endif + +#ifndef MAX_REAL_DOUBLE_REG +# if defined(REG_D2) +# define MAX_REAL_DOUBLE_REG 2 +# elif defined(REG_D1) +# define MAX_REAL_DOUBLE_REG 1 +# else +# define MAX_REAL_DOUBLE_REG 0 +# endif +#endif + +#ifndef MAX_REAL_LONG_REG +# if defined(REG_L1) +# define MAX_REAL_LONG_REG 1 +# else +# define MAX_REAL_LONG_REG 0 +# endif +#endif + +/* define NO_ARG_REGS if we have no argument registers at all (we can + * optimise certain code paths using this predicate). 
+ */ +#if MAX_REAL_VANILLA_REG < 2 +#define NO_ARG_REGS +#else +#undef NO_ARG_REGS +#endif + +#endif /* MACHREGS_H */ diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h new file mode 100644 index 0000000000..1591570780 --- /dev/null +++ b/includes/stg/MiscClosures.h @@ -0,0 +1,642 @@ +/* ---------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2004 + * + * Declarations for various symbols exported by the RTS. + * + * ToDo: many of the symbols in here don't need to be exported, but + * our Cmm code generator doesn't know how to generate local symbols + * for the RTS bits (it assumes all RTS symbols are external). + * + * See wiki:Commentary/Compiler/Backends/PprC#Prototypes + * + * --------------------------------------------------------------------------*/ + +#ifndef STGMISCCLOSURES_H +#define STGMISCCLOSURES_H + +#if IN_STG_CODE +# define RTS_RET_INFO(i) extern W_(i)[] +# define RTS_FUN_INFO(i) extern W_(i)[] +# define RTS_THUNK_INFO(i) extern W_(i)[] +# define RTS_INFO(i) extern W_(i)[] +# define RTS_CLOSURE(i) extern W_(i)[] +# define RTS_FUN(f) extern DLL_IMPORT_RTS StgFunPtr f(void) +#else +# define RTS_RET_INFO(i) extern DLL_IMPORT_RTS const StgRetInfoTable i +# define RTS_FUN_INFO(i) extern DLL_IMPORT_RTS const StgFunInfoTable i +# define RTS_THUNK_INFO(i) extern DLL_IMPORT_RTS const StgThunkInfoTable i +# define RTS_INFO(i) extern DLL_IMPORT_RTS const StgInfoTable i +# define RTS_CLOSURE(i) extern DLL_IMPORT_RTS StgClosure i +# define RTS_FUN(f) extern DLL_IMPORT_RTS StgFunPtr f(void) +#endif + +#ifdef TABLES_NEXT_TO_CODE +# define RTS_ENTRY(f) /* nothing */ +#else +# define RTS_ENTRY(f) RTS_FUN(f) +#endif + +/* Stack frames */ +RTS_RET_INFO(stg_upd_frame_info); +RTS_RET_INFO(stg_marked_upd_frame_info); +RTS_RET_INFO(stg_noupd_frame_info); +RTS_RET_INFO(stg_catch_frame_info); +RTS_RET_INFO(stg_catch_retry_frame_info); +RTS_RET_INFO(stg_atomically_frame_info); 
+RTS_RET_INFO(stg_atomically_waiting_frame_info); +RTS_RET_INFO(stg_catch_stm_frame_info); +RTS_RET_INFO(stg_unblockAsyncExceptionszh_ret_info); + +RTS_ENTRY(stg_upd_frame_ret); +RTS_ENTRY(stg_marked_upd_frame_ret); + +// RTS_FUN(stg_interp_constr_entry); +// +// This is referenced using the FFI in the compiler (ByteCodeItbls), +// so we can't give it the correct type here because the prototypes +// would clash (FFI references are always declared with type StgWord[] +// in the generated C code). + +/* Magic glue code for when compiled code returns a value in R1/F1/D1 + or a VoidRep to the interpreter. */ +RTS_RET_INFO(stg_ctoi_R1p_info); +RTS_RET_INFO(stg_ctoi_R1unpt_info); +RTS_RET_INFO(stg_ctoi_R1n_info); +RTS_RET_INFO(stg_ctoi_F1_info); +RTS_RET_INFO(stg_ctoi_D1_info); +RTS_RET_INFO(stg_ctoi_L1_info); +RTS_RET_INFO(stg_ctoi_V_info); + +RTS_ENTRY(stg_ctoi_R1p_ret); +RTS_ENTRY(stg_ctoi_R1unpt_ret); +RTS_ENTRY(stg_ctoi_R1n_ret); +RTS_ENTRY(stg_ctoi_F1_ret); +RTS_ENTRY(stg_ctoi_D1_ret); +RTS_ENTRY(stg_ctoi_L1_ret); +RTS_ENTRY(stg_ctoi_V_ret); + +RTS_RET_INFO(stg_apply_interp_info); +RTS_ENTRY(stg_apply_interp_ret); + +RTS_INFO(stg_IND_info); +RTS_INFO(stg_IND_direct_info); +RTS_INFO(stg_IND_STATIC_info); +RTS_INFO(stg_IND_PERM_info); +RTS_INFO(stg_IND_OLDGEN_info); +RTS_INFO(stg_IND_OLDGEN_PERM_info); +RTS_INFO(stg_CAF_UNENTERED_info); +RTS_INFO(stg_CAF_ENTERED_info); +RTS_INFO(stg_WHITEHOLE_info); +RTS_INFO(stg_BLACKHOLE_info); +RTS_INFO(__stg_EAGER_BLACKHOLE_info); +RTS_INFO(stg_CAF_BLACKHOLE_info); + +RTS_FUN_INFO(stg_BCO_info); +RTS_INFO(stg_EVACUATED_info); +RTS_INFO(stg_WEAK_info); +RTS_INFO(stg_DEAD_WEAK_info); +RTS_INFO(stg_STABLE_NAME_info); +RTS_INFO(stg_MVAR_CLEAN_info); +RTS_INFO(stg_MVAR_DIRTY_info); +RTS_INFO(stg_TSO_info); +RTS_INFO(stg_ARR_WORDS_info); +RTS_INFO(stg_MUT_ARR_WORDS_info); +RTS_INFO(stg_MUT_ARR_PTRS_CLEAN_info); +RTS_INFO(stg_MUT_ARR_PTRS_DIRTY_info); +RTS_INFO(stg_MUT_ARR_PTRS_FROZEN_info); +RTS_INFO(stg_MUT_ARR_PTRS_FROZEN0_info); 
+RTS_INFO(stg_MUT_VAR_CLEAN_info); +RTS_INFO(stg_MUT_VAR_DIRTY_info); +RTS_INFO(stg_END_TSO_QUEUE_info); +RTS_INFO(stg_MUT_CONS_info); +RTS_INFO(stg_catch_info); +RTS_INFO(stg_PAP_info); +RTS_INFO(stg_AP_info); +RTS_INFO(stg_AP_NOUPD_info); +RTS_INFO(stg_AP_STACK_info); +RTS_INFO(stg_dummy_ret_info); +RTS_INFO(stg_raise_info); +RTS_INFO(stg_raise_ret_info); +RTS_INFO(stg_TVAR_WATCH_QUEUE_info); +RTS_INFO(stg_INVARIANT_CHECK_QUEUE_info); +RTS_INFO(stg_ATOMIC_INVARIANT_info); +RTS_INFO(stg_TVAR_info); +RTS_INFO(stg_TREC_CHUNK_info); +RTS_INFO(stg_TREC_HEADER_info); +RTS_INFO(stg_END_STM_WATCH_QUEUE_info); +RTS_INFO(stg_END_INVARIANT_CHECK_QUEUE_info); +RTS_INFO(stg_END_STM_CHUNK_LIST_info); +RTS_INFO(stg_NO_TREC_info); + +RTS_ENTRY(stg_IND_entry); +RTS_ENTRY(stg_IND_direct_entry); +RTS_ENTRY(stg_IND_STATIC_entry); +RTS_ENTRY(stg_IND_PERM_entry); +RTS_ENTRY(stg_IND_OLDGEN_entry); +RTS_ENTRY(stg_IND_OLDGEN_PERM_entry); +RTS_ENTRY(stg_CAF_UNENTERED_entry); +RTS_ENTRY(stg_CAF_ENTERED_entry); +RTS_ENTRY(stg_WHITEHOLE_entry); +RTS_ENTRY(stg_BLACKHOLE_entry); +RTS_ENTRY(__stg_EAGER_BLACKHOLE_entry); +RTS_ENTRY(stg_CAF_BLACKHOLE_entry); +RTS_ENTRY(stg_BCO_entry); +RTS_ENTRY(stg_EVACUATED_entry); +RTS_ENTRY(stg_WEAK_entry); +RTS_ENTRY(stg_DEAD_WEAK_entry); +RTS_ENTRY(stg_STABLE_NAME_entry); +RTS_ENTRY(stg_FULL_MVAR_entry); +RTS_ENTRY(stg_EMPTY_MVAR_entry); +RTS_ENTRY(stg_TSO_entry); +RTS_ENTRY(stg_ARR_WORDS_entry); +RTS_ENTRY(stg_MUT_ARR_WORDS_entry); +RTS_ENTRY(stg_MUT_ARR_PTRS_CLEAN_entry); +RTS_ENTRY(stg_MUT_ARR_PTRS_DIRTY_entry); +RTS_ENTRY(stg_MUT_ARR_PTRS_FROZEN_entry); +RTS_ENTRY(stg_MUT_ARR_PTRS_FROZEN0_entry); +RTS_ENTRY(stg_MUT_VAR_CLEAN_entry); +RTS_ENTRY(stg_MUT_VAR_DIRTY_entry); +RTS_ENTRY(stg_END_TSO_QUEUE_entry); +RTS_ENTRY(stg_MUT_CONS_entry); +RTS_ENTRY(stg_catch_entry); +RTS_ENTRY(stg_PAP_entry); +RTS_ENTRY(stg_AP_entry); +RTS_ENTRY(stg_AP_NOUPD_entry); +RTS_ENTRY(stg_AP_STACK_entry); +RTS_ENTRY(stg_dummy_ret_entry); +RTS_ENTRY(stg_raise_entry); 
+RTS_ENTRY(stg_raise_ret_ret); +RTS_ENTRY(stg_END_STM_WATCH_QUEUE_entry); +RTS_ENTRY(stg_END_INVARIANT_CHECK_QUEUE_entry); +RTS_ENTRY(stg_END_STM_CHUNK_LIST_entry); +RTS_ENTRY(stg_NO_TREC_entry); +RTS_ENTRY(stg_TVAR_entry); +RTS_ENTRY(stg_TVAR_WATCH_QUEUE_entry); +RTS_ENTRY(stg_INVARIANT_CHECK_QUEUE_entry); +RTS_ENTRY(stg_ATOMIC_INVARIANT_entry); +RTS_ENTRY(stg_TREC_CHUNK_entry); +RTS_ENTRY(stg_TREC_HEADER_entry); + + +RTS_ENTRY(stg_unblockAsyncExceptionszh_ret_ret); +RTS_ENTRY(stg_blockAsyncExceptionszh_ret_ret); +RTS_ENTRY(stg_catch_frame_ret); +RTS_ENTRY(stg_catch_retry_frame_ret); +RTS_ENTRY(stg_atomically_frame_ret); +RTS_ENTRY(stg_atomically_waiting_frame_ret); +RTS_ENTRY(stg_catch_stm_frame_ret); +RTS_ENTRY(stg_catch_frame_ret); +RTS_ENTRY(stg_catch_entry); +RTS_ENTRY(stg_raise_entry); + +/* closures */ + +RTS_CLOSURE(stg_END_TSO_QUEUE_closure); +RTS_CLOSURE(stg_NO_FINALIZER_closure); +RTS_CLOSURE(stg_dummy_ret_closure); +RTS_CLOSURE(stg_forceIO_closure); + +RTS_CLOSURE(stg_END_STM_WATCH_QUEUE_closure); +RTS_CLOSURE(stg_END_INVARIANT_CHECK_QUEUE_closure); +RTS_CLOSURE(stg_END_STM_CHUNK_LIST_closure); +RTS_CLOSURE(stg_NO_TREC_closure); + +RTS_ENTRY(stg_NO_FINALIZER_entry); +RTS_ENTRY(stg_END_EXCEPTION_LIST_entry); +RTS_ENTRY(stg_EXCEPTION_CONS_entry); + +#if IN_STG_CODE +extern DLL_IMPORT_RTS StgWordArray stg_CHARLIKE_closure; +extern DLL_IMPORT_RTS StgWordArray stg_INTLIKE_closure; +#else +extern DLL_IMPORT_RTS StgIntCharlikeClosure stg_CHARLIKE_closure[]; +extern DLL_IMPORT_RTS StgIntCharlikeClosure stg_INTLIKE_closure[]; +#endif + +/* StgStartup */ + +RTS_RET_INFO(stg_forceIO_info); +RTS_ENTRY(stg_forceIO_ret); + +RTS_RET_INFO(stg_noforceIO_info); +RTS_ENTRY(stg_noforceIO_ret); + +/* standard entry points */ + +/* standard selector thunks */ + +RTS_ENTRY(stg_sel_ret_0_upd_ret); +RTS_ENTRY(stg_sel_ret_1_upd_ret); +RTS_ENTRY(stg_sel_ret_2_upd_ret); +RTS_ENTRY(stg_sel_ret_3_upd_ret); +RTS_ENTRY(stg_sel_ret_4_upd_ret); +RTS_ENTRY(stg_sel_ret_5_upd_ret); 
+RTS_ENTRY(stg_sel_ret_6_upd_ret); +RTS_ENTRY(stg_sel_ret_7_upd_ret); +RTS_ENTRY(stg_sel_ret_8_upd_ret); +RTS_ENTRY(stg_sel_ret_8_upd_ret); +RTS_ENTRY(stg_sel_ret_9_upd_ret); +RTS_ENTRY(stg_sel_ret_10_upd_ret); +RTS_ENTRY(stg_sel_ret_11_upd_ret); +RTS_ENTRY(stg_sel_ret_12_upd_ret); +RTS_ENTRY(stg_sel_ret_13_upd_ret); +RTS_ENTRY(stg_sel_ret_14_upd_ret); +RTS_ENTRY(stg_sel_ret_15_upd_ret); + +RTS_INFO(stg_sel_0_upd_info); +RTS_INFO(stg_sel_1_upd_info); +RTS_INFO(stg_sel_2_upd_info); +RTS_INFO(stg_sel_3_upd_info); +RTS_INFO(stg_sel_4_upd_info); +RTS_INFO(stg_sel_5_upd_info); +RTS_INFO(stg_sel_6_upd_info); +RTS_INFO(stg_sel_7_upd_info); +RTS_INFO(stg_sel_8_upd_info); +RTS_INFO(stg_sel_9_upd_info); +RTS_INFO(stg_sel_10_upd_info); +RTS_INFO(stg_sel_11_upd_info); +RTS_INFO(stg_sel_12_upd_info); +RTS_INFO(stg_sel_13_upd_info); +RTS_INFO(stg_sel_14_upd_info); +RTS_INFO(stg_sel_15_upd_info); + +RTS_ENTRY(stg_sel_0_upd_entry); +RTS_ENTRY(stg_sel_1_upd_entry); +RTS_ENTRY(stg_sel_2_upd_entry); +RTS_ENTRY(stg_sel_3_upd_entry); +RTS_ENTRY(stg_sel_4_upd_entry); +RTS_ENTRY(stg_sel_5_upd_entry); +RTS_ENTRY(stg_sel_6_upd_entry); +RTS_ENTRY(stg_sel_7_upd_entry); +RTS_ENTRY(stg_sel_8_upd_entry); +RTS_ENTRY(stg_sel_9_upd_entry); +RTS_ENTRY(stg_sel_10_upd_entry); +RTS_ENTRY(stg_sel_11_upd_entry); +RTS_ENTRY(stg_sel_12_upd_entry); +RTS_ENTRY(stg_sel_13_upd_entry); +RTS_ENTRY(stg_sel_14_upd_entry); +RTS_ENTRY(stg_sel_15_upd_entry); + +RTS_ENTRY(stg_sel_ret_0_noupd_ret); +RTS_ENTRY(stg_sel_ret_1_noupd_ret); +RTS_ENTRY(stg_sel_ret_2_noupd_ret); +RTS_ENTRY(stg_sel_ret_3_noupd_ret); +RTS_ENTRY(stg_sel_ret_4_noupd_ret); +RTS_ENTRY(stg_sel_ret_5_noupd_ret); +RTS_ENTRY(stg_sel_ret_6_noupd_ret); +RTS_ENTRY(stg_sel_ret_7_noupd_ret); +RTS_ENTRY(stg_sel_ret_8_noupd_ret); +RTS_ENTRY(stg_sel_ret_8_noupd_ret); +RTS_ENTRY(stg_sel_ret_9_noupd_ret); +RTS_ENTRY(stg_sel_ret_10_noupd_ret); +RTS_ENTRY(stg_sel_ret_11_noupd_ret); +RTS_ENTRY(stg_sel_ret_12_noupd_ret); +RTS_ENTRY(stg_sel_ret_13_noupd_ret); 
+RTS_ENTRY(stg_sel_ret_14_noupd_ret); +RTS_ENTRY(stg_sel_ret_15_noupd_ret); + +RTS_INFO(stg_sel_0_noupd_info); +RTS_INFO(stg_sel_1_noupd_info); +RTS_INFO(stg_sel_2_noupd_info); +RTS_INFO(stg_sel_3_noupd_info); +RTS_INFO(stg_sel_4_noupd_info); +RTS_INFO(stg_sel_5_noupd_info); +RTS_INFO(stg_sel_6_noupd_info); +RTS_INFO(stg_sel_7_noupd_info); +RTS_INFO(stg_sel_8_noupd_info); +RTS_INFO(stg_sel_9_noupd_info); +RTS_INFO(stg_sel_10_noupd_info); +RTS_INFO(stg_sel_11_noupd_info); +RTS_INFO(stg_sel_12_noupd_info); +RTS_INFO(stg_sel_13_noupd_info); +RTS_INFO(stg_sel_14_noupd_info); +RTS_INFO(stg_sel_15_noupd_info); + +RTS_ENTRY(stg_sel_0_noupd_entry); +RTS_ENTRY(stg_sel_1_noupd_entry); +RTS_ENTRY(stg_sel_2_noupd_entry); +RTS_ENTRY(stg_sel_3_noupd_entry); +RTS_ENTRY(stg_sel_4_noupd_entry); +RTS_ENTRY(stg_sel_5_noupd_entry); +RTS_ENTRY(stg_sel_6_noupd_entry); +RTS_ENTRY(stg_sel_7_noupd_entry); +RTS_ENTRY(stg_sel_8_noupd_entry); +RTS_ENTRY(stg_sel_9_noupd_entry); +RTS_ENTRY(stg_sel_10_noupd_entry); +RTS_ENTRY(stg_sel_11_noupd_entry); +RTS_ENTRY(stg_sel_12_noupd_entry); +RTS_ENTRY(stg_sel_13_noupd_entry); +RTS_ENTRY(stg_sel_14_noupd_entry); +RTS_ENTRY(stg_sel_15_noupd_entry); + +/* standard ap thunks */ + +RTS_THUNK_INFO(stg_ap_1_upd_info); +RTS_THUNK_INFO(stg_ap_2_upd_info); +RTS_THUNK_INFO(stg_ap_3_upd_info); +RTS_THUNK_INFO(stg_ap_4_upd_info); +RTS_THUNK_INFO(stg_ap_5_upd_info); +RTS_THUNK_INFO(stg_ap_6_upd_info); +RTS_THUNK_INFO(stg_ap_7_upd_info); + +RTS_ENTRY(stg_ap_1_upd_entry); +RTS_ENTRY(stg_ap_2_upd_entry); +RTS_ENTRY(stg_ap_3_upd_entry); +RTS_ENTRY(stg_ap_4_upd_entry); +RTS_ENTRY(stg_ap_5_upd_entry); +RTS_ENTRY(stg_ap_6_upd_entry); +RTS_ENTRY(stg_ap_7_upd_entry); + +/* standard application routines (see also rts/gen_apply.py, + * and compiler/codeGen/CgStackery.lhs). 
+ */ +RTS_RET_INFO(stg_ap_v_info); +RTS_RET_INFO(stg_ap_f_info); +RTS_RET_INFO(stg_ap_d_info); +RTS_RET_INFO(stg_ap_l_info); +RTS_RET_INFO(stg_ap_n_info); +RTS_RET_INFO(stg_ap_p_info); +RTS_RET_INFO(stg_ap_pv_info); +RTS_RET_INFO(stg_ap_pp_info); +RTS_RET_INFO(stg_ap_ppv_info); +RTS_RET_INFO(stg_ap_ppp_info); +RTS_RET_INFO(stg_ap_pppv_info); +RTS_RET_INFO(stg_ap_pppp_info); +RTS_RET_INFO(stg_ap_ppppp_info); +RTS_RET_INFO(stg_ap_pppppp_info); + +RTS_ENTRY(stg_ap_v_ret); +RTS_ENTRY(stg_ap_f_ret); +RTS_ENTRY(stg_ap_d_ret); +RTS_ENTRY(stg_ap_l_ret); +RTS_ENTRY(stg_ap_n_ret); +RTS_ENTRY(stg_ap_p_ret); +RTS_ENTRY(stg_ap_pv_ret); +RTS_ENTRY(stg_ap_pp_ret); +RTS_ENTRY(stg_ap_ppv_ret); +RTS_ENTRY(stg_ap_ppp_ret); +RTS_ENTRY(stg_ap_pppv_ret); +RTS_ENTRY(stg_ap_pppp_ret); +RTS_ENTRY(stg_ap_ppppp_ret); +RTS_ENTRY(stg_ap_pppppp_ret); + +RTS_FUN(stg_ap_0_fast); +RTS_FUN(stg_ap_v_fast); +RTS_FUN(stg_ap_f_fast); +RTS_FUN(stg_ap_d_fast); +RTS_FUN(stg_ap_l_fast); +RTS_FUN(stg_ap_n_fast); +RTS_FUN(stg_ap_p_fast); +RTS_FUN(stg_ap_pv_fast); +RTS_FUN(stg_ap_pp_fast); +RTS_FUN(stg_ap_ppv_fast); +RTS_FUN(stg_ap_ppp_fast); +RTS_FUN(stg_ap_pppv_fast); +RTS_FUN(stg_ap_pppp_fast); +RTS_FUN(stg_ap_ppppp_fast); +RTS_FUN(stg_ap_pppppp_fast); +RTS_FUN(stg_PAP_apply); + +/* standard GC & stack check entry points, all defined in HeapStackCheck.hc */ + +RTS_RET_INFO(stg_enter_info); +RTS_ENTRY(stg_enter_ret); + +RTS_RET_INFO(stg_gc_void_info); +RTS_ENTRY(stg_gc_void_ret); + +RTS_FUN(__stg_gc_enter_1); + +RTS_FUN(stg_gc_noregs); + +RTS_RET_INFO(stg_gc_unpt_r1_info); +RTS_ENTRY(stg_gc_unpt_r1_ret); +RTS_FUN(stg_gc_unpt_r1); + +RTS_RET_INFO(stg_gc_unbx_r1_info); +RTS_ENTRY(stg_gc_unbx_r1_ret); +RTS_FUN(stg_gc_unbx_r1); + +RTS_RET_INFO(stg_gc_f1_info); +RTS_ENTRY(stg_gc_f1_ret); +RTS_FUN(stg_gc_f1); + +RTS_RET_INFO(stg_gc_d1_info); +RTS_ENTRY(stg_gc_d1_ret); +RTS_FUN(stg_gc_d1); + +RTS_RET_INFO(stg_gc_l1_info); +RTS_ENTRY(stg_gc_l1_ret); +RTS_FUN(stg_gc_l1); + +RTS_FUN(__stg_gc_fun); 
+RTS_RET_INFO(stg_gc_fun_info); +RTS_ENTRY(stg_gc_fun_ret); + +RTS_RET_INFO(stg_gc_gen_info); +RTS_ENTRY(stg_gc_gen_ret); +RTS_FUN(stg_gc_gen); + +RTS_ENTRY(stg_ut_1_0_unreg_ret); +RTS_RET_INFO(stg_ut_1_0_unreg_info); + +RTS_FUN(stg_gc_gen_hp); +RTS_FUN(stg_gc_ut); +RTS_FUN(stg_gen_yield); +RTS_FUN(stg_yield_noregs); +RTS_FUN(stg_yield_to_interpreter); +RTS_FUN(stg_gen_block); +RTS_FUN(stg_block_noregs); +RTS_FUN(stg_block_1); +RTS_FUN(stg_block_blackhole); +RTS_FUN(stg_block_blackhole_finally); +RTS_FUN(stg_block_takemvar); +RTS_ENTRY(stg_block_takemvar_ret); +RTS_FUN(stg_block_putmvar); +RTS_ENTRY(stg_block_putmvar_ret); +#ifdef mingw32_HOST_OS +RTS_FUN(stg_block_async); +RTS_ENTRY(stg_block_async_ret); +RTS_FUN(stg_block_async_void); +RTS_ENTRY(stg_block_async_void_ret); +#endif +RTS_FUN(stg_block_stmwait); +RTS_FUN(stg_block_throwto); +RTS_ENTRY(stg_block_throwto_ret); +RTS_RET_INFO(stg_block_throwto_info); + +/* Entry/exit points from StgStartup.cmm */ + +RTS_RET_INFO(stg_stop_thread_info); +RTS_ENTRY(stg_stop_thread_ret); + +RTS_FUN(stg_returnToStackTop); +RTS_FUN(stg_returnToSched); +RTS_FUN(stg_returnToSchedNotPaused); +RTS_FUN(stg_returnToSchedButFirst); +RTS_FUN(stg_threadFinished); + +RTS_FUN(stg_init_finish); +RTS_FUN(stg_init); + +RTS_FUN(StgReturn); + +/* ----------------------------------------------------------------------------- + PrimOps + -------------------------------------------------------------------------- */ + +RTS_FUN(plusIntegerzh_fast); +RTS_FUN(minusIntegerzh_fast); +RTS_FUN(timesIntegerzh_fast); +RTS_FUN(gcdIntegerzh_fast); +RTS_FUN(quotRemIntegerzh_fast); +RTS_FUN(quotIntegerzh_fast); +RTS_FUN(remIntegerzh_fast); +RTS_FUN(divExactIntegerzh_fast); +RTS_FUN(divModIntegerzh_fast); + +RTS_FUN(cmpIntegerIntzh_fast); +RTS_FUN(cmpIntegerzh_fast); +RTS_FUN(integer2Intzh_fast); +RTS_FUN(integer2Wordzh_fast); +RTS_FUN(gcdIntegerIntzh_fast); +RTS_FUN(gcdIntzh_fast); + +RTS_FUN(int2Integerzh_fast); +RTS_FUN(word2Integerzh_fast); + 
+RTS_FUN(decodeFloatzuIntzh_fast); +RTS_FUN(decodeDoublezh_fast); +RTS_FUN(decodeDoublezu2Intzh_fast); + +RTS_FUN(andIntegerzh_fast); +RTS_FUN(orIntegerzh_fast); +RTS_FUN(xorIntegerzh_fast); +RTS_FUN(complementIntegerzh_fast); + +#if SIZEOF_HSINT == 4 + +RTS_FUN(int64ToIntegerzh_fast); +RTS_FUN(word64ToIntegerzh_fast); + +#endif + +RTS_FUN(unsafeThawArrayzh_fast); +RTS_FUN(newByteArrayzh_fast); +RTS_FUN(newPinnedByteArrayzh_fast); +RTS_FUN(newAlignedPinnedByteArrayzh_fast); +RTS_FUN(newArrayzh_fast); + +RTS_FUN(newMutVarzh_fast); +RTS_FUN(atomicModifyMutVarzh_fast); + +RTS_FUN(isEmptyMVarzh_fast); +RTS_FUN(newMVarzh_fast); +RTS_FUN(takeMVarzh_fast); +RTS_FUN(putMVarzh_fast); +RTS_FUN(tryTakeMVarzh_fast); +RTS_FUN(tryPutMVarzh_fast); + +RTS_FUN(waitReadzh_fast); +RTS_FUN(waitWritezh_fast); +RTS_FUN(delayzh_fast); +#ifdef mingw32_HOST_OS +RTS_FUN(asyncReadzh_fast); +RTS_FUN(asyncWritezh_fast); +RTS_FUN(asyncDoProczh_fast); +#endif + +RTS_FUN(catchzh_fast); +RTS_FUN(raisezh_fast); +RTS_FUN(raiseIOzh_fast); + +RTS_FUN(makeStableNamezh_fast); +RTS_FUN(makeStablePtrzh_fast); +RTS_FUN(deRefStablePtrzh_fast); + +RTS_FUN(forkzh_fast); +RTS_FUN(forkOnzh_fast); +RTS_FUN(yieldzh_fast); +RTS_FUN(killThreadzh_fast); +RTS_FUN(asyncExceptionsBlockedzh_fast); +RTS_FUN(blockAsyncExceptionszh_fast); +RTS_FUN(unblockAsyncExceptionszh_fast); +RTS_FUN(myThreadIdzh_fast); +RTS_FUN(labelThreadzh_fast); +RTS_FUN(isCurrentThreadBoundzh_fast); +RTS_FUN(threadStatuszh_fast); + +RTS_FUN(mkWeakzh_fast); +RTS_FUN(mkWeakForeignzh_fast); +RTS_FUN(mkWeakForeignEnvzh_fast); +RTS_FUN(finalizzeWeakzh_fast); +RTS_FUN(deRefWeakzh_fast); + +RTS_FUN(newBCOzh_fast); +RTS_FUN(mkApUpd0zh_fast); + +RTS_FUN(retryzh_fast); +RTS_FUN(catchRetryzh_fast); +RTS_FUN(catchSTMzh_fast); +RTS_FUN(atomicallyzh_fast); +RTS_FUN(newTVarzh_fast); +RTS_FUN(readTVarzh_fast); +RTS_FUN(readTVarIOzh_fast); +RTS_FUN(writeTVarzh_fast); +RTS_FUN(checkzh_fast); + +RTS_FUN(unpackClosurezh_fast); +RTS_FUN(getApStackValzh_fast); 
+RTS_FUN(getSparkzh_fast); + +RTS_FUN(noDuplicatezh_fast); + +RTS_FUN(traceCcszh_fast); + +/* Other misc stuff */ +// See wiki:Commentary/Compiler/Backends/PprC#Prototypes + +#if IN_STG_CODE && !IN_STGCRUN + +// Interpreter.c +extern StgWord rts_stop_next_breakpoint[]; +extern StgWord rts_stop_on_exception[]; +extern StgWord rts_breakpoint_io_action[]; + +// Schedule.c +extern StgWord RTS_VAR(blocked_queue_hd), RTS_VAR(blocked_queue_tl); +extern StgWord RTS_VAR(sleeping_queue); +extern StgWord RTS_VAR(blackhole_queue); +extern StgWord RTS_VAR(sched_mutex); + +// Apply.cmm +// canned bitmap for each arg type +extern StgWord stg_arg_bitmaps[]; +extern StgWord stg_ap_stack_entries[]; +extern StgWord stg_stack_save_entries[]; + +// Storage.c +extern unsigned int RTS_VAR(alloc_blocks); +extern unsigned int RTS_VAR(alloc_blocks_lim); +extern StgWord RTS_VAR(weak_ptr_list); +extern StgWord RTS_VAR(atomic_modify_mutvar_mutex); + +// RtsFlags +extern StgWord RTS_VAR(RtsFlags); // bogus type + +// Stable.c +extern StgWord RTS_VAR(stable_ptr_table); + +// Profiling.c +extern unsigned int RTS_VAR(era); +extern StgWord RTS_VAR(CCCS); /* current CCS */ +extern unsigned int RTS_VAR(entering_PAP); +extern StgWord RTS_VAR(CC_LIST); /* registered CC list */ +extern StgWord RTS_VAR(CCS_LIST); /* registered CCS list */ +extern unsigned int RTS_VAR(CC_ID); /* global ids */ +extern unsigned int RTS_VAR(CCS_ID); + +#endif + +#endif /* STGMISCCLOSURES_H */ diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h new file mode 100644 index 0000000000..fb26254d5a --- /dev/null +++ b/includes/stg/Regs.h @@ -0,0 +1,667 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2004 + * + * Registers in the STG machine. + * + * The STG machine has a collection of "registers", each one of which + * may or may not correspond to an actual machine register when + * running code. 
+ * + * The register set is backed by a table in memory (struct + * StgRegTable). If a particular STG register is not mapped to a + * machine register, then the appropriate slot in this table is used + * instead. + * + * This table is itself pointed to by another register, BaseReg. If + * BaseReg is not in a machine register, then the register table is + * used from an absolute location (MainCapability). + * + * ---------------------------------------------------------------------------*/ + +#ifndef REGS_H +#define REGS_H + +/* + * Function table stored in front of the register table in a capability + * (see struct PartCapability_); its fields are reached via the FunReg macro. + */ +typedef struct { + StgWord stgEagerBlackholeInfo; + StgFunPtr stgGCEnter1; + StgFunPtr stgGCFun; +} StgFunTable; + +/* + * Vanilla registers are given this union type, which is purely so + * that we can cast the vanilla reg to a variety of types with the + * minimum of syntax. eg. R1.w instead of (StgWord)R1. + */ +typedef union { + StgWord w; + StgAddr a; + StgChar c; + StgFloat f; + StgInt i; + StgPtr p; +} StgUnion; + +/* + * This is the table that holds shadow-locations for all the STG + * registers. The shadow locations are used when: + * + * 1) the particular register isn't mapped to a real machine + * register, probably because there's a shortage of real registers. 
+ * 2) caller-saves registers are saved across a CCall + */ +typedef struct StgRegTable_ { + StgUnion rR1; + StgUnion rR2; + StgUnion rR3; + StgUnion rR4; + StgUnion rR5; + StgUnion rR6; + StgUnion rR7; + StgUnion rR8; + StgUnion rR9; /* used occasionally by heap/stack checks */ + StgUnion rR10; /* used occasionally by heap/stack checks */ + StgFloat rF1; + StgFloat rF2; + StgFloat rF3; + StgFloat rF4; + StgDouble rD1; + StgDouble rD2; + StgWord64 rL1; + StgPtr rSp; + StgPtr rSpLim; + StgPtr rHp; + StgPtr rHpLim; + struct StgTSO_ *rCurrentTSO; + struct step_ *rNursery; + struct bdescr_ *rCurrentNursery; /* Hp/HpLim point into this block */ + struct bdescr_ *rCurrentAlloc; /* for allocation using allocate() */ + StgWord rHpAlloc; /* number of *bytes* being allocated in heap */ + StgWord rRet; // holds the return code of the thread +} StgRegTable; + +#if IN_STG_CODE + +/* + * Registers Hp and HpLim are global across the entire system, and are + * copied into the RegTable before executing a thread. + * + * Registers Sp and SpLim are saved in the TSO for the + * thread, but are copied into the RegTable before executing a thread. + * + * All other registers are "general purpose", and are used for passing + * arguments to functions, and returning values. The code generator + * knows how many of these are in real registers, and avoids + * generating code that uses non-real registers. General purpose + * registers are never saved when returning to the scheduler, instead + * we save whatever is live at the time on the stack, and restore it + * later. This should reduce the context switch time, amongst other + * things. + * + * For argument passing, the stack will be used in preference to + * pseudo-registers if the architecture has too few general purpose + * registers. + * + * Some special RTS functions like newArray and the Integer primitives + * expect their arguments to be in registers R1-Rn, so we use these + * (pseudo-)registers in those cases. 
+ */ + +/* + * Locations for saving per-thread registers. + */ + +#define SAVE_Sp (CurrentTSO->sp) +#define SAVE_SpLim (CurrentTSO->splim) + +#define SAVE_Hp (BaseReg->rHp) + +#define SAVE_CurrentTSO (BaseReg->rCurrentTSO) +#define SAVE_CurrentNursery (BaseReg->rCurrentNursery) +#define SAVE_HpAlloc (BaseReg->rHpAlloc) + +/* We sometimes need to save registers across a C-call, eg. if they + * are clobbered in the standard calling convention. We define the + * save locations for all registers in the register table. + */ + +#define SAVE_R1 (BaseReg->rR1) +#define SAVE_R2 (BaseReg->rR2) +#define SAVE_R3 (BaseReg->rR3) +#define SAVE_R4 (BaseReg->rR4) +#define SAVE_R5 (BaseReg->rR5) +#define SAVE_R6 (BaseReg->rR6) +#define SAVE_R7 (BaseReg->rR7) +#define SAVE_R8 (BaseReg->rR8) +/* R9 and R10 have slots in StgRegTable too; CALLER_SAVE_R9/CALLER_SAVE_R10 + * expand to these names. */ +#define SAVE_R9 (BaseReg->rR9) +#define SAVE_R10 (BaseReg->rR10) + +#define SAVE_F1 (BaseReg->rF1) +#define SAVE_F2 (BaseReg->rF2) +#define SAVE_F3 (BaseReg->rF3) +#define SAVE_F4 (BaseReg->rF4) + +#define SAVE_D1 (BaseReg->rD1) +#define SAVE_D2 (BaseReg->rD2) + +#define SAVE_L1 (BaseReg->rL1) + +/* ----------------------------------------------------------------------------- + * Emit the GCC-specific register declarations for each machine + * register being used. If any STG register isn't mapped to a machine + * register, then map it to an offset from BaseReg. + * + * First, the general purpose registers. The idea is, if a particular + * general-purpose STG register can't be mapped to a real machine + * register, it won't be used at all. Instead, we'll use the stack. + * + * This is an improvement on the way things used to be done, when all + * registers were mapped to locations in the register table, and stuff + * was being shifted from the stack to the register table and back + * again for no good reason (on register-poor architectures). + */ + +/* define NO_GLOBAL_REG_DECLS to omit register declarations - used in RTS C code + * that needs all the STG definitions but not the global register + * settings. 
+ */ +#define GLOBAL_REG_DECL(type,name,reg) register type name REG(reg); + +#if defined(REG_R1) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R1,REG_R1) +#else +# define R1 (BaseReg->rR1) +#endif + +#if defined(REG_R2) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R2,REG_R2) +#else +# define R2 (BaseReg->rR2) +#endif + +#if defined(REG_R3) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R3,REG_R3) +#else +# define R3 (BaseReg->rR3) +#endif + +#if defined(REG_R4) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R4,REG_R4) +#else +# define R4 (BaseReg->rR4) +#endif + +#if defined(REG_R5) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R5,REG_R5) +#else +# define R5 (BaseReg->rR5) +#endif + +#if defined(REG_R6) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R6,REG_R6) +#else +# define R6 (BaseReg->rR6) +#endif + +#if defined(REG_R7) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R7,REG_R7) +#else +# define R7 (BaseReg->rR7) +#endif + +#if defined(REG_R8) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R8,REG_R8) +#else +# define R8 (BaseReg->rR8) +#endif + +#if defined(REG_R9) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R9,REG_R9) +#else +# define R9 (BaseReg->rR9) +#endif + +#if defined(REG_R10) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgUnion,R10,REG_R10) +#else +# define R10 (BaseReg->rR10) +#endif + +#if defined(REG_F1) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgFloat,F1,REG_F1) +#else +#define F1 (BaseReg->rF1) +#endif + +#if defined(REG_F2) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgFloat,F2,REG_F2) +#else +#define F2 (BaseReg->rF2) +#endif + +#if defined(REG_F3) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgFloat,F3,REG_F3) +#else +#define F3 (BaseReg->rF3) +#endif + +#if defined(REG_F4) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgFloat,F4,REG_F4) +#else +#define F4 (BaseReg->rF4) +#endif + +#if 
defined(REG_D1) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgDouble,D1,REG_D1) +#else +#define D1 (BaseReg->rD1) +#endif + +#if defined(REG_D2) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgDouble,D2,REG_D2) +#else +#define D2 (BaseReg->rD2) +#endif + +#if defined(REG_L1) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgWord64,L1,REG_L1) +#else +#define L1 (BaseReg->rL1) +#endif + +/* + * If BaseReg isn't mapped to a machine register, just use the global + * address of the current register table (CurrentRegTable in + * concurrent Haskell, MainRegTable otherwise). + */ + +/* A capability is a combination of a FunTable and a RegTable. In STG + * code, BaseReg normally points to the RegTable portion of this + * structure, so that we can index both forwards and backwards to take + * advantage of shorter instruction forms on some archs (eg. x86). + * This is a cut-down version of the Capability structure; the full + * version is defined in Capability.h. + */ +struct PartCapability_ { + StgFunTable f; + StgRegTable r; +}; + +/* No such thing as a MainCapability under THREADED_RTS - each thread must have + * its own Capability. + */ +#if IN_STG_CODE && !(defined(THREADED_RTS) && !defined(NOSMP)) +extern W_ MainCapability[]; +#endif + +/* + * Assigning to BaseReg (the ASSIGN_BaseReg macro): this happens on + * return from a "safe" foreign call, when the thread might be running + * on a new Capability. Obviously if BaseReg is not a register, then + * we are restricted to a single Capability (this invariant is enforced + * in Capability.c:initCapabilities), and assigning to BaseReg can be omitted. 
+ */ + +#if defined(REG_Base) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(StgRegTable *,BaseReg,REG_Base) +#define ASSIGN_BaseReg(e) (BaseReg = (e)) +#else +#if defined(THREADED_RTS) && !defined(NOSMP) +#error BaseReg must be in a register for THREADED_RTS +#endif +#define BaseReg (&((struct PartCapability_ *)MainCapability)->r) +#define ASSIGN_BaseReg(e) (e) +#endif + +#if defined(REG_Sp) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(P_,Sp,REG_Sp) +#else +#define Sp (BaseReg->rSp) +#endif + +#if defined(REG_SpLim) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(P_,SpLim,REG_SpLim) +#else +#define SpLim (BaseReg->rSpLim) +#endif + +#if defined(REG_Hp) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(P_,Hp,REG_Hp) +#else +#define Hp (BaseReg->rHp) +#endif + +#if defined(REG_HpLim) && !defined(NO_GLOBAL_REG_DECLS) +#error HpLim cannot be in a register +#else +#define HpLim (BaseReg->rHpLim) +#endif + +#if defined(REG_CurrentTSO) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(struct _StgTSO *,CurrentTSO,REG_CurrentTSO) +#else +#define CurrentTSO (BaseReg->rCurrentTSO) +#endif + +#if defined(REG_CurrentNursery) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(bdescr *,CurrentNursery,REG_CurrentNursery) +#else +#define CurrentNursery (BaseReg->rCurrentNursery) +#endif + +#if defined(REG_HpAlloc) && !defined(NO_GLOBAL_REG_DECLS) +GLOBAL_REG_DECL(bdescr *,HpAlloc,REG_HpAlloc) +#else +#define HpAlloc (BaseReg->rHpAlloc) +#endif + +/* ----------------------------------------------------------------------------- + Get absolute function pointers from the register table, to save + code space. On x86, + + jmp *-12(%ebx) + + is shorter than + + jmp absolute_address + + as long as the offset is within the range of a signed byte + (-128..+127). So we pick some common absolute_addresses and put + them in the register table. As a bonus, linking time should also + be reduced. 
+ + Other possible candidates in order of importance: + + stg_upd_frame_info + stg_CAF_BLACKHOLE_info + stg_IND_STATIC_info + + anything else probably isn't worth the effort. + + -------------------------------------------------------------------------- */ + + +#define FunReg ((StgFunTable *)((void *)BaseReg - STG_FIELD_OFFSET(struct PartCapability_, r))) + +#define stg_EAGER_BLACKHOLE_info (FunReg->stgEagerBlackholeInfo) +#define stg_gc_enter_1 (FunReg->stgGCEnter1) +#define stg_gc_fun (FunReg->stgGCFun) + +/* ----------------------------------------------------------------------------- + For any registers which are denoted "caller-saves" by the C calling + convention, we have to emit code to save and restore them across C + calls. + -------------------------------------------------------------------------- */ + +#ifdef CALLER_SAVES_R1 +#define CALLER_SAVE_R1 SAVE_R1 = R1; +#define CALLER_RESTORE_R1 R1 = SAVE_R1; +#else +#define CALLER_SAVE_R1 /* nothing */ +#define CALLER_RESTORE_R1 /* nothing */ +#endif + +#ifdef CALLER_SAVES_R2 +#define CALLER_SAVE_R2 SAVE_R2 = R2; +#define CALLER_RESTORE_R2 R2 = SAVE_R2; +#else +#define CALLER_SAVE_R2 /* nothing */ +#define CALLER_RESTORE_R2 /* nothing */ +#endif + +#ifdef CALLER_SAVES_R3 +#define CALLER_SAVE_R3 SAVE_R3 = R3; +#define CALLER_RESTORE_R3 R3 = SAVE_R3; +#else +#define CALLER_SAVE_R3 /* nothing */ +#define CALLER_RESTORE_R3 /* nothing */ +#endif + +#ifdef CALLER_SAVES_R4 +#define CALLER_SAVE_R4 SAVE_R4 = R4; +#define CALLER_RESTORE_R4 R4 = SAVE_R4; +#else +#define CALLER_SAVE_R4 /* nothing */ +#define CALLER_RESTORE_R4 /* nothing */ +#endif + +#ifdef CALLER_SAVES_R5 +#define CALLER_SAVE_R5 SAVE_R5 = R5; +#define CALLER_RESTORE_R5 R5 = SAVE_R5; +#else +#define CALLER_SAVE_R5 /* nothing */ +#define CALLER_RESTORE_R5 /* nothing */ +#endif + +#ifdef CALLER_SAVES_R6 +#define CALLER_SAVE_R6 SAVE_R6 = R6; +#define CALLER_RESTORE_R6 R6 = SAVE_R6; +#else +#define CALLER_SAVE_R6 /* nothing */ +#define CALLER_RESTORE_R6 /* 
nothing */ +#endif + +#ifdef CALLER_SAVES_R7 +#define CALLER_SAVE_R7 SAVE_R7 = R7; +#define CALLER_RESTORE_R7 R7 = SAVE_R7; +#else +#define CALLER_SAVE_R7 /* nothing */ +#define CALLER_RESTORE_R7 /* nothing */ +#endif + +#ifdef CALLER_SAVES_R8 +#define CALLER_SAVE_R8 SAVE_R8 = R8; +#define CALLER_RESTORE_R8 R8 = SAVE_R8; +#else +#define CALLER_SAVE_R8 /* nothing */ +#define CALLER_RESTORE_R8 /* nothing */ +#endif + +#ifdef CALLER_SAVES_R9 +#define CALLER_SAVE_R9 SAVE_R9 = R9; +#define CALLER_RESTORE_R9 R9 = SAVE_R9; +#else +#define CALLER_SAVE_R9 /* nothing */ +#define CALLER_RESTORE_R9 /* nothing */ +#endif + +#ifdef CALLER_SAVES_R10 +#define CALLER_SAVE_R10 SAVE_R10 = R10; +#define CALLER_RESTORE_R10 R10 = SAVE_R10; +#else +#define CALLER_SAVE_R10 /* nothing */ +#define CALLER_RESTORE_R10 /* nothing */ +#endif + +#ifdef CALLER_SAVES_F1 +#define CALLER_SAVE_F1 SAVE_F1 = F1; +#define CALLER_RESTORE_F1 F1 = SAVE_F1; +#else +#define CALLER_SAVE_F1 /* nothing */ +#define CALLER_RESTORE_F1 /* nothing */ +#endif + +#ifdef CALLER_SAVES_F2 +#define CALLER_SAVE_F2 SAVE_F2 = F2; +#define CALLER_RESTORE_F2 F2 = SAVE_F2; +#else +#define CALLER_SAVE_F2 /* nothing */ +#define CALLER_RESTORE_F2 /* nothing */ +#endif + +#ifdef CALLER_SAVES_F3 +#define CALLER_SAVE_F3 SAVE_F3 = F3; +#define CALLER_RESTORE_F3 F3 = SAVE_F3; +#else +#define CALLER_SAVE_F3 /* nothing */ +#define CALLER_RESTORE_F3 /* nothing */ +#endif + +#ifdef CALLER_SAVES_F4 +#define CALLER_SAVE_F4 SAVE_F4 = F4; +#define CALLER_RESTORE_F4 F4 = SAVE_F4; +#else +#define CALLER_SAVE_F4 /* nothing */ +#define CALLER_RESTORE_F4 /* nothing */ +#endif + +#ifdef CALLER_SAVES_D1 +#define CALLER_SAVE_D1 SAVE_D1 = D1; +#define CALLER_RESTORE_D1 D1 = SAVE_D1; +#else +#define CALLER_SAVE_D1 /* nothing */ +#define CALLER_RESTORE_D1 /* nothing */ +#endif + +#ifdef CALLER_SAVES_D2 +#define CALLER_SAVE_D2 SAVE_D2 = D2; +#define CALLER_RESTORE_D2 D2 = SAVE_D2; +#else +#define CALLER_SAVE_D2 /* nothing */ +#define CALLER_RESTORE_D2 /* 
nothing */ +#endif + +#ifdef CALLER_SAVES_L1 +#define CALLER_SAVE_L1 SAVE_L1 = L1; +#define CALLER_RESTORE_L1 L1 = SAVE_L1; +#else +#define CALLER_SAVE_L1 /* nothing */ +#define CALLER_RESTORE_L1 /* nothing */ +#endif + +#ifdef CALLER_SAVES_Sp +#define CALLER_SAVE_Sp SAVE_Sp = Sp; +#define CALLER_RESTORE_Sp Sp = SAVE_Sp; +#else +#define CALLER_SAVE_Sp /* nothing */ +#define CALLER_RESTORE_Sp /* nothing */ +#endif + +#ifdef CALLER_SAVES_SpLim +#define CALLER_SAVE_SpLim SAVE_SpLim = SpLim; +#define CALLER_RESTORE_SpLim SpLim = SAVE_SpLim; +#else +#define CALLER_SAVE_SpLim /* nothing */ +#define CALLER_RESTORE_SpLim /* nothing */ +#endif + +#ifdef CALLER_SAVES_Hp +#define CALLER_SAVE_Hp SAVE_Hp = Hp; +#define CALLER_RESTORE_Hp Hp = SAVE_Hp; +#else +#define CALLER_SAVE_Hp /* nothing */ +#define CALLER_RESTORE_Hp /* nothing */ +#endif + +#ifdef CALLER_SAVES_Base +#ifdef THREADED_RTS +#error "Can't have caller-saved BaseReg with THREADED_RTS" +#endif +#define CALLER_SAVE_Base /* nothing */ +#define CALLER_RESTORE_Base BaseReg = &MainRegTable; +#else +#define CALLER_SAVE_Base /* nothing */ +#define CALLER_RESTORE_Base /* nothing */ +#endif + +#ifdef CALLER_SAVES_CurrentTSO +#define CALLER_SAVE_CurrentTSO SAVE_CurrentTSO = CurrentTSO; +#define CALLER_RESTORE_CurrentTSO CurrentTSO = SAVE_CurrentTSO; +#else +#define CALLER_SAVE_CurrentTSO /* nothing */ +#define CALLER_RESTORE_CurrentTSO /* nothing */ +#endif + +#ifdef CALLER_SAVES_CurrentNursery +#define CALLER_SAVE_CurrentNursery SAVE_CurrentNursery = CurrentNursery; +#define CALLER_RESTORE_CurrentNursery CurrentNursery = SAVE_CurrentNursery; +#else +#define CALLER_SAVE_CurrentNursery /* nothing */ +#define CALLER_RESTORE_CurrentNursery /* nothing */ +#endif + +#ifdef CALLER_SAVES_HpAlloc +#define CALLER_SAVE_HpAlloc SAVE_HpAlloc = HpAlloc; +#define CALLER_RESTORE_HpAlloc HpAlloc = SAVE_HpAlloc; +#else +#define CALLER_SAVE_HpAlloc /* nothing */ +#define CALLER_RESTORE_HpAlloc /* nothing */ +#endif + +#endif /* IN_STG_CODE 
*/ + +/* ---------------------------------------------------------------------------- + Handy bunches of saves/restores + ------------------------------------------------------------------------ */ + +#if IN_STG_CODE + +#define CALLER_SAVE_USER \ + CALLER_SAVE_R1 \ + CALLER_SAVE_R2 \ + CALLER_SAVE_R3 \ + CALLER_SAVE_R4 \ + CALLER_SAVE_R5 \ + CALLER_SAVE_R6 \ + CALLER_SAVE_R7 \ + CALLER_SAVE_R8 \ + CALLER_SAVE_F1 \ + CALLER_SAVE_F2 \ + CALLER_SAVE_F3 \ + CALLER_SAVE_F4 \ + CALLER_SAVE_D1 \ + CALLER_SAVE_D2 \ + CALLER_SAVE_L1 + + /* Save Base last, since the others may + be addressed relative to it */ +#define CALLER_SAVE_SYSTEM \ + CALLER_SAVE_Sp \ + CALLER_SAVE_SpLim \ + CALLER_SAVE_Hp \ + CALLER_SAVE_CurrentTSO \ + CALLER_SAVE_CurrentNursery \ + CALLER_SAVE_Base + +#define CALLER_RESTORE_USER \ + CALLER_RESTORE_R1 \ + CALLER_RESTORE_R2 \ + CALLER_RESTORE_R3 \ + CALLER_RESTORE_R4 \ + CALLER_RESTORE_R5 \ + CALLER_RESTORE_R6 \ + CALLER_RESTORE_R7 \ + CALLER_RESTORE_R8 \ + CALLER_RESTORE_F1 \ + CALLER_RESTORE_F2 \ + CALLER_RESTORE_F3 \ + CALLER_RESTORE_F4 \ + CALLER_RESTORE_D1 \ + CALLER_RESTORE_D2 \ + CALLER_RESTORE_L1 + + /* Restore Base first, since the others may + be addressed relative to it */ +#define CALLER_RESTORE_SYSTEM \ + CALLER_RESTORE_Base \ + CALLER_RESTORE_Sp \ + CALLER_RESTORE_SpLim \ + CALLER_RESTORE_Hp \ + CALLER_RESTORE_CurrentTSO \ + CALLER_RESTORE_CurrentNursery + +#else /* not IN_STG_CODE */ + +#define CALLER_SAVE_USER /* nothing */ +#define CALLER_SAVE_SYSTEM /* nothing */ +#define CALLER_RESTORE_USER /* nothing */ +#define CALLER_RESTORE_SYSTEM /* nothing */ + +#endif /* IN_STG_CODE */ +#define CALLER_SAVE_ALL \ + CALLER_SAVE_SYSTEM \ + CALLER_SAVE_USER + +#define CALLER_RESTORE_ALL \ + CALLER_RESTORE_SYSTEM \ + CALLER_RESTORE_USER + +#endif /* REGS_H */ diff --git a/includes/stg/SMP.h b/includes/stg/SMP.h new file mode 100644 index 0000000000..5d9d80169b --- /dev/null +++ b/includes/stg/SMP.h @@ -0,0 +1,313 @@ +/* 
---------------------------------------------------------------------------- + * + * (c) The GHC Team, 2005-2008 + * + * Macros for multi-CPU support + * + * -------------------------------------------------------------------------- */ + +#ifndef SMP_H +#define SMP_H + +#if defined(THREADED_RTS) + +/* ---------------------------------------------------------------------------- + Atomic operations + ------------------------------------------------------------------------- */ + +#if !IN_STG_CODE || IN_STGCRUN +// We only want the barriers, e.g. write_barrier(), declared in .hc +// files. Defining the other inline functions here causes type +// mismatch errors from gcc, because the generated C code is assuming +// that there are no prototypes in scope. + +/* + * The atomic exchange operation: xchg(p,w) exchanges the value + * pointed to by p with the value w, returning the old value. + * + * Used for locking closures during updates (see lockClosure()) + * and the MVar primops. + */ +EXTERN_INLINE StgWord xchg(StgPtr p, StgWord w); + +/* + * Compare-and-swap. Atomically does this: + * + * cas(p,o,n) { + * r = *p; + * if (r == o) { *p = n }; + * return r; + * } + */ +EXTERN_INLINE StgWord cas(StgVolatilePtr p, StgWord o, StgWord n); + +/* + * Atomic increment + * + * atomic_inc(p) { + * return ++(*p); + * } + */ +EXTERN_INLINE StgWord atomic_inc(StgVolatilePtr p); + +/* + * Atomic decrement + * + * atomic_dec(p) { + * return --(*p); + * } + */ +EXTERN_INLINE StgWord atomic_dec(StgVolatilePtr p); + +#endif // !IN_STG_CODE || IN_STGCRUN + +/* + * Various kinds of memory barrier. + * write_barrier: prevents future stores occurring before preceding stores. + * store_load_barrier: prevents future loads occurring before preceding stores. + * load_load_barrier: prevents future loads occurring before earlier loads. 
+ * + * Reference for these: "The JSR-133 Cookbook for Compiler Writers" + * http://gee.cs.oswego.edu/dl/jmm/cookbook.html + * + * To check whether you got these right, try the test in + * testsuite/tests/ghc-regress/rts/testwsdeque.c + * This tests the work-stealing deque implementation, which relies on + * properly working store_load and load_load memory barriers. + */ +EXTERN_INLINE void write_barrier(void); +EXTERN_INLINE void store_load_barrier(void); +EXTERN_INLINE void load_load_barrier(void); + +/* ---------------------------------------------------------------------------- + Implementations + ------------------------------------------------------------------------- */ + +#if !IN_STG_CODE || IN_STGCRUN + +EXTERN_INLINE StgWord +xchg(StgPtr p, StgWord w) +{ + StgWord result; +#if i386_HOST_ARCH || x86_64_HOST_ARCH + result = w; + __asm__ __volatile__ ( + // NB: the xchg instruction is implicitly locked, so we do not + // need a lock prefix here. + "xchg %1,%0" + :"+r" (result), "+m" (*p) + : /* no input-only operands */ + ); +#elif powerpc_HOST_ARCH + __asm__ __volatile__ ( + "1: lwarx %0, 0, %2\n" + " stwcx. %1, 0, %2\n" + " bne- 1b" + :"=&r" (result) + :"r" (w), "r" (p) + ); +#elif sparc_HOST_ARCH + result = w; + __asm__ __volatile__ ( + "swap %1,%0" + : "+r" (result), "+m" (*p) + : /* no input-only operands */ + ); +#elif !defined(WITHSMP) + result = *p; + *p = w; +#else +#error xchg() unimplemented on this architecture +#endif + return result; +} + +/* + * CMPXCHG - the single-word atomic compare-and-exchange instruction. Used + * in the STM implementation. + */ +EXTERN_INLINE StgWord +cas(StgVolatilePtr p, StgWord o, StgWord n) +{ +#if i386_HOST_ARCH || x86_64_HOST_ARCH + __asm__ __volatile__ ( + "lock\ncmpxchg %3,%1" + :"=a"(o), "=m" (*(volatile unsigned int *)p) + :"0" (o), "r" (n)); + return o; +#elif powerpc_HOST_ARCH + StgWord result; + __asm__ __volatile__ ( + "1: lwarx %0, 0, %3\n" + " cmpw %0, %1\n" + " bne 2f\n" + " stwcx. 
%2, 0, %3\n" + " bne- 1b\n" + "2:" + :"=&r" (result) + :"r" (o), "r" (n), "r" (p) + :"cc", "memory" + ); + return result; +#elif sparc_HOST_ARCH + __asm__ __volatile__ ( + "cas [%1], %2, %0" + : "+r" (n) + : "r" (p), "r" (o) + : "memory" + ); + return n; +#elif !defined(WITHSMP) + StgWord result; + result = *p; + if (result == o) { + *p = n; + } + return result; +#else +#error cas() unimplemented on this architecture +#endif +} + +EXTERN_INLINE StgWord +atomic_inc(StgVolatilePtr p) +{ +#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH) + StgWord r; + r = 1; + __asm__ __volatile__ ( + "lock\nxadd %0,%1": + "+r" (r), "+m" (*p): + ); + return r+1; +#else + StgWord old, new; + do { + old = *p; + new = old + 1; + } while (cas(p, old, new) != old); + return new; +#endif +} + +EXTERN_INLINE StgWord +atomic_dec(StgVolatilePtr p) +{ +#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH) + StgWord r; + r = (StgWord)-1; + __asm__ __volatile__ ( + "lock\nxadd %0,%1": + "+r" (r), "+m" (*p): + ); + return r-1; +#else + StgWord old, new; + do { + old = *p; + new = old - 1; + } while (cas(p, old, new) != old); + return new; +#endif +} + +#endif // !IN_STG_CODE + +/* + * We need to tell both the compiler AND the CPU about the barriers. + * It's no good preventing the CPU from reordering the operations if + * the compiler has already done so - hence the "memory" restriction + * on each of the barriers below. + */ +EXTERN_INLINE void +write_barrier(void) { +#if i386_HOST_ARCH || x86_64_HOST_ARCH + __asm__ __volatile__ ("" : : : "memory"); +#elif powerpc_HOST_ARCH + __asm__ __volatile__ ("lwsync" : : : "memory"); +#elif sparc_HOST_ARCH + /* Sparc in TSO mode does not require store/store barriers. 
*/ + __asm__ __volatile__ ("" : : : "memory"); +#elif !defined(WITHSMP) + return; +#else +#error memory barriers unimplemented on this architecture +#endif +} + +/* Store/load barrier: a locked no-op add to the word at the stack pointer on + * x86/x86_64, `sync` on PowerPC, `membar #StoreLoad` on SPARC. On any other + * architecture it is a no-op unless WITHSMP is defined, in which case the + * build fails. */ +EXTERN_INLINE void +store_load_barrier(void) { +#if i386_HOST_ARCH + __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory"); +#elif x86_64_HOST_ARCH + __asm__ __volatile__ ("lock; addq $0,0(%%rsp)" : : : "memory"); +#elif powerpc_HOST_ARCH + __asm__ __volatile__ ("sync" : : : "memory"); +#elif sparc_HOST_ARCH + __asm__ __volatile__ ("membar #StoreLoad" : : : "memory"); +#elif !defined(WITHSMP) + return; +#else +#error memory barriers unimplemented on this architecture +#endif +} + +/* Load/load barrier: a compiler-only barrier (empty asm with a "memory" + * clobber) on x86, x86_64 and SPARC; `lwsync` on PowerPC. */ +EXTERN_INLINE void +load_load_barrier(void) { +#if i386_HOST_ARCH + __asm__ __volatile__ ("" : : : "memory"); +#elif x86_64_HOST_ARCH + __asm__ __volatile__ ("" : : : "memory"); +#elif powerpc_HOST_ARCH + __asm__ __volatile__ ("lwsync" : : : "memory"); +#elif sparc_HOST_ARCH + /* Sparc in TSO mode does not require load/load barriers. */ + __asm__ __volatile__ ("" : : : "memory"); +#elif !defined(WITHSMP) + return; +#else +#error memory barriers unimplemented on this architecture +#endif +} + +/* ---------------------------------------------------------------------- */ +#else /* !THREADED_RTS */ + +/* Non-threaded RTS: the barriers expand to nothing and the primitives below + * are plain, non-atomic loads and stores with the same return values as the + * threaded versions. */ +#define write_barrier() /* nothing */ +#define store_load_barrier() /* nothing */ +#define load_load_barrier() /* nothing */ + +INLINE_HEADER StgWord +xchg(StgPtr p, StgWord w) +{ + StgWord old = *p; + *p = w; + return old; +} + +STATIC_INLINE StgWord +cas(StgVolatilePtr p, StgWord o, StgWord n) +{ + StgWord result; + result = *p; + if (result == o) { + *p = n; + } + return result; +} + +INLINE_HEADER StgWord +atomic_inc(StgVolatilePtr p) +{ + return ++(*p); +} + +INLINE_HEADER StgWord +atomic_dec(StgVolatilePtr p) +{ + return --(*p); +} + +#endif /* !THREADED_RTS */ + +#endif /* SMP_H */ diff --git a/includes/stg/TailCalls.h b/includes/stg/TailCalls.h new file mode 100644 index 0000000000..854c7b4b18 --- /dev/null +++ 
b/includes/stg/TailCalls.h @@ -0,0 +1,304 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-1999 + * + * Stuff for implementing proper tail jumps. + * + * ---------------------------------------------------------------------------*/ + +#ifndef TAILCALLS_H +#define TAILCALLS_H + +/* ----------------------------------------------------------------------------- + Unmangled tail-jumping: use the mini interpreter. + + JMP_ returns the continuation to the caller as an StgFunPtr instead of + jumping to it; FB_ and FE_ expand to nothing. + -------------------------------------------------------------------------- */ + +#ifdef USE_MINIINTERPRETER + +#define JMP_(cont) return((StgFunPtr)(cont)) +#define FB_ +#define FE_ + +#else + +extern void __DISCARD__(void); + +/* ----------------------------------------------------------------------------- + Tail calling on x86 + -------------------------------------------------------------------------- */ + +#if i386_HOST_ARCH + +/* Note about discard: possibly there to fool GCC into clearing up + before we do the jump eg. if there are some arguments left on the C + stack that GCC hasn't popped yet. Also possibly to fool any + optimisations (a function call often acts as a barrier). Not sure + if any of this is necessary now -- SDM + + Comment to above note: I don't think the __DISCARD__() in JMP_ is + necessary. Arguments should be popped from the C stack immediately + after returning from a function, as long as we pass -fno-defer-pop + to gcc. Moreover, a goto to a first-class label acts as a barrier + for optimisations in the same way a function call does. + -= chak + */ + +/* The goto here seems to cause gcc -O2 to delete all the code after + it - including the FE_ marker and the epilogue code - exactly what + we want! 
-- SDM + */ + +#define JMP_(cont) \ + { \ + void *__target; \ + __DISCARD__(); \ + __target = (void *)(cont); \ + goto *__target; \ + } + +#endif /* i386_HOST_ARCH */ + +/* ----------------------------------------------------------------------------- + Tail calling on x86_64 + -------------------------------------------------------------------------- */ + +#if x86_64_HOST_ARCH + +/* + NOTE about __DISCARD__(): + + On x86_64 this is necessary to work around bugs in the register + variable support in gcc. Without the __DISCARD__() call, gcc will + silently throw away assignments to global register variables that + happen before the jump. + + Here's the example: + + extern void g(void); + static void f(void) { + R1 = g; + __DISCARD__(); + goto *R1; + } + + without the dummy function call, gcc throws away the assignment to R1 + (gcc 3.4.3) gcc bug #20359. +*/ + +#define JMP_(cont) \ + { \ + __DISCARD__(); \ + goto *(void *)(cont); \ + } + +#endif /* x86_64_HOST_ARCH */ + +/* ----------------------------------------------------------------------------- + Tail calling on Sparc + -------------------------------------------------------------------------- */ + +#ifdef sparc_HOST_ARCH + +#define JMP_(cont) ((F_) (cont))() + /* Oh so happily, the above turns into a "call" instruction, + which, on a SPARC, is nothing but a "jmpl" with the + return address in %o7 [which we don't care about]. 
+ */ + +/* Don't need these for sparc mangling */ +#define FB_ +#define FE_ + +#endif /* sparc_HOST_ARCH */ + +/* ----------------------------------------------------------------------------- + Tail calling on Alpha + -------------------------------------------------------------------------- */ + +#ifdef alpha_HOST_ARCH + +#if IN_STG_CODE +register void *_procedure __asm__("$27"); +#endif + +#define JMP_(cont) \ + do { _procedure = (void *)(cont); \ + __DISCARD__(); \ + goto *_procedure; \ + } while(0) + +/* Don't need these for alpha mangling */ +#define FB_ +#define FE_ + +#endif /* alpha_HOST_ARCH */ + +/* ----------------------------------------------------------------------------- + Tail calling on HP + +Description of HP's weird procedure linkage, many thanks to Andy Bennet +<andy_bennett@hp.com>: + +I've been digging a little further into the problem of how HP-UX does +dynamic procedure calls. My solution in the last e-mail inserting an extra +'if' statement into the JMP_ I think is probably the best general solution I +can come up with. There are still a few problems with it however: It won't +work, if JMP_ ever has to call anything in a shared library, if this is +likely to be required it'll need something more elaborate. It also won't work +with PA-RISC 2.0 wide mode (64-bit) which uses a different format PLT. + +I had some feedback from someone in HP's compiler lab and the problem +relates to the linker on HP-UX, not gcc as I first suspected. The reason the +'hsc' executable works is most likely due to a change in 'ld's behaviour for +performance reasons between your revision and mine. + +The major issue relating to this is shared libraries and how they are +implemented under HP-UX. The whole point of the Procedure Label Table (PLT) is +to allow a function pointer to hold the address of the function and a +pointer to the library's global data lookup table (DLT) used by position +independent code (PIC). 
This makes the PLT absolutely essential for shared +library calls. HP has two linker-introduced assembly functions for dealing +with dynamic calls, $$dyncall and $$dyncall_external. The former does a +check to see if the address is a PLT pointer and dereferences if necessary +or just calls the address otherwise; the latter skips the check and just +does the indirect jump no matter what. + +Since $$dyncall_external runs faster due to its not having the test, the +linker nowadays prefers to generate calls to that, rather than $$dyncall. It +makes this decision based on the presence of any shared library. If it even +smells an sl's existence at link time, it rigs the runtime system to +generate PLT references for everything on the assumption that the result +will be slightly more efficient. This is what is crashing GHC since the +calls it is generating have no understanding of the procedure label proper. +The only way to get real addresses is to link everything archive, including +system libraries, at which point it assumes you probably are going to be +using calls similar to GHC's (it's rigged for HP's +ESfic compiler option) +but uses $$dyncall if necessary to cope, just in case you aren't. 
+ + -------------------------------------------------------------------------- */ + +#ifdef hppa1_1_hp_hpux_TARGET + +/* JMP_ on HP-UX: if bit 1 of the target address is set, the target is not + jumped to directly; instead 2 is subtracted from it and the word stored + there is loaded and used as the real jump address (presumably the + PLT-pointer case described in the note above -- confirm). Otherwise the + address is used as given. */ +#define JMP_(cont) \ + do { void *_procedure = (void *)(cont); \ + if (((int) _procedure) & 2) \ + _procedure = (void *)(*((int *) (_procedure - 2))); \ + goto *_procedure; \ + } while(0) + +#endif /* hppa1_1_hp_hpux_TARGET */ + +/* ----------------------------------------------------------------------------- + Tail calling on PowerPC + -------------------------------------------------------------------------- */ + +#ifdef powerpc_HOST_ARCH + +#define JMP_(cont) \ + { \ + void *target; \ + target = (void *)(cont); \ + __DISCARD__(); \ + goto *target; \ + } + +/* + The __DISCARD__ is there because Apple's April 2002 Beta of GCC 3.1 + sometimes generates incorrect code otherwise. + It tends to "forget" to update global register variables in the presence + of decrement/increment operators: + JMP_(*(--Sp)) is wrongly compiled as JMP_(Sp[-1]). + Calling __DISCARD__ in between works around this problem. +*/ + +/* + I would _love_ to use the following instead, + but some versions of Apple's GCC fail to generate code for it + if it is called for a casted data pointer - which is exactly what + we are going to do... + + #define JMP_(cont) ((F_) (cont))() +*/ + +#endif /* powerpc_HOST_ARCH */ + +#ifdef powerpc64_HOST_ARCH +#define JMP_(cont) ((F_) (cont))() +#endif + +/* ----------------------------------------------------------------------------- + Tail calling on IA64 + -------------------------------------------------------------------------- */ + +#ifdef ia64_HOST_ARCH + +/* The compiler can more intelligently decide how to do this. We therefore + * implement it as a call and optimise to a jump at mangle time. + * + * Sometimes GCC likes to move instructions between the function call and + * the "--- TAILCALL ---". To stop it from finding instructions to put + * there, we insert a jump to the end of the function after the TAILCALL. 
*/ +/* NOTE(review): unlike the other JMP_ variants in this file, this one + * expands to three statements that are not wrapped in braces or + * do { } while(0), so it must only be used where a statement sequence is + * allowed (e.g. not as the unbraced body of an `if`). It also jumps to the + * _function_end label, which only the FE_ definition below provides. */ +#define JMP_(cont) \ + ((F_) (cont))(); \ + __asm__ volatile ("--- TAILCALL ---"); \ + goto _function_end; + +#define FE_ _function_end: __asm__ volatile ("--- END ---"); + +/* Don't emit calls to __DISCARD__ as this causes hassles */ +#define __DISCARD__() + +#endif + +/* ----------------------------------------------------------------------------- + Tail calling on MIPS + -------------------------------------------------------------------------- */ + +#ifdef mips_HOST_ARCH + +#if IN_STG_CODE +register void *_procedure __asm__("$25"); +#endif + +#define JMP_(cont) \ + { \ + _procedure = (void *)(cont); \ + __DISCARD__(); \ + goto *_procedure; \ + } + +/* Don't need these for MIPS mangling */ +#define FB_ +#define FE_ + +#endif /* mips_HOST_ARCH */ + +/* ----------------------------------------------------------------------------- + FUNBEGIN and FUNEND. + + These are markers indicating the start and end of Real Code in a + function. All instructions between the actual start and end of the + function and these markers are shredded by the mangler. + -------------------------------------------------------------------------- */ + +/* The following __DISCARD__() has become necessary with gcc 2.96 on x86. + * It prevents gcc from moving stack manipulation code from the function + * body (aka the Real Code) into the function prologue, ie, from moving it + * over the --- BEGIN --- marker. It should be noted that (like some + * other black magic in GHC's code), there is no essential reason why gcc + * could not move some stack manipulation code across the __DISCARD__() - + * it just doesn't choose to do it at the moment. 
+ * -= chak + */ + +#ifndef FB_ +#define FB_ __asm__ volatile ("--- BEGIN ---"); __DISCARD__ (); +#endif + +#ifndef FE_ +#define FE_ __asm__ volatile ("--- END ---"); +#endif + +#endif /* !USE_MINIINTERPRETER */ + +#endif /* TAILCALLS_H */ diff --git a/includes/stg/Ticky.h b/includes/stg/Ticky.h new file mode 100644 index 0000000000..fd7edf85c5 --- /dev/null +++ b/includes/stg/Ticky.h @@ -0,0 +1,188 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2007 + * + * Declarations for counters used by ticky-ticky profiling. + * + * -------------------------------------------------------------------------- */ + + +#ifndef TICKYCOUNTERS_H +#define TICKYCOUNTERS_H + +/* These should probably be automatically generated in order to + keep them consistent with the macros that use them (which are + defined in Cmm.h). */ + +#ifdef TICKY_TICKY +/* same trick as in the former StgTicky.h: recycle the same declarations + for both extern decls (which are included everywhere) + and initializations (which only happen once): when TICKY_C is defined, + EXTERN expands to nothing and INIT(x) to `= x`, so each line below is a + definition; otherwise each line is a plain extern declaration. */ +#ifdef TICKY_C +#define INIT(ializer) = ializer +#define EXTERN +#else +#define INIT(ializer) +#define EXTERN extern +#endif + +/* Here are all the counter declarations: */ + +EXTERN StgInt ENT_VIA_NODE_ctr INIT(0); +EXTERN StgInt ENT_STATIC_THK_ctr INIT(0); +EXTERN StgInt ENT_DYN_THK_ctr INIT(0); +EXTERN StgInt ENT_STATIC_FUN_DIRECT_ctr INIT(0); +EXTERN StgInt ENT_DYN_FUN_DIRECT_ctr INIT(0); +EXTERN StgInt ENT_STATIC_CON_ctr INIT(0); +EXTERN StgInt ENT_DYN_CON_ctr INIT(0); +EXTERN StgInt ENT_STATIC_IND_ctr INIT(0); +EXTERN StgInt ENT_DYN_IND_ctr INIT(0); +EXTERN StgInt ENT_PERM_IND_ctr INIT(0); +EXTERN StgInt ENT_PAP_ctr INIT(0); +EXTERN StgInt ENT_AP_ctr INIT(0); +EXTERN StgInt ENT_AP_STACK_ctr INIT(0); +EXTERN StgInt ENT_BH_ctr INIT(0); + +EXTERN StgInt UNKNOWN_CALL_ctr INIT(0); + +EXTERN StgInt SLOW_CALL_v_ctr INIT(0); +EXTERN StgInt SLOW_CALL_f_ctr INIT(0); +EXTERN StgInt SLOW_CALL_d_ctr INIT(0); 
+EXTERN StgInt SLOW_CALL_l_ctr INIT(0); +EXTERN StgInt SLOW_CALL_n_ctr INIT(0); +EXTERN StgInt SLOW_CALL_p_ctr INIT(0); +EXTERN StgInt SLOW_CALL_pv_ctr INIT(0); +EXTERN StgInt SLOW_CALL_pp_ctr INIT(0); +EXTERN StgInt SLOW_CALL_ppv_ctr INIT(0); +EXTERN StgInt SLOW_CALL_ppp_ctr INIT(0); +EXTERN StgInt SLOW_CALL_pppv_ctr INIT(0); +EXTERN StgInt SLOW_CALL_pppp_ctr INIT(0); +EXTERN StgInt SLOW_CALL_ppppp_ctr INIT(0); +EXTERN StgInt SLOW_CALL_pppppp_ctr INIT(0); +EXTERN StgInt SLOW_CALL_OTHER_ctr INIT(0); + +/* NOTE(review): ticky_slow_call_unevald, ALLOC_HEAP_tot and + ALLOC_FUN_adm/gds/slp are the only counters in this header declared + without INIT(0). When TICKY_C is defined they are file-scope objects + without an initializer and therefore still start at zero, so this looks + like an inconsistency rather than a behavioural difference -- confirm + (e.g. that nothing else defines them) before adding INIT(0). */ +EXTERN StgInt ticky_slow_call_unevald; +EXTERN StgInt SLOW_CALL_ctr INIT(0); +EXTERN StgInt MULTI_CHUNK_SLOW_CALL_ctr INIT(0); +EXTERN StgInt MULTI_CHUNK_SLOW_CALL_CHUNKS_ctr INIT(0); +EXTERN StgInt KNOWN_CALL_ctr INIT(0); +EXTERN StgInt KNOWN_CALL_TOO_FEW_ARGS_ctr INIT(0); +EXTERN StgInt KNOWN_CALL_EXTRA_ARGS_ctr INIT(0); +EXTERN StgInt SLOW_CALL_FUN_TOO_FEW_ctr INIT(0); +EXTERN StgInt SLOW_CALL_FUN_CORRECT_ctr INIT(0); +EXTERN StgInt SLOW_CALL_FUN_TOO_MANY_ctr INIT(0); +EXTERN StgInt SLOW_CALL_PAP_TOO_FEW_ctr INIT(0); +EXTERN StgInt SLOW_CALL_PAP_CORRECT_ctr INIT(0); +EXTERN StgInt SLOW_CALL_PAP_TOO_MANY_ctr INIT(0); +EXTERN StgInt SLOW_CALL_UNEVALD_ctr INIT(0); + + +EXTERN StgInt UPDF_OMITTED_ctr INIT(0); +EXTERN StgInt UPDF_PUSHED_ctr INIT(0); +EXTERN StgInt CATCHF_PUSHED_ctr INIT(0); +EXTERN StgInt UPDF_RCC_PUSHED_ctr INIT(0); +EXTERN StgInt UPDF_RCC_OMITTED_ctr INIT(0); + +EXTERN StgInt UPD_SQUEEZED_ctr INIT(0); +EXTERN StgInt UPD_CON_IN_NEW_ctr INIT(0); +EXTERN StgInt UPD_CON_IN_PLACE_ctr INIT(0); +EXTERN StgInt UPD_PAP_IN_NEW_ctr INIT(0); +EXTERN StgInt UPD_PAP_IN_PLACE_ctr INIT(0); + +EXTERN StgInt ALLOC_HEAP_ctr INIT(0); +EXTERN StgInt ALLOC_HEAP_tot; + +EXTERN StgInt ALLOC_FUN_ctr INIT(0); +EXTERN StgInt ALLOC_FUN_adm; +EXTERN StgInt ALLOC_FUN_gds; +EXTERN StgInt ALLOC_FUN_slp; + +EXTERN StgInt UPD_NEW_IND_ctr INIT(0); +EXTERN StgInt UPD_NEW_PERM_IND_ctr INIT(0); +EXTERN StgInt UPD_OLD_IND_ctr INIT(0); +EXTERN StgInt UPD_OLD_PERM_IND_ctr INIT(0); + +EXTERN 
StgInt UPD_BH_UPDATABLE_ctr INIT(0); +EXTERN StgInt UPD_BH_SINGLE_ENTRY_ctr INIT(0); +EXTERN StgInt UPD_CAF_BH_UPDATABLE_ctr INIT(0); +EXTERN StgInt UPD_CAF_BH_SINGLE_ENTRY_ctr INIT(0); + +EXTERN StgInt GC_SEL_ABANDONED_ctr INIT(0); +EXTERN StgInt GC_SEL_MINOR_ctr INIT(0); +EXTERN StgInt GC_SEL_MAJOR_ctr INIT(0); + +EXTERN StgInt GC_FAILED_PROMOTION_ctr INIT(0); + +EXTERN StgInt GC_WORDS_COPIED_ctr INIT(0); + +EXTERN StgInt ALLOC_UP_THK_ctr INIT(0); +EXTERN StgInt ALLOC_SE_THK_ctr INIT(0); +EXTERN StgInt ALLOC_THK_adm INIT(0); +EXTERN StgInt ALLOC_THK_gds INIT(0); +EXTERN StgInt ALLOC_THK_slp INIT(0); + +EXTERN StgInt ALLOC_CON_ctr INIT(0); +EXTERN StgInt ALLOC_CON_adm INIT(0); +EXTERN StgInt ALLOC_CON_gds INIT(0); +EXTERN StgInt ALLOC_CON_slp INIT(0); + +EXTERN StgInt ALLOC_TUP_ctr INIT(0); +EXTERN StgInt ALLOC_TUP_adm INIT(0); +EXTERN StgInt ALLOC_TUP_gds INIT(0); +EXTERN StgInt ALLOC_TUP_slp INIT(0); + +EXTERN StgInt ALLOC_BH_ctr INIT(0); +EXTERN StgInt ALLOC_BH_adm INIT(0); +EXTERN StgInt ALLOC_BH_gds INIT(0); +EXTERN StgInt ALLOC_BH_slp INIT(0); + +EXTERN StgInt ALLOC_PRIM_ctr INIT(0); +EXTERN StgInt ALLOC_PRIM_adm INIT(0); +EXTERN StgInt ALLOC_PRIM_gds INIT(0); +EXTERN StgInt ALLOC_PRIM_slp INIT(0); + +EXTERN StgInt ALLOC_PAP_ctr INIT(0); +EXTERN StgInt ALLOC_PAP_adm INIT(0); +EXTERN StgInt ALLOC_PAP_gds INIT(0); +EXTERN StgInt ALLOC_PAP_slp INIT(0); + +EXTERN StgInt ALLOC_TSO_ctr INIT(0); +EXTERN StgInt ALLOC_TSO_adm INIT(0); +EXTERN StgInt ALLOC_TSO_gds INIT(0); +EXTERN StgInt ALLOC_TSO_slp INIT(0); + +EXTERN StgInt RET_NEW_ctr INIT(0); +EXTERN StgInt RET_OLD_ctr INIT(0); +EXTERN StgInt RET_UNBOXED_TUP_ctr INIT(0); + +EXTERN StgInt RET_SEMI_loads_avoided INIT(0); + +/* End of counter declarations. */ + +#endif /* TICKY_TICKY */ + +/* This is ugly, but the story is: + We got rid of StgTicky.h, which was previously + defining these macros for the benefit of C code, + so we define them here instead (to be no-ops). 
+ (since those macros are only defined in Cmm.h) + + Note that these macros must be defined whether + TICKY_TICKY is defined or not. */ + +#ifndef CMINUSMINUS +#define TICK_ALLOC_PRIM(x,y,z) +#define TICK_UPD_OLD_IND() +#define TICK_UPD_NEW_IND() +#define TICK_UPD_SQUEEZED() +#define TICK_ALLOC_HEAP_NOCTR(x) +#define TICK_GC_WORDS_COPIED(x) +#define TICK_GC_FAILED_PROMOTION() +#define TICK_ALLOC_TSO(g,s) +#define TICK_ALLOC_UP_THK(g,s) +#define TICK_ALLOC_SE_THK(g,s) + +#endif + + +#endif /* TICKYCOUNTERS_H */ diff --git a/includes/stg/Types.h b/includes/stg/Types.h new file mode 100644 index 0000000000..227356c9ea --- /dev/null +++ b/includes/stg/Types.h @@ -0,0 +1,135 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 1998-2004 + * + * Various C datatypes used in the run-time system. This is the + * lowest-level include file, after ghcconfig.h and RtsConfig.h. + * + * This module should define types *only*, all beginning with "Stg". + * + * Specifically: + + StgInt8, 16, 32, 64 + StgWord8, 16, 32, 64 + StgChar, StgFloat, StgDouble + + ***** All the same size (i.e. sizeof(void *)): ***** + StgPtr Basic pointer type + StgWord Unit of heap allocation + StgInt Signed version of StgWord + StgAddr Generic address type + + StgBool, StgOffset, StgCode, StgStablePtr, + StgByteArray, StgVolatilePtr, StgHalfInt, + StgHalfWord, StgFunPtr, StgFun. + + * WARNING: Keep this file, MachDeps.h, and HsFFI.h in synch! + * + * NOTE: assumes #include "ghcconfig.h" + * + * Works with or without _POSIX_SOURCE. + * + * ---------------------------------------------------------------------------*/ + +#ifndef STGTYPES_H +#define STGTYPES_H + +/* + * First, platform-dependent definitions of size-specific integers. + * Assume for now that the int type is 32 bits. + * NOTE: Synch the following definitions with MachDeps.h! + * ToDo: move these into a platform-dependent file. 
+ */ + +typedef signed char StgInt8; +typedef unsigned char StgWord8; + +typedef signed short StgInt16; +typedef unsigned short StgWord16; + +#if SIZEOF_LONG == 4 +typedef signed long StgInt32; +typedef unsigned long StgWord32; +#elif SIZEOF_INT == 4 +typedef signed int StgInt32; +typedef unsigned int StgWord32; +#else +#error GHC untested on this architecture: sizeof(int) != 4 +#endif + +#if SIZEOF_LONG == 8 +typedef signed long StgInt64; +typedef unsigned long StgWord64; +#elif defined(__MSVC__) +typedef __int64 StgInt64; +typedef unsigned __int64 StgWord64; +#elif SIZEOF_LONG_LONG == 8 +typedef signed long long int StgInt64; +typedef unsigned long long int StgWord64; +#else +#error cannot find a way to define StgInt64 +#endif + +/* + * Define the standard word size we'll use on this machine: make it + * big enough to hold a pointer. + * + * It's useful if StgInt/StgWord are always the same as long, so that + * we can use a consistent printf format specifier without warnings on + * any platform. Fortunately this works at the moment; if it breaks + * in the future we'll have to start using macros for format + * specifiers (cf. FMT_StgWord64 in Rts.h). + */ + +#if SIZEOF_VOID_P == 8 +typedef StgInt64 StgInt; +typedef StgWord64 StgWord; +typedef StgInt32 StgHalfInt; +typedef StgWord32 StgHalfWord; +#else +#if SIZEOF_VOID_P == 4 +typedef StgInt32 StgInt; +typedef StgWord32 StgWord; +typedef StgInt16 StgHalfInt; +typedef StgWord16 StgHalfWord; +#else +#error GHC untested on this architecture: sizeof(void *) != 4 or 8 +#endif +#endif + +/* NOTE(review): W_ is not defined anywhere in this header, so W_MASK only + * expands correctly where W_ has been made visible by another header. */ +#define W_MASK (sizeof(W_)-1) + +/* + * Other commonly-used STG datatypes. 
+ */ + +typedef void* StgAddr; +typedef StgWord32 StgChar; +typedef int StgBool; +typedef float StgFloat; +typedef double StgDouble; +typedef StgWord* StgPtr; /* heap or stack pointer */ +typedef StgWord volatile* StgVolatilePtr; /* pointer to volatile word */ +typedef StgWord StgOffset; /* byte offset within closure */ +typedef StgWord8 StgCode; /* close enough */ +typedef void* StgStablePtr; +typedef StgWord8* StgByteArray; + +/* + Types for the generated C functions + take no arguments + return a pointer to the next function to be called + use: Ptr to Fun that returns a Ptr to Fun which returns Ptr to void + + Note: Neither StgFunPtr nor StgFun is quite right (that is, + StgFunPtr != StgFun*). So, the functions we define all have type + StgFun but we always have to cast them to StgFunPtr when we assign + them to something. + The only way round this would be to write a recursive type but + C only allows that if you're defining a struct or union. +*/ + +typedef void *(*(*StgFunPtr)(void))(void); +typedef StgFunPtr StgFun(void); + +#endif /* STGTYPES_H */ |