summaryrefslogtreecommitdiff
path: root/rts/sm/GCTDecl.h
blob: 2fbe1894f813b172269fd7b27ec45a4fa8faf80f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
/* -----------------------------------------------------------------------------
 *
 * (c) The GHC Team 1998-2014
 *
 * Documentation on the architecture of the Garbage Collector can be
 * found in the online commentary:
 *
 *   https://gitlab.haskell.org/ghc/ghc/wikis/commentary/rts/storage/gc
 *
 * ---------------------------------------------------------------------------*/

#pragma once

#include "BeginPrivate.h"

/* The gct variable is thread-local and points to the current thread's
   gc_thread structure. It is heavily accessed, and thus high
   performance access is crucial to parallel (-threaded) workloads.

   First, we try to use a 'global register variable' which is a GCC
   extension. This reserves the register globally.

   If that's not possible, then we need to use __thread, which is a
   compiler/OS specific TLS storage mechanism (assumed to be Fast
   Enough.)

   BUT, some older versions of OS X compilers (llvm-gcc, older Clangs)
   do not support __thread at all. Modern Clang, however, does - but on
   OS X it's not as fast as on Linux (which can write directly into a
   segment register - see #7602.)

   If we don't support __thread then we do the absolute worst thing:
   we just use pthread_getspecific and pthread_setspecific (which are
   horribly slow.)
*/

/* GCT_REG_DECL(type, name, reg): expands to a `register` declaration of
   `name` with type `type`, followed by REG(reg).  REG is not defined in
   this header; presumably it supplies the register binding for the
   'global register variable' extension described above - confirm against
   its definition.  Note that the expansion itself already ends in ';'. */
#define GCT_REG_DECL(type,name,reg) register type name REG(reg);


/* -------------------------------------------------------------------------- */

/* First: if we're not using the threaded RTS, it's easy: just fake it. */
/* Every branch of the conditional below provides the same three names:
 *
 *   gct         - yields the current thread's gc_thread pointer
 *   SET_GCT(to) - points gct at `to` (expands to nothing without
 *                 THREADED_RTS)
 *   DECLARE_GCT - the definition, if any, that backs the chosen mechanism
 *                 (expands to nothing when no separate definition is needed)
 *
 * The branches are tested in order and the first match wins, so their
 * order encodes the preference between mechanisms; do not reorder them.
 * Wherever SET_GCT expands to an expression, that expression is fully
 * parenthesised.
 */
#if !defined(THREADED_RTS)
/* gct is simply the address of the byte array the_gc_thread, viewed as a
   gc_thread; there is nothing to set and nothing extra to define. */
extern StgWord8 the_gc_thread[];
#define gct ((gc_thread*)&the_gc_thread)
#define SET_GCT(to) /*nothing*/
#define DECLARE_GCT /*nothing*/

#else /* defined(THREADED_RTS) */

/* -------------------------------------------------------------------------- */

/* Now, llvm-gcc and some older Clang compilers do not support
   __thread. So we have to fall back to the extremely slow case,
   unfortunately: gct is looked up through the pthread key gctKey on
   every access.

   Also, the iOS Clang compiler doesn't support __thread either for
   some bizarre reason, so there's not much we can do about that... */
#if defined(CC_LLVM_BACKEND) && (CC_SUPPORTS_TLS == 0)
#define gct ((gc_thread *)(pthread_getspecific(gctKey)))
#define SET_GCT(to) (pthread_setspecific(gctKey, to))
#define DECLARE_GCT ThreadLocalKey gctKey;

/* -------------------------------------------------------------------------- */

/* However, if we *are* using an LLVM based compiler with __thread
   support, then use that (since LLVM doesn't support global register
   variables.) */
#elif defined(CC_LLVM_BACKEND) && (CC_SUPPORTS_TLS == 1)
extern __thread gc_thread* gct;
#define SET_GCT(to) (gct = (to))
#define DECLARE_GCT __thread gc_thread* gct;

/* -------------------------------------------------------------------------- */

/* Next up: Using __thread is better than stealing a register on
   x86/Linux, because we have too few registers available. In my
   tests it was worth about 5% in GC performance, but of course that
   might change as gcc improves. -- SDM 2009/04/03 */
#elif (defined(i386_HOST_ARCH) && (defined(linux_HOST_OS) \
                                   || defined(solaris2_HOST_OS)))
extern __thread gc_thread* gct;
#define SET_GCT(to) (gct = (to))
#define DECLARE_GCT __thread gc_thread* gct;

/* -------------------------------------------------------------------------- */

/* Next up: generally, if REG_Base is defined and we're *not* using
   i386, then actually declare the needed register. The catch for i386
   here is that REG_Base is %ebx, but that is also used for -fPIC, so
   it can't be stolen */
#elif defined(REG_Base) && !defined(i386_HOST_ARCH)
/* No trailing ';' here: GCT_REG_DECL's expansion already ends with one,
   and a second would leave an empty declaration at file scope. */
GCT_REG_DECL(gc_thread*, gct, REG_Base)
#define SET_GCT(to) (gct = (to))
#define DECLARE_GCT /* nothing */

/* -------------------------------------------------------------------------- */

/* Next up: if REG_R1 is available after checking REG_Base, we're
   gonna steal it in every case we can. */
#elif defined(REG_R1)
/* No trailing ';' here: GCT_REG_DECL's expansion already ends with one,
   and a second would leave an empty declaration at file scope. */
GCT_REG_DECL(gc_thread*, gct, REG_R1)
#define SET_GCT(to) (gct = (to))
#define DECLARE_GCT /* nothing */

/* -------------------------------------------------------------------------- */

/* Finally, as an absolute fallback, if none of the above tests check
   out but we *do* have __thread support, then use that. */
#elif CC_SUPPORTS_TLS == 1
extern __thread gc_thread* gct;
#define SET_GCT(to) (gct = (to))
#define DECLARE_GCT __thread gc_thread* gct;

/* -------------------------------------------------------------------------- */

/* Impossible! */
#else
#error Cannot find a way to declare the thread-local gc variable!
#endif

#endif // THREADED_RTS

#include "EndPrivate.h"