1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
/* -----------------------------------------------------------------------------
*
* (c) The GHC Team 1998-2014
*
* Documentation on the architecture of the Garbage Collector can be
* found in the online commentary:
*
* https://gitlab.haskell.org/ghc/ghc/wikis/commentary/rts/storage/gc
*
* ---------------------------------------------------------------------------*/
#pragma once
#include "BeginPrivate.h"
/* The gct variable is thread-local and points to the current thread's
gc_thread structure. It is heavily accessed, and thus high
performance access is crucial to parallel (-threaded) workloads.
First, we try to use a 'global register variable' which is a GCC
extension. This reserves the register globally.
If that's not possible, then we need to use __thread, which is a
compiler/OS specific TLS storage mechanism (assumed to be Fast
Enough.)
BUT, some older versions of OS X compilers (llvm-gcc, older Clangs)
do not support __thread at all. Modern Clang, however, does - but on
OS X it's not as fast as on Linux (which can write directly into a
segment register - see #7602.)
If we don't support __thread then we do the absolute worst thing:
we just use pthread_getspecific and pthread_setspecific (which are
horribly slow.)
*/
/* GCT_REG_DECL(type, name, reg): declare `name` as a `register` variable
   of the given type, bound to `reg` through the REG() macro (REG is not
   defined in this header). The expansion already ends in ';', so a use
   site that appends its own ';' produces an extra empty declaration. */
#define GCT_REG_DECL(type,name,reg) register type name REG(reg);
/* -------------------------------------------------------------------------- */
/* First: if we're not using the threaded RTS, it's easy: just fake it. */
#if !defined(THREADED_RTS)
/* Byte array that stands in for the gc_thread structure; it is only
   declared here, the definition lives elsewhere. */
extern StgWord8 the_gc_thread[];
/* gct: the address of that array, viewed as a gc_thread pointer. */
#define gct ((gc_thread*)&the_gc_thread)
/* SET_GCT expands to nothing in this configuration; note that its
   argument is therefore never evaluated. */
#define SET_GCT(to) /*nothing*/
/* DECLARE_GCT expands to nothing: there is no variable to define. */
#define DECLARE_GCT /*nothing*/
#else /* defined(THREADED_RTS) */
/* -------------------------------------------------------------------------- */
/* Now, llvm-gcc and some older Clang compilers do not support
__thread. So we have to fall back to the extremely slow case,
unfortunately.
Also, the iOS Clang compiler doesn't support __thread either for
some bizarre reason, so there's not much we can do about that... */
#if defined(CC_LLVM_BACKEND) && (CC_SUPPORTS_TLS == 0)
/* gct: fetch the value stored under gctKey via pthread_getspecific() on
   every use and view it as a gc_thread pointer. */
#define gct ((gc_thread *)(pthread_getspecific(gctKey)))
/* SET_GCT: store `to` under gctKey via pthread_setspecific(). The whole
   call is the expansion, so its result is visible to the caller. */
#define SET_GCT(to) (pthread_setspecific(gctKey, to))
/* DECLARE_GCT: expands to the definition of the gctKey object itself
   (note the trailing ';').
   NOTE(review): the key is not created here - presumably that happens
   elsewhere before gct is first used; confirm. */
#define DECLARE_GCT ThreadLocalKey gctKey;
/* -------------------------------------------------------------------------- */
/* However, if we *are* using an LLVM based compiler with __thread
support, then use that (since LLVM doesn't support global register
variables.) */
#elif defined(CC_LLVM_BACKEND) && (CC_SUPPORTS_TLS == 1)
/* Declaration only; DECLARE_GCT below expands to the matching definition. */
extern __thread gc_thread* gct;
/* SET_GCT: point gct at `to`. The replacement list is parenthesised so the
   assignment stays a single expression wherever the macro is expanded. */
#define SET_GCT(to) (gct = (to))
/* DECLARE_GCT: expands to the definition of the __thread variable,
   including its terminating ';'. */
#define DECLARE_GCT __thread gc_thread* gct;
/* -------------------------------------------------------------------------- */
/* Next up: Using __thread is better than stealing a register on
x86/Linux, because we have too few registers available. In my
tests it was worth about 5% in GC performance, but of course that
might change as gcc improves. -- SDM 2009/04/03 */
#elif (defined(i386_HOST_ARCH) && (defined(linux_HOST_OS) \
|| defined(solaris2_HOST_OS)))
/* Declaration only; DECLARE_GCT below expands to the matching definition. */
extern __thread gc_thread* gct;
/* SET_GCT: point gct at `to`. The replacement list is parenthesised so the
   assignment stays a single expression wherever the macro is expanded. */
#define SET_GCT(to) (gct = (to))
/* DECLARE_GCT: expands to the definition of the __thread variable,
   including its terminating ';'. */
#define DECLARE_GCT __thread gc_thread* gct;
/* -------------------------------------------------------------------------- */
/* Next up: generally, if REG_Base is defined and we're *not* using
i386, then actually declare the needed register. The catch for i386
here is that REG_Base is %ebx, but that is also used for -fPIC, so
it can't be stolen */
#elif defined(REG_Base) && !defined(i386_HOST_ARCH)
/* Declare gct as a register variable bound to REG_Base. */
GCT_REG_DECL(gc_thread*, gct, REG_Base);
/* SET_GCT: point gct at `to`. The replacement list is parenthesised so the
   assignment stays a single expression wherever the macro is expanded. */
#define SET_GCT(to) (gct = (to))
/* DECLARE_GCT expands to nothing: the register declaration above is the
   only declaration of gct in this configuration. */
#define DECLARE_GCT /* nothing */
/* -------------------------------------------------------------------------- */
/* Next up: if REG_R1 is available after checking REG_Base, we're
gonna steal it in every case we can. */
#elif defined(REG_R1)
/* Declare gct as a register variable bound to REG_R1. */
GCT_REG_DECL(gc_thread*, gct, REG_R1);
/* SET_GCT: point gct at `to`. The replacement list is parenthesised so the
   assignment stays a single expression wherever the macro is expanded. */
#define SET_GCT(to) (gct = (to))
/* DECLARE_GCT expands to nothing: the register declaration above is the
   only declaration of gct in this configuration. */
#define DECLARE_GCT /* nothing */
/* -------------------------------------------------------------------------- */
/* Finally, as an absolute fallback, if none of the above tests check
out but we *do* have __thread support, then use that. */
#elif CC_SUPPORTS_TLS == 1
/* Declaration only; DECLARE_GCT below expands to the matching definition. */
extern __thread gc_thread* gct;
/* SET_GCT: point gct at `to`. The replacement list is parenthesised so the
   assignment stays a single expression wherever the macro is expanded. */
#define SET_GCT(to) (gct = (to))
/* DECLARE_GCT: expands to the definition of the __thread variable,
   including its terminating ';'. */
#define DECLARE_GCT __thread gc_thread* gct;
/* -------------------------------------------------------------------------- */
/* Impossible! */
#else
#error Cannot find a way to declare the thread-local gc variable!
#endif
#endif // THREADED_RTS
#include "EndPrivate.h"
|