#include "Rts.h"
// Fallbacks for atomic primops on byte arrays. The builtins used
// below are supported on both GCC and LLVM.
//
// Ideally these functions would take StgWord8, StgWord16, etc., but
// older GCC versions assume that the register in which such an argument
// is passed has been zero-extended by the caller. The ABI makes no such
// guarantee, and GHC does not zero-extend when it generates calls to
// these functions.
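//
// As an illustrative sketch (this just restates what the casts in the
// bodies below do; it is not additional API), each function narrows the
// full-width argument itself rather than trusting the caller:
//
//    StgWord8 v = (StgWord8) val;   // explicit truncation to the low 8 bits
//
// so only the low-order bits of val ever reach the underlying builtin.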
// FetchAddByteArrayOp_Int
extern StgWord hs_atomic_add8(StgWord x, StgWord val);
StgWord
hs_atomic_add8(StgWord x, StgWord val)
{
return __sync_fetch_and_add((volatile StgWord8 *) x, (StgWord8) val);
}
extern StgWord hs_atomic_add16(StgWord x, StgWord val);
StgWord
hs_atomic_add16(StgWord x, StgWord val)
{
return __sync_fetch_and_add((volatile StgWord16 *) x, (StgWord16) val);
}
extern StgWord hs_atomic_add32(StgWord x, StgWord val);
StgWord
hs_atomic_add32(StgWord x, StgWord val)
{
return __sync_fetch_and_add((volatile StgWord32 *) x, (StgWord32) val);
}
#if WORD_SIZE_IN_BITS == 64
extern StgWord64 hs_atomic_add64(StgWord x, StgWord64 val);
StgWord64
hs_atomic_add64(StgWord x, StgWord64 val)
{
return __sync_fetch_and_add((volatile StgWord64 *) x, val);
}
#endif
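// Illustrative usage sketch (a hypothetical test harness, not part of this
// file): the address is smuggled through a StgWord and the old value is
// returned, so
//
//    StgWord8 cell = 5;
//    StgWord old = hs_atomic_add8((StgWord) &cell, 3);
//    // now old == 5 and cell == 8
//
// The same calling convention applies to all fetch-and-op fallbacks below.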
// FetchSubByteArrayOp_Int
extern StgWord hs_atomic_sub8(StgWord x, StgWord val);
StgWord
hs_atomic_sub8(StgWord x, StgWord val)
{
return __sync_fetch_and_sub((volatile StgWord8 *) x, (StgWord8) val);
}
extern StgWord hs_atomic_sub16(StgWord x, StgWord val);
StgWord
hs_atomic_sub16(StgWord x, StgWord val)
{
return __sync_fetch_and_sub((volatile StgWord16 *) x, (StgWord16) val);
}
extern StgWord hs_atomic_sub32(StgWord x, StgWord val);
StgWord
hs_atomic_sub32(StgWord x, StgWord val)
{
return __sync_fetch_and_sub((volatile StgWord32 *) x, (StgWord32) val);
}
#if WORD_SIZE_IN_BITS == 64
extern StgWord64 hs_atomic_sub64(StgWord x, StgWord64 val);
StgWord64
hs_atomic_sub64(StgWord x, StgWord64 val)
{
return __sync_fetch_and_sub((volatile StgWord64 *) x, val);
}
#endif
// FetchAndByteArrayOp_Int
extern StgWord hs_atomic_and8(StgWord x, StgWord val);
StgWord
hs_atomic_and8(StgWord x, StgWord val)
{
return __sync_fetch_and_and((volatile StgWord8 *) x, (StgWord8) val);
}
extern StgWord hs_atomic_and16(StgWord x, StgWord val);
StgWord
hs_atomic_and16(StgWord x, StgWord val)
{
return __sync_fetch_and_and((volatile StgWord16 *) x, (StgWord16) val);
}
extern StgWord hs_atomic_and32(StgWord x, StgWord val);
StgWord
hs_atomic_and32(StgWord x, StgWord val)
{
return __sync_fetch_and_and((volatile StgWord32 *) x, (StgWord32) val);
}
#if WORD_SIZE_IN_BITS == 64
extern StgWord64 hs_atomic_and64(StgWord x, StgWord64 val);
StgWord64
hs_atomic_and64(StgWord x, StgWord64 val)
{
return __sync_fetch_and_and((volatile StgWord64 *) x, val);
}
#endif
// FetchNandByteArrayOp_Int
// Note [__sync_fetch_and_nand usage]
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//
// The __sync_fetch_and_nand builtin is a bit of a disaster. It was introduced
// in GCC long ago with silly semantics. Specifically:
//
//    *ptr = ~tmp & value
//
// Clang introduced the builtin with the same semantics.
//
// In GCC 4.4 the operation's semantics were rightly changed to a true NAND,
//
//    *ptr = ~(tmp & value)
//
// and the -Wsync-nand warning was added to warn users of the operation about
// the change.
//
// Clang took this change as a reason to remove support for the
// builtin in 2010. Then, in 2014 Clang re-added support with the new
// semantics. However, the warning flag was given a different name
// (-Wsync-fetch-and-nand-semantics-changed) for added fun.
//
// Consequently, we are left with a bit of a mess: GHC requires GCC >4.4
// (enforced by the FP_GCC_VERSION autoconf check), so we thankfully don't need
// to support the operation's older broken semantics. However, we need to take
// care to explicitly disable -Wsync-nand wherever possible, lest the build
// fail with -Werror. Furthermore, we need to emulate the operation when
// building with some Clang versions (shipped by some Mac OS X releases) which
// lack support for the builtin.
//
// In the words of Bob Dylan: everything is broken.
//
// See also:
//
// * https://bugs.llvm.org/show_bug.cgi?id=8842
// * https://ghc.haskell.org/trac/ghc/ticket/9678
//
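// A concrete 8-bit example of the difference (illustrative only): with
// tmp == 0xF0 and value == 0x3C,
//
//    old semantics:  ~tmp & value   == 0x0F & 0x3C == 0x0C
//    new semantics:  ~(tmp & value) == ~0x30       == 0xCF
//
// The CAS_NAND fallback below implements the new (true NAND) semantics
// with a compare-and-swap retry loop: read the old value, try to swap in
// ~(old & val), retry if another thread intervened, and return the old
// value, matching __sync_fetch_and_nand.
//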
#define CAS_NAND(x, val) \
{ \
__typeof__ (*(x)) tmp = *(x); \
while (!__sync_bool_compare_and_swap(x, tmp, ~(tmp & (val)))) { \
tmp = *(x); \
} \
return tmp; \
}
// N.B. __has_builtin is only provided by clang
#if !defined(__has_builtin)
#define __has_builtin(x) 0
#endif
#if defined(__clang__) && !__has_builtin(__sync_fetch_and_nand)
#define USE_SYNC_FETCH_AND_NAND 0
#else
#define USE_SYNC_FETCH_AND_NAND 1
#endif
// Silence the NAND semantics-change warnings; otherwise the build fails with -Werror
#pragma GCC diagnostic push
#if defined(__clang__)
#pragma GCC diagnostic ignored "-Wsync-fetch-and-nand-semantics-changed"
#elif defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wsync-nand"
#endif
extern StgWord hs_atomic_nand8(StgWord x, StgWord val);
StgWord
hs_atomic_nand8(StgWord x, StgWord val)
{
#if USE_SYNC_FETCH_AND_NAND
return __sync_fetch_and_nand((volatile StgWord8 *) x, (StgWord8) val);
#else
CAS_NAND((volatile StgWord8 *) x, (StgWord8) val);
#endif
}
extern StgWord hs_atomic_nand16(StgWord x, StgWord val);
StgWord
hs_atomic_nand16(StgWord x, StgWord val)
{
#if USE_SYNC_FETCH_AND_NAND
return __sync_fetch_and_nand((volatile StgWord16 *) x, (StgWord16) val);
#else
CAS_NAND((volatile StgWord16 *) x, (StgWord16) val);
#endif
}
extern StgWord hs_atomic_nand32(StgWord x, StgWord val);
StgWord
hs_atomic_nand32(StgWord x, StgWord val)
{
#if USE_SYNC_FETCH_AND_NAND
return __sync_fetch_and_nand((volatile StgWord32 *) x, (StgWord32) val);
#else
CAS_NAND((volatile StgWord32 *) x, (StgWord32) val);
#endif
}
#if WORD_SIZE_IN_BITS == 64
extern StgWord64 hs_atomic_nand64(StgWord x, StgWord64 val);
StgWord64
hs_atomic_nand64(StgWord x, StgWord64 val)
{
#if USE_SYNC_FETCH_AND_NAND
return __sync_fetch_and_nand((volatile StgWord64 *) x, val);
#else
CAS_NAND((volatile StgWord64 *) x, val);
#endif
}
#endif
#pragma GCC diagnostic pop
// FetchOrByteArrayOp_Int
extern StgWord hs_atomic_or8(StgWord x, StgWord val);
StgWord
hs_atomic_or8(StgWord x, StgWord val)
{
return __sync_fetch_and_or((volatile StgWord8 *) x, (StgWord8) val);
}
extern StgWord hs_atomic_or16(StgWord x, StgWord val);
StgWord
hs_atomic_or16(StgWord x, StgWord val)
{
return __sync_fetch_and_or((volatile StgWord16 *) x, (StgWord16) val);
}
extern StgWord hs_atomic_or32(StgWord x, StgWord val);
StgWord
hs_atomic_or32(StgWord x, StgWord val)
{
return __sync_fetch_and_or((volatile StgWord32 *) x, (StgWord32) val);
}
#if WORD_SIZE_IN_BITS == 64
extern StgWord64 hs_atomic_or64(StgWord x, StgWord64 val);
StgWord64
hs_atomic_or64(StgWord x, StgWord64 val)
{
return __sync_fetch_and_or((volatile StgWord64 *) x, val);
}
#endif
// FetchXorByteArrayOp_Int
extern StgWord hs_atomic_xor8(StgWord x, StgWord val);
StgWord
hs_atomic_xor8(StgWord x, StgWord val)
{
return __sync_fetch_and_xor((volatile StgWord8 *) x, (StgWord8) val);
}
extern StgWord hs_atomic_xor16(StgWord x, StgWord val);
StgWord
hs_atomic_xor16(StgWord x, StgWord val)
{
return __sync_fetch_and_xor((volatile StgWord16 *) x, (StgWord16) val);
}
extern StgWord hs_atomic_xor32(StgWord x, StgWord val);
StgWord
hs_atomic_xor32(StgWord x, StgWord val)
{
return __sync_fetch_and_xor((volatile StgWord32 *) x, (StgWord32) val);
}
#if WORD_SIZE_IN_BITS == 64
extern StgWord64 hs_atomic_xor64(StgWord x, StgWord64 val);
StgWord64
hs_atomic_xor64(StgWord x, StgWord64 val)
{
return __sync_fetch_and_xor((volatile StgWord64 *) x, val);
}
#endif
// CasByteArrayOp_Int
extern StgWord hs_cmpxchg8(StgWord x, StgWord old, StgWord new);
StgWord
hs_cmpxchg8(StgWord x, StgWord old, StgWord new)
{
return __sync_val_compare_and_swap((volatile StgWord8 *) x, (StgWord8) old, (StgWord8) new);
}
extern StgWord hs_cmpxchg16(StgWord x, StgWord old, StgWord new);
StgWord
hs_cmpxchg16(StgWord x, StgWord old, StgWord new)
{
return __sync_val_compare_and_swap((volatile StgWord16 *) x, (StgWord16) old, (StgWord16) new);
}
extern StgWord hs_cmpxchg32(StgWord x, StgWord old, StgWord new);
StgWord
hs_cmpxchg32(StgWord x, StgWord old, StgWord new)
{
return __sync_val_compare_and_swap((volatile StgWord32 *) x, (StgWord32) old, (StgWord32) new);
}
#if WORD_SIZE_IN_BITS == 64
extern StgWord hs_cmpxchg64(StgWord x, StgWord64 old, StgWord64 new);
StgWord
hs_cmpxchg64(StgWord x, StgWord64 old, StgWord64 new)
{
return __sync_val_compare_and_swap((volatile StgWord64 *) x, old, new);
}
#endif
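// Illustrative contract (hypothetical harness, not part of this file): the
// old contents are returned whether or not the swap happened, so callers
// detect success by comparing the result against the expected value:
//
//    StgWord8 cell = 1;
//    StgWord prev = hs_cmpxchg8((StgWord) &cell, 1, 2);
//    // prev == 1 and cell == 2: the swap succeeded; an identical second
//    // call would return 2 and leave cell unchanged.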
// AtomicReadByteArrayOp_Int
// Implies a full memory barrier (see compiler/prelude/primops.txt.pp)
// __ATOMIC_SEQ_CST: Full barrier in both directions (hoisting and sinking
// of code) and synchronizes with acquire loads and release stores in
// all threads.
//
// When we lack C11 atomics support we emulate these using the old GCC __sync
// primitives, which the GCC documentation claims "usually" imply a full
// barrier.
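// The __sync fallback reads by atomically adding zero: the addition leaves
// the cell unchanged, returns its current value, and still acts as a full
// barrier.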
extern StgWord hs_atomicread8(StgWord x);
StgWord
hs_atomicread8(StgWord x)
{
#if HAVE_C11_ATOMICS
return __atomic_load_n((StgWord8 *) x, __ATOMIC_SEQ_CST);
#else
return __sync_add_and_fetch((StgWord8 *) x, 0);
#endif
}
extern StgWord hs_atomicread16(StgWord x);
StgWord
hs_atomicread16(StgWord x)
{
#if HAVE_C11_ATOMICS
return __atomic_load_n((StgWord16 *) x, __ATOMIC_SEQ_CST);
#else
return __sync_add_and_fetch((StgWord16 *) x, 0);
#endif
}
extern StgWord hs_atomicread32(StgWord x);
StgWord
hs_atomicread32(StgWord x)
{
#if HAVE_C11_ATOMICS
return __atomic_load_n((StgWord32 *) x, __ATOMIC_SEQ_CST);
#else
return __sync_add_and_fetch((StgWord32 *) x, 0);
#endif
}
extern StgWord64 hs_atomicread64(StgWord x);
StgWord64
hs_atomicread64(StgWord x)
{
#if HAVE_C11_ATOMICS
return __atomic_load_n((StgWord64 *) x, __ATOMIC_SEQ_CST);
#else
return __sync_add_and_fetch((StgWord64 *) x, 0);
#endif
}
// AtomicWriteByteArrayOp_Int
// Implies a full memory barrier (see compiler/prelude/primops.txt.pp)
// __ATOMIC_SEQ_CST: Full barrier (see hs_atomicread8 above).
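// The __sync fallback spins on a compare-and-swap: each iteration re-reads
// the current contents and tries to swap in the new value, so the loop ends
// as soon as no other thread has written between the read and the swap.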
extern void hs_atomicwrite8(StgWord x, StgWord val);
void
hs_atomicwrite8(StgWord x, StgWord val)
{
#if HAVE_C11_ATOMICS
__atomic_store_n((StgWord8 *) x, (StgWord8) val, __ATOMIC_SEQ_CST);
#else
while (!__sync_bool_compare_and_swap((StgWord8 *) x, *(StgWord8 *) x, (StgWord8) val));
#endif
}
extern void hs_atomicwrite16(StgWord x, StgWord val);
void
hs_atomicwrite16(StgWord x, StgWord val)
{
#if HAVE_C11_ATOMICS
__atomic_store_n((StgWord16 *) x, (StgWord16) val, __ATOMIC_SEQ_CST);
#else
while (!__sync_bool_compare_and_swap((StgWord16 *) x, *(StgWord16 *) x, (StgWord16) val));
#endif
}
extern void hs_atomicwrite32(StgWord x, StgWord val);
void
hs_atomicwrite32(StgWord x, StgWord val)
{
#if HAVE_C11_ATOMICS
__atomic_store_n((StgWord32 *) x, (StgWord32) val, __ATOMIC_SEQ_CST);
#else
while (!__sync_bool_compare_and_swap((StgWord32 *) x, *(StgWord32 *) x, (StgWord32) val));
#endif
}
extern void hs_atomicwrite64(StgWord x, StgWord64 val);
void
hs_atomicwrite64(StgWord x, StgWord64 val)
{
#if HAVE_C11_ATOMICS
__atomic_store_n((StgWord64 *) x, (StgWord64) val, __ATOMIC_SEQ_CST);
#else
while (!__sync_bool_compare_and_swap((StgWord64 *) x, *(StgWord64 *) x, (StgWord64) val));
#endif
}