summaryrefslogtreecommitdiff
path: root/src/atomic_ops_stack.c
blob: cefc1ed4e99fabeaacd061155896d21d94557dc6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
/*
 * Copyright (c) 2005 Hewlett-Packard Development Company, L.P.
 *
 * This file may be redistributed and/or modified under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2, or (at your option) any later version.
 *
 * It is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU General Public License in the
 * file COPYING for more details.
 */

#if defined(HAVE_CONFIG_H)
# include "config.h"
#endif

#include <string.h>
#include <stdlib.h>
#include <assert.h>

#ifndef AO_BUILD
# define AO_BUILD
#endif

#ifndef AO_REAL_PTR_AS_MACRO
# define AO_REAL_PTR_AS_MACRO
#endif

#define AO_REQUIRE_CAS
#include "atomic_ops_stack.h"

AO_API void AO_stack_init(AO_stack_t *list)
{
  memset(list, 0, sizeof(AO_stack_t));
}

AO_API AO_t *AO_real_head_ptr(const AO_stack_t *list)
{
  return AO_REAL_HEAD_PTR(*list);
}

AO_API AO_t *AO_real_next_ptr(AO_t next)
{
  return AO_REAL_NEXT_PTR(next);
}

/* This function call must be a part of a do-while loop with a CAS      */
/* designating the condition of the loop (see the use cases below).     */
#ifdef AO_THREAD_SANITIZER
  AO_ATTR_NO_SANITIZE_THREAD
  static void store_before_cas(AO_t *addr, AO_t value)
  {
    *addr = value;
  }
#else
# define store_before_cas(addr, value) (void)(*(addr) = (value))
#endif

#ifdef AO_USE_ALMOST_LOCK_FREE

# ifdef __cplusplus
    extern "C" {
# endif
  AO_API void AO_pause(int); /* defined in atomic_ops.c */
# ifdef __cplusplus
    } /* extern "C" */
# endif

# if defined(__alpha__) && (__GNUC__ == 4)
    /* Workaround __builtin_expect bug found in         */
    /* gcc-4.6.3/alpha causing test_stack failure.      */
#   undef AO_EXPECT_FALSE
#   define AO_EXPECT_FALSE(expr) (expr)
# endif

  /* LIFO linked lists based on compare-and-swap.  We need to avoid     */
  /* the case of a node deletion and reinsertion while I'm deleting     */
  /* it, since that may cause my CAS to succeed eventhough the next     */
  /* pointer is now wrong.  Our solution is not fully lock-free, but it */
  /* is good enough for signal handlers, provided we have a suitably    */
  /* low bound on the number of recursive signal handler reentries.     */
  /* A list consists of a first pointer and a blacklist of pointer      */
  /* values that are currently being removed.  No list element on       */
  /* the blacklist may be inserted.  If we would otherwise do so, we    */
  /* are allowed to insert a variant that differs only in the least     */
  /* significant, ignored, bits.  If the list is full, we wait.         */

  /* Crucial observation: A particular padded pointer x (i.e. pointer   */
  /* plus arbitrary low order bits) can never be newly inserted into    */
  /* a list while it's in the corresponding auxiliary data structure.   */

  /* The second argument is a pointer to the link field of the element  */
  /* to be inserted.                                                    */
  /* Both list headers and link fields contain "perturbed" pointers,    */
  /* i.e. pointers with extra bits or'ed into the low order bits.       */
  AO_API void AO_stack_push_explicit_aux_release(volatile AO_t *list, AO_t *x,
                                                 AO_stack_aux *a)
  {
    AO_t x_bits = (AO_t)x;
    AO_t next;

    /* No deletions of x can start here, since x is not         */
    /* currently in the list.                                   */
  retry:
    {
#     if AO_BL_SIZE == 2
        /* Start all loads as close to concurrently as possible.        */
        AO_t entry1 = AO_load(&a->AO_stack_bl[0]);
        AO_t entry2 = AO_load(&a->AO_stack_bl[1]);
        if (AO_EXPECT_FALSE(entry1 == x_bits || entry2 == x_bits))
#     else
        int i;
        for (i = 0; i < AO_BL_SIZE; ++i)
          if (AO_EXPECT_FALSE(AO_load(&a->AO_stack_bl[i]) == x_bits))
#     endif
        {
          /* Entry is currently being removed.  Change it a little.     */
          ++x_bits;
          if ((x_bits & AO_BIT_MASK) == 0)
            /* Version count overflowed; EXTREMELY unlikely, but possible. */
            x_bits = (AO_t)x;
          goto retry;
        }
    }

    /* x_bits is not currently being deleted */
    do
      {
        next = AO_load(list);
        store_before_cas(x, next);
      }
    while (AO_EXPECT_FALSE(!AO_compare_and_swap_release(list, next, x_bits)));
  }

  /* I concluded experimentally that checking a value first before      */
  /* performing a compare-and-swap is usually beneficial on x86, but    */
  /* slows things down appreciably with contention on Itanium.          */
  /* Since the Itanium behavior makes more sense to me (more cache line */
  /* movement unless we're mostly reading, but back-off should guard    */
  /* against that), we take Itanium as the default.  Measurements on    */
  /* other multiprocessor architectures would be useful.                */
  /* (On a uniprocessor, the initial check is almost certainly a very   */
  /* small loss.) - HB                                                  */
# ifdef __i386__
#   define PRECHECK(a) (a) == 0 &&
# else
#   define PRECHECK(a)
# endif

  /* This function is used before CAS in the below AO_stack_pop and the */
  /* data race (reported by TSan) is OK because it results in a retry.  */
# ifdef AO_THREAD_SANITIZER
    AO_ATTR_NO_SANITIZE_THREAD
    static AO_t AO_load_next(const volatile AO_t *first_ptr)
    {
      /* Assuming an architecture on which loads of word type are       */
      /* atomic.  AO_load cannot be used here because it cannot be      */
      /* instructed to suppress the warning about the race.             */
      return *first_ptr;
    }
# else
#   define AO_load_next AO_load
# endif

  AO_API AO_t *AO_stack_pop_explicit_aux_acquire(volatile AO_t *list,
                                                 AO_stack_aux *a)
  {
    unsigned i;
    int j = 0;
    AO_t first;
    AO_t * first_ptr;
    AO_t next;

  retry:
    first = AO_load(list);
    if (0 == first) return 0;
    /* Insert first into aux black list.                                */
    /* This may spin if more than AO_BL_SIZE removals using auxiliary   */
    /* structure a are currently in progress.                           */
    for (i = 0; ; )
      {
        if (PRECHECK(a -> AO_stack_bl[i])
            AO_compare_and_swap_acquire(a->AO_stack_bl+i, 0, first))
          break;
        if (++i >= AO_BL_SIZE)
          {
            i = 0;
            AO_pause(++j);
          }
      }
    assert(i < AO_BL_SIZE);
#   ifndef AO_THREAD_SANITIZER
      assert(a -> AO_stack_bl[i] == first);
                                /* No actual race with the above CAS.   */
#   endif
    /* First is on the auxiliary black list.  It may be removed by      */
    /* another thread before we get to it, but a new insertion of x     */
    /* cannot be started here.  Only we can remove it from the black    */
    /* list.  We need to make sure that first is still the first entry  */
    /* on the list.  Otherwise it is possible that a reinsertion of it  */
    /* was already started before we added the black list entry.        */
    if (AO_EXPECT_FALSE(first != AO_load_acquire(list)))
                        /* Workaround test failure on AIX, at least, by */
                        /* using acquire ordering semantics for this    */
                        /* load.  Probably, it is not the right fix.    */
    {
      AO_store_release(a->AO_stack_bl+i, 0);
      goto retry;
    }
    first_ptr = AO_REAL_NEXT_PTR(first);
    next = AO_load_next(first_ptr);
    if (AO_EXPECT_FALSE(!AO_compare_and_swap_release(list, first, next)))
    {
      AO_store_release(a->AO_stack_bl+i, 0);
      goto retry;
    }
#   ifndef AO_THREAD_SANITIZER
      assert(*list != first); /* No actual race with the above CAS.     */
#   endif
    /* Since we never insert an entry on the black list, this cannot    */
    /* have succeeded unless first remained on the list while we were   */
    /* running.  Thus, its next link cannot have changed out from under */
    /* us, and we removed exactly one entry and preserved the rest of   */
    /* the list.  Note that it is quite possible that an additional     */
    /* entry was inserted and removed while we were running; this is OK */
    /* since the part of the list following first must have remained    */
    /* unchanged, and first must again have been at the head of the     */
    /* list when the compare_and_swap succeeded.                        */
    AO_store_release(a->AO_stack_bl+i, 0);
    return first_ptr;
  }

  AO_API void AO_stack_push_release(AO_stack_t *list, AO_t *x)
  {
    AO_stack_push_explicit_aux_release(&list->AO_pa.AO_ptr, x,
                                       &list->AO_pa.AO_aux);
  }

  AO_API AO_t *AO_stack_pop_acquire(AO_stack_t *list)
  {
    return AO_stack_pop_explicit_aux_acquire(&list->AO_pa.AO_ptr,
                                             &list->AO_pa.AO_aux);
  }

#else /* ! USE_ALMOST_LOCK_FREE */

  /* The functionality is the same as of AO_load_next but the atomicity */
  /* is not needed.  The usage is similar to that of store_before_cas.  */
# if defined(AO_THREAD_SANITIZER) \
     && (defined(AO_HAVE_compare_and_swap_double) \
         || defined(AO_HAVE_compare_double_and_swap_double))
    /* TODO: If compiled by Clang (as of clang-4.0) with -O3 flag,      */
    /* no_sanitize attribute is ignored unless the argument is volatile.*/
#   if defined(__clang__)
#     define LOAD_BEFORE_CAS_VOLATILE volatile
#   else
#     define LOAD_BEFORE_CAS_VOLATILE /* empty */
#   endif
    AO_ATTR_NO_SANITIZE_THREAD
    static AO_t load_before_cas(const LOAD_BEFORE_CAS_VOLATILE AO_t *addr)
    {
      return *addr;
    }
# else
#   define load_before_cas(addr) (*(addr))
# endif /* !AO_THREAD_SANITIZER */

  /* Better names for fields in AO_stack_t.     */
# define version AO_vp.AO_val1
# define ptr AO_vp.AO_val2

# if defined(AO_HAVE_compare_double_and_swap_double) \
     && !(defined(AO_STACK_PREFER_CAS_DOUBLE) \
          && defined(AO_HAVE_compare_and_swap_double))

#   ifdef LINT2
      volatile /* non-static */ AO_t AO_noop_sink;
#   endif

    AO_API void AO_stack_push_release(AO_stack_t *list, AO_t *element)
    {
      AO_t next;

      do {
        next = AO_load(&list->ptr);
        store_before_cas(element, next);
      } while (AO_EXPECT_FALSE(!AO_compare_and_swap_release(&list->ptr, next,
                                                            (AO_t)element)));
      /* This uses a narrow CAS here, an old optimization suggested     */
      /* by Treiber.  Pop is still safe, since we run into the ABA      */
      /* problem only if there were both intervening pops and pushes.   */
      /* In that case we still see a change in the version number.      */
#     ifdef LINT2
        /* Instruct static analyzer that element is not lost.   */
        AO_noop_sink = (AO_t)element;
#     endif
    }

    AO_API AO_t *AO_stack_pop_acquire(AO_stack_t *list)
    {
#     if defined(__clang__) && !AO_CLANG_PREREQ(3, 5)
        AO_t *volatile cptr;
                        /* Use volatile to workaround a bug in          */
                        /* clang-1.1/x86 causing test_stack failure.    */
#     else
        AO_t *cptr;
#     endif
      AO_t next;
      AO_t cversion;

      do {
        /* Version must be loaded first.    */
        cversion = AO_load_acquire(&list->version);
        cptr = (AO_t *)AO_load(&list->ptr);
        if (NULL == cptr)
          return NULL;
        next = load_before_cas((AO_t *)cptr);
      } while (AO_EXPECT_FALSE(!AO_compare_double_and_swap_double_release(
                                        &list->AO_vp, cversion, (AO_t)cptr,
                                        cversion+1, next)));
      return cptr;
    }

# elif defined(AO_HAVE_compare_and_swap_double)

    /* Needed for future IA64 processors.  No current clients?  */
    /* TODO: Not tested thoroughly. */

    /* We have a wide CAS, but only does an AO_t-wide comparison.       */
    /* We cannot use the Treiber optimization, since we only check      */
    /* for an unchanged version number, not an unchanged pointer.       */
    AO_API void AO_stack_push_release(AO_stack_t *list, AO_t *element)
    {
      AO_t cversion;

      do {
        AO_t next_ptr;

        /* Again version must be loaded first, for different reason.    */
        cversion = AO_load_acquire(&list->version);
        next_ptr = AO_load(&list->ptr);
        store_before_cas(element, next_ptr);
      } while (!AO_compare_and_swap_double_release(&list->AO_vp, cversion,
                                                   cversion+1, (AO_t)element));
    }

    AO_API AO_t *AO_stack_pop_acquire(AO_stack_t *list)
    {
      AO_t *cptr;
      AO_t next;
      AO_t cversion;

      do {
        cversion = AO_load_acquire(&list->version);
        cptr = (AO_t *)AO_load(&list->ptr);
        if (NULL == cptr)
          return NULL;
        next = load_before_cas(cptr);
      } while (!AO_compare_double_and_swap_double_release(&list->AO_vp,
                                cversion, (AO_t)cptr, cversion+1, next));
      return cptr;
    }
# endif /* AO_HAVE_compare_and_swap_double */

# undef ptr
# undef version
#endif /* ! USE_ALMOST_LOCK_FREE */