/* * Copyright (c) 2005 Hewlett-Packard Development Company, L.P. * * This file may be redistributed and/or modified under the * terms of the GNU General Public License as published by the Free Software * Foundation; either version 2, or (at your option) any later version. * * It is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License in the * file COPYING for more details. */ #if defined(HAVE_CONFIG_H) # include "config.h" #endif #include #include #include #ifndef AO_BUILD # define AO_BUILD #endif #define AO_REQUIRE_CAS #include "atomic_ops_stack.h" /* This function call must be a part of a do-while loop with a CAS */ /* designating the condition of the loop (see the use cases below). */ #ifdef AO_THREAD_SANITIZER AO_ATTR_NO_SANITIZE_THREAD static void store_before_cas(AO_t *addr, AO_t value) { *addr = value; } #else # define store_before_cas(addr, value) (void)(*(addr) = (value)) #endif #ifdef AO_USE_ALMOST_LOCK_FREE # ifdef __cplusplus extern "C" { # endif AO_API void AO_pause(int); /* defined in atomic_ops.c */ # ifdef __cplusplus } /* extern "C" */ # endif /* LIFO linked lists based on compare-and-swap. We need to avoid */ /* the case of a node deletion and reinsertion while I'm deleting */ /* it, since that may cause my CAS to succeed eventhough the next */ /* pointer is now wrong. Our solution is not fully lock-free, but it */ /* is good enough for signal handlers, provided we have a suitably low */ /* bound on the number of recursive signal handler reentries. */ /* A list consists of a first pointer and a blacklist */ /* of pointer values that are currently being removed. No list element */ /* on the blacklist may be inserted. If we would otherwise do so, we */ /* are allowed to insert a variant that differs only in the least */ /* significant, ignored, bits. If the list is full, we wait. */ /* Crucial observation: A particular padded pointer x (i.e. pointer */ /* plus arbitrary low order bits) can never be newly inserted into */ /* a list while it's in the corresponding auxiliary data structure. */ /* The second argument is a pointer to the link field of the element */ /* to be inserted. */ /* Both list headers and link fields contain "perturbed" pointers, i.e. */ /* pointers with extra bits "or"ed into the low order bits. */ AO_API void AO_stack_push_explicit_aux_release(volatile AO_t *list, AO_t *x, AO_stack_aux *a) { AO_t x_bits = (AO_t)x; AO_t next; /* No deletions of x can start here, since x is not currently in the */ /* list. */ retry: # if AO_BL_SIZE == 2 { /* Start all loads as close to concurrently as possible. */ AO_t entry1 = AO_load(&a->AO_stack_bl[0]); AO_t entry2 = AO_load(&a->AO_stack_bl[1]); if (entry1 == x_bits || entry2 == x_bits) { /* Entry is currently being removed. Change it a little. */ ++x_bits; if ((x_bits & AO_BIT_MASK) == 0) /* Version count overflowed; */ /* EXTREMELY unlikely, but possible. */ x_bits = (AO_t)x; goto retry; } } # else { int i; for (i = 0; i < AO_BL_SIZE; ++i) { if (AO_load(&a->AO_stack_bl[i]) == x_bits) { /* Entry is currently being removed. Change it a little. */ ++x_bits; if ((x_bits & AO_BIT_MASK) == 0) /* Version count overflowed; */ /* EXTREMELY unlikely, but possible. */ x_bits = (AO_t)x; goto retry; } } } # endif /* x_bits is not currently being deleted */ do { next = AO_load(list); store_before_cas(x, next); } while (AO_EXPECT_FALSE(!AO_compare_and_swap_release(list, next, x_bits))); } /* * I concluded experimentally that checking a value first before * performing a compare-and-swap is usually beneficial on X86, but * slows things down appreciably with contention on Itanium. * Since the Itanium behavior makes more sense to me (more cache line * movement unless we're mostly reading, but back-off should guard * against that), we take Itanium as the default. Measurements on * other multiprocessor architectures would be useful. (On a uniprocessor, * the initial check is almost certainly a very small loss.) - HB */ #ifdef __i386__ # define PRECHECK(a) (a) == 0 && #else # define PRECHECK(a) #endif /* This function is used before CAS in the below AO_stack_pop() and the */ /* data race (reported by TSan) is OK because it results in a retry. */ #ifdef AO_THREAD_SANITIZER AO_ATTR_NO_SANITIZE_THREAD static AO_t AO_load_next(const volatile AO_t *first_ptr) { /* Assuming an architecture on which loads of word type are atomic. */ /* AO_load cannot be used here because it cannot be instructed to */ /* suppress the warning about the race. */ return *first_ptr; } #else # define AO_load_next AO_load #endif AO_API AO_t *AO_stack_pop_explicit_aux_acquire(volatile AO_t *list, AO_stack_aux *a) { unsigned i; int j = 0; AO_t first; AO_t * first_ptr; AO_t next; retry: first = AO_load(list); if (0 == first) return 0; /* Insert first into aux black list. */ /* This may spin if more than AO_BL_SIZE removals using auxiliary */ /* structure a are currently in progress. */ for (i = 0; ; ) { if (PRECHECK(a -> AO_stack_bl[i]) AO_compare_and_swap_acquire(a->AO_stack_bl+i, 0, first)) break; ++i; if ( i >= AO_BL_SIZE ) { i = 0; AO_pause(++j); } } assert(i < AO_BL_SIZE); # ifndef AO_THREAD_SANITIZER assert(a -> AO_stack_bl[i] == first); /* No actual race with the above CAS. */ # endif /* First is on the auxiliary black list. It may be removed by */ /* another thread before we get to it, but a new insertion of x */ /* cannot be started here. */ /* Only we can remove it from the black list. */ /* We need to make sure that first is still the first entry on the */ /* list. Otherwise it's possible that a reinsertion of it was */ /* already started before we added the black list entry. */ # if defined(__alpha__) && (__GNUC__ == 4) if (first != AO_load_acquire(list)) /* Workaround __builtin_expect bug found in */ /* gcc-4.6.3/alpha causing test_stack failure. */ # else if (AO_EXPECT_FALSE(first != AO_load_acquire(list))) /* Workaround test failure on AIX, at least, by */ /* using acquire ordering semantics for this */ /* load. Probably, it is not the right fix. */ # endif { AO_store_release(a->AO_stack_bl+i, 0); goto retry; } first_ptr = AO_REAL_NEXT_PTR(first); next = AO_load_next(first_ptr); # if defined(__alpha__) && (__GNUC__ == 4) if (!AO_compare_and_swap_release(list, first, next)) # else if (AO_EXPECT_FALSE(!AO_compare_and_swap_release(list, first, next))) # endif { AO_store_release(a->AO_stack_bl+i, 0); goto retry; } # ifndef AO_THREAD_SANITIZER assert(*list != first); /* No actual race with the above CAS. */ # endif /* Since we never insert an entry on the black list, this cannot have */ /* succeeded unless first remained on the list while we were running. */ /* Thus its next link cannot have changed out from under us, and we */ /* removed exactly one entry and preserved the rest of the list. */ /* Note that it is quite possible that an additional entry was */ /* inserted and removed while we were running; this is OK since the */ /* part of the list following first must have remained unchanged, and */ /* first must again have been at the head of the list when the */ /* compare_and_swap succeeded. */ AO_store_release(a->AO_stack_bl+i, 0); return first_ptr; } #else /* ! USE_ALMOST_LOCK_FREE */ /* The functionality is the same as of AO_load_next but the atomicity */ /* is not needed. The usage is similar to that of store_before_cas. */ #if defined(AO_THREAD_SANITIZER) \ && (defined(AO_HAVE_compare_and_swap_double) \ || defined(AO_HAVE_compare_double_and_swap_double)) /* TODO: If compiled by Clang (as of clang-4.0) with -O3 flag, */ /* no_sanitize attribute is ignored unless the argument is volatile. */ # if defined(__clang__) # define LOAD_BEFORE_CAS_VOLATILE volatile # else # define LOAD_BEFORE_CAS_VOLATILE /* empty */ # endif AO_ATTR_NO_SANITIZE_THREAD static AO_t load_before_cas(const LOAD_BEFORE_CAS_VOLATILE AO_t *addr) { return *addr; } #else # define load_before_cas(addr) (*(addr)) #endif /* Better names for fields in AO_stack_t */ #define ptr AO_val2 #define version AO_val1 #if defined(AO_HAVE_compare_double_and_swap_double) \ && !(defined(AO_STACK_PREFER_CAS_DOUBLE) \ && defined(AO_HAVE_compare_and_swap_double)) #ifdef LINT2 volatile /* non-static */ AO_t AO_noop_sink; #endif AO_API void AO_stack_push_release(AO_stack_t *list, AO_t *element) { AO_t next; do { next = AO_load(&(list -> ptr)); store_before_cas(element, next); } while (AO_EXPECT_FALSE(!AO_compare_and_swap_release(&(list -> ptr), next, (AO_t)element))); /* This uses a narrow CAS here, an old optimization suggested */ /* by Treiber. Pop is still safe, since we run into the ABA */ /* problem only if there were both intervening "pop"s and "push"es. */ /* In that case we still see a change in the version number. */ # ifdef LINT2 /* Instruct static analyzer that element is not lost. */ AO_noop_sink = (AO_t)element; # endif } AO_API AO_t *AO_stack_pop_acquire(AO_stack_t *list) { # if defined(__clang__) && !AO_CLANG_PREREQ(3, 5) AO_t *volatile cptr; /* Use volatile to workaround a bug in */ /* clang-1.1/x86 causing test_stack failure. */ # else AO_t *cptr; # endif AO_t next; AO_t cversion; do { /* Version must be loaded first. */ cversion = AO_load_acquire(&(list -> version)); cptr = (AO_t *)AO_load(&(list -> ptr)); if (NULL == cptr) return NULL; next = load_before_cas((AO_t *)cptr); } while (AO_EXPECT_FALSE(!AO_compare_double_and_swap_double_release(list, cversion, (AO_t)cptr, cversion+1, (AO_t)next))); return cptr; } #elif defined(AO_HAVE_compare_and_swap_double) /* Needed for future IA64 processors. No current clients? */ /* TODO: Not tested thoroughly. */ /* We have a wide CAS, but only does an AO_t-wide comparison. */ /* We can't use the Treiber optimization, since we only check */ /* for an unchanged version number, not an unchanged pointer. */ AO_API void AO_stack_push_release(AO_stack_t *list, AO_t *element) { AO_t version; do { AO_t next_ptr; /* Again version must be loaded first, for different reason. */ version = AO_load_acquire(&(list -> version)); next_ptr = AO_load(&(list -> ptr)); store_before_cas(element, next_ptr); } while (!AO_compare_and_swap_double_release( list, version, version+1, (AO_t) element)); } AO_API AO_t *AO_stack_pop_acquire(AO_stack_t *list) { AO_t *cptr; AO_t next; AO_t cversion; do { cversion = AO_load_acquire(&(list -> version)); cptr = (AO_t *)AO_load(&(list -> ptr)); if (NULL == cptr) return NULL; next = load_before_cas(cptr); } while (!AO_compare_double_and_swap_double_release(list, cversion, (AO_t)cptr, cversion+1, next)); return cptr; } #endif /* AO_HAVE_compare_and_swap_double */ #endif /* ! USE_ALMOST_LOCK_FREE */