diff options
-rw-r--r-- | atomic/unix/builtins.c | 69 | ||||
-rw-r--r-- | atomic/unix/builtins64.c | 51 | ||||
-rw-r--r-- | atomic/unix/ppc.c | 63 | ||||
-rw-r--r-- | configure.in | 45 |
4 files changed, 192 insertions, 36 deletions
diff --git a/atomic/unix/builtins.c b/atomic/unix/builtins.c index 745acf155..22b828c3c 100644 --- a/atomic/unix/builtins.c +++ b/atomic/unix/builtins.c @@ -18,6 +18,12 @@ #ifdef USE_ATOMICS_BUILTINS +#if defined(__arm__) || defined(__powerpc__) || defined(__powerpc64__) +#define WEAK_MEMORY_ORDERING 1 +#else +#define WEAK_MEMORY_ORDERING 0 +#endif + APR_DECLARE(apr_status_t) apr_atomic_init(apr_pool_t *p) { return APR_SUCCESS; @@ -25,57 +31,104 @@ APR_DECLARE(apr_status_t) apr_atomic_init(apr_pool_t *p) APR_DECLARE(apr_uint32_t) apr_atomic_read32(volatile apr_uint32_t *mem) { +#if HAVE__ATOMIC_BUILTINS + return __atomic_load_n(mem, __ATOMIC_SEQ_CST); +#elif WEAK_MEMORY_ORDERING + /* No __sync_load() available => apr_atomic_add32(mem, 0) */ + return __sync_fetch_and_add(mem, 0); +#else return *mem; +#endif } APR_DECLARE(void) apr_atomic_set32(volatile apr_uint32_t *mem, apr_uint32_t val) { +#if HAVE__ATOMIC_BUILTINS + __atomic_store_n(mem, val, __ATOMIC_SEQ_CST); +#elif WEAK_MEMORY_ORDERING + /* No __sync_store() available => apr_atomic_xchg32(mem, val) */ + __sync_synchronize(); + __sync_lock_test_and_set(mem, val); +#else *mem = val; +#endif } APR_DECLARE(apr_uint32_t) apr_atomic_add32(volatile apr_uint32_t *mem, apr_uint32_t val) { +#if HAVE__ATOMIC_BUILTINS + return __atomic_fetch_add(mem, val, __ATOMIC_SEQ_CST); +#else return __sync_fetch_and_add(mem, val); +#endif } APR_DECLARE(void) apr_atomic_sub32(volatile apr_uint32_t *mem, apr_uint32_t val) { +#if HAVE__ATOMIC_BUILTINS + __atomic_fetch_sub(mem, val, __ATOMIC_SEQ_CST); +#else __sync_fetch_and_sub(mem, val); +#endif } APR_DECLARE(apr_uint32_t) apr_atomic_inc32(volatile apr_uint32_t *mem) { +#if HAVE__ATOMIC_BUILTINS + return __atomic_fetch_add(mem, 1, __ATOMIC_SEQ_CST); +#else return __sync_fetch_and_add(mem, 1); +#endif } APR_DECLARE(int) apr_atomic_dec32(volatile apr_uint32_t *mem) { +#if HAVE__ATOMIC_BUILTINS + return __atomic_sub_fetch(mem, 1, __ATOMIC_SEQ_CST); +#else return __sync_sub_and_fetch(mem, 1); +#endif } -APR_DECLARE(apr_uint32_t) apr_atomic_cas32(volatile apr_uint32_t *mem, apr_uint32_t with, +APR_DECLARE(apr_uint32_t) apr_atomic_cas32(volatile apr_uint32_t *mem, apr_uint32_t val, apr_uint32_t cmp) { - return __sync_val_compare_and_swap(mem, cmp, with); +#if HAVE__ATOMIC_BUILTINS + __atomic_compare_exchange_n(mem, &cmp, val, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + return cmp; +#else + return __sync_val_compare_and_swap(mem, cmp, val); +#endif } APR_DECLARE(apr_uint32_t) apr_atomic_xchg32(volatile apr_uint32_t *mem, apr_uint32_t val) { +#if HAVE__ATOMIC_BUILTINS + return __atomic_exchange_n(mem, val, __ATOMIC_SEQ_CST); +#else __sync_synchronize(); - return __sync_lock_test_and_set(mem, val); +#endif } -APR_DECLARE(void*) apr_atomic_casptr(volatile void **mem, void *with, const void *cmp) +APR_DECLARE(void*) apr_atomic_casptr(volatile void **mem, void *ptr, const void *cmp) { - return (void*) __sync_val_compare_and_swap(mem, cmp, with); +#if HAVE__ATOMIC_BUILTINS + __atomic_compare_exchange_n(mem, (void **)&cmp, ptr, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + return (void *)cmp; +#else + return (void *)__sync_val_compare_and_swap(mem, (void *)cmp, ptr); +#endif } -APR_DECLARE(void*) apr_atomic_xchgptr(volatile void **mem, void *with) +APR_DECLARE(void*) apr_atomic_xchgptr(volatile void **mem, void *ptr) { +#if HAVE__ATOMIC_BUILTINS + return (void *)__atomic_exchange_n(mem, ptr, __ATOMIC_SEQ_CST); +#else __sync_synchronize(); - - return (void*) __sync_lock_test_and_set(mem, with); + return (void *)__sync_lock_test_and_set(mem, ptr); +#endif } #endif /* USE_ATOMICS_BUILTINS */ diff --git a/atomic/unix/builtins64.c b/atomic/unix/builtins64.c index 4a4b685c7..0f6edffbb 100644 --- a/atomic/unix/builtins64.c +++ b/atomic/unix/builtins64.c @@ -18,47 +18,92 @@ #ifdef USE_ATOMICS_BUILTINS +#if defined(__arm__) || defined(__powerpc__) || defined(__powerpc64__) +#define WEAK_MEMORY_ORDERING 1 +#else +#define WEAK_MEMORY_ORDERING 0 +#endif + APR_DECLARE(apr_uint64_t) apr_atomic_read64(volatile apr_uint64_t *mem) { +#if HAVE__ATOMIC_BUILTINS + return __atomic_load_n(mem, __ATOMIC_SEQ_CST); +#elif WEAK_MEMORY_ORDERING + /* No __sync_load() available => apr_atomic_add64(mem, 0) */ + return __sync_fetch_and_add(mem, 0); +#else return *mem; +#endif } APR_DECLARE(void) apr_atomic_set64(volatile apr_uint64_t *mem, apr_uint64_t val) { +#if HAVE__ATOMIC_BUILTINS + __atomic_store_n(mem, val, __ATOMIC_SEQ_CST); +#elif WEAK_MEMORY_ORDERING + /* No __sync_store() available => apr_atomic_xchg64(mem, val) */ + __sync_synchronize(); + __sync_lock_test_and_set(mem, val); +#else *mem = val; +#endif } APR_DECLARE(apr_uint64_t) apr_atomic_add64(volatile apr_uint64_t *mem, apr_uint64_t val) { +#if HAVE__ATOMIC_BUILTINS + return __atomic_fetch_add(mem, val, __ATOMIC_SEQ_CST); +#else return __sync_fetch_and_add(mem, val); +#endif } APR_DECLARE(void) apr_atomic_sub64(volatile apr_uint64_t *mem, apr_uint64_t val) { +#if HAVE__ATOMIC_BUILTINS + __atomic_fetch_sub(mem, val, __ATOMIC_SEQ_CST); +#else __sync_fetch_and_sub(mem, val); +#endif } APR_DECLARE(apr_uint64_t) apr_atomic_inc64(volatile apr_uint64_t *mem) { +#if HAVE__ATOMIC_BUILTINS + return __atomic_fetch_add(mem, 1, __ATOMIC_SEQ_CST); +#else return __sync_fetch_and_add(mem, 1); +#endif } APR_DECLARE(int) apr_atomic_dec64(volatile apr_uint64_t *mem) { +#if HAVE__ATOMIC_BUILTINS + return __atomic_sub_fetch(mem, 1, __ATOMIC_SEQ_CST); +#else return __sync_sub_and_fetch(mem, 1); +#endif } -APR_DECLARE(apr_uint64_t) apr_atomic_cas64(volatile apr_uint64_t *mem, apr_uint64_t with, +APR_DECLARE(apr_uint64_t) apr_atomic_cas64(volatile apr_uint64_t *mem, apr_uint64_t val, apr_uint64_t cmp) { - return __sync_val_compare_and_swap(mem, cmp, with); +#if HAVE__ATOMIC_BUILTINS + __atomic_compare_exchange_n(mem, &cmp, val, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + return cmp; +#else + return __sync_val_compare_and_swap(mem, cmp, val); +#endif } APR_DECLARE(apr_uint64_t) apr_atomic_xchg64(volatile apr_uint64_t *mem, apr_uint64_t val) { +#if HAVE__ATOMIC_BUILTINS + return __atomic_exchange_n(mem, val, __ATOMIC_SEQ_CST); +#else __sync_synchronize(); - return __sync_lock_test_and_set(mem, val); +#endif } #endif /* USE_ATOMICS_BUILTINS */ diff --git a/atomic/unix/ppc.c b/atomic/unix/ppc.c index 1823fe81c..46554af54 100644 --- a/atomic/unix/ppc.c +++ b/atomic/unix/ppc.c @@ -35,24 +35,39 @@ APR_DECLARE(apr_status_t) apr_atomic_init(apr_pool_t *p) APR_DECLARE(apr_uint32_t) apr_atomic_read32(volatile apr_uint32_t *mem) { - return *mem; + apr_uint32_t val; + asm volatile (" sync\n" /* full barrier */ + " lwz %0,%1\n" /* load */ + " cmpw 7,%0,%0\n" /* compare (always equal) */ + " bne- 7,$+4\n" /* goto next in any case */ + " isync" /* acquire barrier (bc+isync) */ + : "=r"(val) + : "m"(*mem) + : "cc", "memory"); + return val; } APR_DECLARE(void) apr_atomic_set32(volatile apr_uint32_t *mem, apr_uint32_t val) { - *mem = val; + asm volatile (" sync\n" /* full barrier */ + " stw %1,%0" /* store */ + : "=m"(*mem) + : "r"(val) + : "memory"); } APR_DECLARE(apr_uint32_t) apr_atomic_add32(volatile apr_uint32_t *mem, apr_uint32_t val) { apr_uint32_t prev, temp; - asm volatile ("1:\n" /* lost reservation */ + asm volatile (" sync\n" /* full barrier */ + "1:\n" /* lost reservation */ " lwarx %0,0,%3\n" /* load and reserve */ " add %1,%0,%4\n" /* add val and prev */ PPC405_ERR77_SYNC /* ppc405 Erratum 77 */ - " stwcx. %1,0,%3\n" /* store new value */ + " stwcx. %1,0,%3\n" /* store if still reserved */ " bne- 1b\n" /* loop if lost */ + " isync\n" /* acquire barrier (bc+isync) */ : "=&r" (prev), "=&r" (temp), "=m" (*mem) : "b" (mem), "r" (val) : "cc", "memory"); @@ -64,12 +79,14 @@ APR_DECLARE(void) apr_atomic_sub32(volatile apr_uint32_t *mem, apr_uint32_t val) { apr_uint32_t temp; - asm volatile ("1:\n" /* lost reservation */ + asm volatile (" sync\n" /* full barrier */ + "1:\n" /* lost reservation */ " lwarx %0,0,%2\n" /* load and reserve */ " subf %0,%3,%0\n" /* subtract val */ PPC405_ERR77_SYNC /* ppc405 Erratum 77 */ " stwcx. %0,0,%2\n" /* store new value */ " bne- 1b\n" /* loop if lost */ + " isync\n" /* acquire barrier (bc+isync) */ : "=&r" (temp), "=m" (*mem) : "b" (mem), "r" (val) : "cc", "memory"); @@ -79,13 +96,15 @@ APR_DECLARE(apr_uint32_t) apr_atomic_inc32(volatile apr_uint32_t *mem) { apr_uint32_t prev; - asm volatile ("1:\n" /* lost reservation */ + asm volatile (" sync\n" /* full barrier */ + "1:\n" /* lost reservation */ " lwarx %0,0,%2\n" /* load and reserve */ " addi %0,%0,1\n" /* add immediate */ PPC405_ERR77_SYNC /* ppc405 Erratum 77 */ " stwcx. %0,0,%2\n" /* store new value */ " bne- 1b\n" /* loop if lost */ " subi %0,%0,1\n" /* return old value */ + " isync\n" /* acquire barrier (bc+isync) */ : "=&b" (prev), "=m" (*mem) : "b" (mem), "m" (*mem) : "cc", "memory"); @@ -97,12 +116,14 @@ APR_DECLARE(int) apr_atomic_dec32(volatile apr_uint32_t *mem) { apr_uint32_t prev; - asm volatile ("1:\n" /* lost reservation */ + asm volatile (" sync\n" /* full barrier */ + "1:\n" /* lost reservation */ " lwarx %0,0,%2\n" /* load and reserve */ " subi %0,%0,1\n" /* subtract immediate */ PPC405_ERR77_SYNC /* ppc405 Erratum 77 */ " stwcx. %0,0,%2\n" /* store new value */ " bne- 1b\n" /* loop if lost */ + " isync\n" /* acquire barrier (bc+isync) */ : "=&b" (prev), "=m" (*mem) : "b" (mem), "m" (*mem) : "cc", "memory"); @@ -115,7 +136,8 @@ APR_DECLARE(apr_uint32_t) apr_atomic_cas32(volatile apr_uint32_t *mem, apr_uint3 { apr_uint32_t prev; - asm volatile ("1:\n" /* lost reservation */ + asm volatile (" sync\n" /* full barrier */ + "1:\n" /* lost reservation */ " lwarx %0,0,%1\n" /* load and reserve */ " cmpw %0,%3\n" /* compare operands */ " bne- exit_%=\n" /* skip if not equal */ @@ -123,6 +145,7 @@ APR_DECLARE(apr_uint32_t) apr_atomic_cas32(volatile apr_uint32_t *mem, apr_uint3 " stwcx. %2,0,%1\n" /* store new value */ " bne- 1b\n" /* loop if lost */ "exit_%=:\n" /* not equal */ + " isync\n" /* acquire barrier (bc+isync) */ : "=&r" (prev) : "b" (mem), "r" (with), "r" (cmp) : "cc", "memory"); @@ -134,11 +157,13 @@ APR_DECLARE(apr_uint32_t) apr_atomic_xchg32(volatile apr_uint32_t *mem, apr_uint { apr_uint32_t prev; - asm volatile ("1:\n" /* lost reservation */ + asm volatile (" sync\n" /* full barrier */ + "1:\n" /* lost reservation */ " lwarx %0,0,%1\n" /* load and reserve */ PPC405_ERR77_SYNC /* ppc405 Erratum 77 */ " stwcx. %2,0,%1\n" /* store new value */ - " bne- 1b" /* loop if lost */ + " bne- 1b\n" /* loop if lost */ + " isync\n" /* acquire barrier (bc+isync) */ : "=&r" (prev) : "b" (mem), "r" (val) : "cc", "memory"); @@ -150,7 +175,8 @@ APR_DECLARE(void*) apr_atomic_casptr(volatile void **mem, void *with, const void { void *prev; #if APR_SIZEOF_VOIDP == 4 - asm volatile ("1:\n" /* lost reservation */ + asm volatile (" sync\n" /* full barrier */ + "1:\n" /* lost reservation */ " lwarx %0,0,%1\n" /* load and reserve */ " cmpw %0,%3\n" /* compare operands */ " bne- 2f\n" /* skip if not equal */ @@ -158,11 +184,13 @@ APR_DECLARE(void*) apr_atomic_casptr(volatile void **mem, void *with, const void " stwcx. %2,0,%1\n" /* store new value */ " bne- 1b\n" /* loop if lost */ "2:\n" /* not equal */ + " isync\n" /* acquire barrier (bc+isync) */ : "=&r" (prev) : "b" (mem), "r" (with), "r" (cmp) : "cc", "memory"); #elif APR_SIZEOF_VOIDP == 8 - asm volatile ("1:\n" /* lost reservation */ + asm volatile (" sync\n" /* full barrier */ + "1:\n" /* lost reservation */ " ldarx %0,0,%1\n" /* load and reserve */ " cmpd %0,%3\n" /* compare operands */ " bne- 2f\n" /* skip if not equal */ @@ -170,6 +198,7 @@ APR_DECLARE(void*) apr_atomic_casptr(volatile void **mem, void *with, const void " stdcx. %2,0,%1\n" /* store new value */ " bne- 1b\n" /* loop if lost */ "2:\n" /* not equal */ + " isync\n" /* acquire barrier (bc+isync) */ : "=&r" (prev) : "b" (mem), "r" (with), "r" (cmp) : "cc", "memory"); @@ -183,22 +212,24 @@ APR_DECLARE(void*) apr_atomic_xchgptr(volatile void **mem, void *with) { void *prev; #if APR_SIZEOF_VOIDP == 4 - asm volatile ("1:\n" /* lost reservation */ + asm volatile (" sync\n" /* full barrier */ + "1:\n" /* lost reservation */ " lwarx %0,0,%1\n" /* load and reserve */ PPC405_ERR77_SYNC /* ppc405 Erratum 77 */ " stwcx. %2,0,%1\n" /* store new value */ " bne- 1b\n" /* loop if lost */ - " isync\n" /* memory barrier */ + " isync\n" /* acquire barrier (bc+isync) */ : "=&r" (prev) : "b" (mem), "r" (with) : "cc", "memory"); #elif APR_SIZEOF_VOIDP == 8 - asm volatile ("1:\n" /* lost reservation */ + asm volatile (" sync\n" /* full barrier */ + "1:\n" /* lost reservation */ " ldarx %0,0,%1\n" /* load and reserve */ PPC405_ERR77_SYNC /* ppc405 Erratum 77 */ " stdcx. %2,0,%1\n" /* store new value */ " bne- 1b\n" /* loop if lost */ - " isync\n" /* memory barrier */ + " isync\n" /* acquire barrier (bc+isync) */ : "=&r" (prev) : "b" (mem), "r" (with) : "cc", "memory"); diff --git a/configure.in b/configure.in index 4981aaf19..28c8d44db 100644 --- a/configure.in +++ b/configure.in @@ -467,7 +467,7 @@ esac AC_CACHE_CHECK([whether the compiler provides atomic builtins], [ap_cv_atomic_builtins], [AC_TRY_RUN([ -int main() +int main(int argc, const char *const *argv) { unsigned long val = 1010, tmp, *mem = &val; @@ -475,7 +475,6 @@ int main() return 1; tmp = val; - if (__sync_fetch_and_sub(mem, 1010) != tmp || val != 1010) return 1; @@ -483,28 +482,56 @@ int main() return 1; tmp = 3030; - if (__sync_val_compare_and_swap(mem, 0, tmp) != 0 || val != tmp) return 1; + __sync_synchronize(); if (__sync_lock_test_and_set(&val, 4040) != 3030) return 1; - mem = &tmp; + if (__sync_val_compare_and_swap(&mem, &val, &tmp) != &val || mem != &tmp) + return 1; + + return 0; +}], [ap_cv_atomic_builtins=yes], [ap_cv_atomic_builtins=no], [ap_cv_atomic_builtins=no])]) + +AC_CACHE_CHECK([whether the compiler provides __atomic builtins], [ap_cv__atomic_builtins], +[AC_TRY_RUN([ +int main(int argc, const char *const *argv) +{ + unsigned long val = 1010, tmp, *mem = &val, *ptmp; - if (__sync_val_compare_and_swap(&mem, &tmp, &val) != &tmp) + if (__atomic_fetch_add(&val, 1010, __ATOMIC_SEQ_CST) != 1010 || val != 2020) return 1; - __sync_synchronize(); + tmp = val; + if (__atomic_fetch_sub(mem, 1010, __ATOMIC_SEQ_CST) != tmp || val != 1010) + return 1; + + if (__atomic_sub_fetch(&val, 1010, __ATOMIC_SEQ_CST) != 0 || val != 0) + return 1; - if (mem != &val) + tmp = val; + if (!__atomic_compare_exchange_n(mem, &tmp, 3030, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) + || tmp != 0) + return 1; + + if (__atomic_exchange_n(&val, 4040, __ATOMIC_SEQ_CST) != 3030) + return 1; + + ptmp = &val; + if (!__atomic_compare_exchange_n(&mem, &ptmp, &tmp, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) + || ptmp != &val || mem != &tmp) return 1; return 0; -}], [ap_cv_atomic_builtins=yes], [ap_cv_atomic_builtins=no], [ap_cv_atomic_builtins=no])]) +}], [ap_cv__atomic_builtins=yes], [ap_cv__atomic_builtins=no], [ap_cv__atomic_builtins=no])]) -if test "$ap_cv_atomic_builtins" = "yes"; then +if test "$ap_cv_atomic_builtins" = "yes" -o "$ap_cv__atomic_builtins" = "yes"; then AC_DEFINE(HAVE_ATOMIC_BUILTINS, 1, [Define if compiler provides atomic builtins]) + if test "$ap_cv__atomic_builtins" = "yes"; then + AC_DEFINE(HAVE__ATOMIC_BUILTINS, 1, [Define if compiler provides __atomic builtins]) + fi fi case $host in |