author    Yann Ylavic <ylavic@apache.org>    2021-10-29 15:54:35 +0000
committer Yann Ylavic <ylavic@apache.org>    2021-10-29 15:54:35 +0000
commit    09ecd1c5e6a3241a51ed9c4333beca2f19c591a1 (patch)
tree      4d8faad3598b2bac665cb1fe52cb460781f409cd /atomic
parent    942f42c4346068d00001bdd7d09be44ec93b7725 (diff)
download  apr-09ecd1c5e6a3241a51ed9c4333beca2f19c591a1.tar.gz
apr_atomic: Fix load/store for weak memory ordering architectures.
Volatile access prevents compiler reordering of loads/stores, but it is not enough for weakly ordered architectures like ARM32 and PowerPC[64].

While the __atomic builtins provide a load and a store, the __sync builtins don't, so let's use an atomic add of zero for the former and an atomic exchange for the latter.

The assembly code for PowerPC was not correct either; fix apr_atomic_read32() and apr_atomic_set32(), and add the necessary memory barriers for the others.

PR 50586.

git-svn-id: https://svn.apache.org/repos/asf/apr/apr/trunk@1894622 13f79535-47bb-0310-9956-ffa450edef68
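For reference, the __sync-based fallback described above can be sketched in isolation. This is a minimal illustration of the technique the patch applies on weakly ordered architectures, assuming a GCC-like compiler that lacks the __atomic builtins; the my_load32()/my_store32() names are hypothetical and not part of APR.

    #include <stdint.h>

    /* Emulate a sequentially consistent 32-bit load/store with the legacy
     * __sync builtins, which provide no plain load or store operation. */

    static uint32_t my_load32(volatile uint32_t *mem)
    {
        /* Atomic add of zero: returns the current value and implies a
         * full memory barrier. */
        return __sync_fetch_and_add(mem, 0);
    }

    static void my_store32(volatile uint32_t *mem, uint32_t val)
    {
        /* __sync_lock_test_and_set() is only an acquire barrier, so a
         * full barrier is issued first (as in the patch). */
        __sync_synchronize();
        (void)__sync_lock_test_and_set(mem, val);
    }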
Diffstat (limited to 'atomic')
-rw-r--r--  atomic/unix/builtins.c   | 13
-rw-r--r--  atomic/unix/builtins64.c | 13
-rw-r--r--  atomic/unix/ppc.c        | 63
3 files changed, 73 insertions(+), 16 deletions(-)
diff --git a/atomic/unix/builtins.c b/atomic/unix/builtins.c
index d0f1b454c..7aaada2f4 100644
--- a/atomic/unix/builtins.c
+++ b/atomic/unix/builtins.c
@@ -18,6 +18,12 @@
#ifdef USE_ATOMICS_BUILTINS
+#if defined(__arm__) || defined(__powerpc__) || defined(__powerpc64__)
+#define WEAK_MEMORY_ORDERING 1
+#else
+#define WEAK_MEMORY_ORDERING 0
+#endif
+
APR_DECLARE(apr_status_t) apr_atomic_init(apr_pool_t *p)
{
return APR_SUCCESS;
@@ -27,6 +33,9 @@ APR_DECLARE(apr_uint32_t) apr_atomic_read32(volatile apr_uint32_t *mem)
{
#if HAVE__ATOMIC_BUILTINS
return __atomic_load_n(mem, __ATOMIC_SEQ_CST);
+#elif WEAK_MEMORY_ORDERING
+ /* No __sync_load() available => apr_atomic_add32(mem, 0) */
+ return __sync_fetch_and_add(mem, 0);
#else
return *mem;
#endif
@@ -36,6 +45,10 @@ APR_DECLARE(void) apr_atomic_set32(volatile apr_uint32_t *mem, apr_uint32_t val)
{
#if HAVE__ATOMIC_BUILTINS
__atomic_store_n(mem, val, __ATOMIC_SEQ_CST);
+#elif WEAK_MEMORY_ORDERING
+ /* No __sync_store() available => apr_atomic_xchg32(mem, val) */
+ __sync_synchronize();
+ __sync_lock_test_and_set(mem, val);
#else
*mem = val;
#endif
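To illustrate why the plain volatile accesses were insufficient (PR 50586), here is a hypothetical publish/consume pattern built on the two functions patched above; the payload/ready variables and the publish()/consume() helpers are illustrative only, and the apr_atomic.h include is assumed. Without the barriers added in this commit, a consumer on ARM32 or PowerPC could observe the flag set before the payload write became visible.

    #include "apr_atomic.h"

    static apr_uint32_t payload;
    static volatile apr_uint32_t ready;         /* 0 = not published, 1 = published */

    /* Producer thread: write the data, then publish the flag. */
    static void publish(apr_uint32_t value)
    {
        payload = value;
        apr_atomic_set32(&ready, 1);            /* needs (at least) release ordering */
    }

    /* Consumer thread: spin on the flag, then read the data. */
    static apr_uint32_t consume(void)
    {
        while (apr_atomic_read32(&ready) == 0)  /* needs (at least) acquire ordering */
            ;
        return payload;
    }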
diff --git a/atomic/unix/builtins64.c b/atomic/unix/builtins64.c
index 0ac950c15..0f6edffbb 100644
--- a/atomic/unix/builtins64.c
+++ b/atomic/unix/builtins64.c
@@ -18,10 +18,19 @@
#ifdef USE_ATOMICS_BUILTINS
+#if defined(__arm__) || defined(__powerpc__) || defined(__powerpc64__)
+#define WEAK_MEMORY_ORDERING 1
+#else
+#define WEAK_MEMORY_ORDERING 0
+#endif
+
APR_DECLARE(apr_uint64_t) apr_atomic_read64(volatile apr_uint64_t *mem)
{
#if HAVE__ATOMIC_BUILTINS
return __atomic_load_n(mem, __ATOMIC_SEQ_CST);
+#elif WEAK_MEMORY_ORDERING
+ /* No __sync_load() available => apr_atomic_add64(mem, 0) */
+ return __sync_fetch_and_add(mem, 0);
#else
return *mem;
#endif
@@ -31,6 +40,10 @@ APR_DECLARE(void) apr_atomic_set64(volatile apr_uint64_t *mem, apr_uint64_t val)
{
#if HAVE__ATOMIC_BUILTINS
__atomic_store_n(mem, val, __ATOMIC_SEQ_CST);
+#elif WEAK_MEMORY_ORDERING
+ /* No __sync_store() available => apr_atomic_xchg64(mem, val) */
+ __sync_synchronize();
+ __sync_lock_test_and_set(mem, val);
#else
*mem = val;
#endif
diff --git a/atomic/unix/ppc.c b/atomic/unix/ppc.c
index 55bbdd50c..ffba3c27a 100644
--- a/atomic/unix/ppc.c
+++ b/atomic/unix/ppc.c
@@ -35,24 +35,39 @@ APR_DECLARE(apr_status_t) apr_atomic_init(apr_pool_t *p)
APR_DECLARE(apr_uint32_t) apr_atomic_read32(volatile apr_uint32_t *mem)
{
- return *mem;
+ apr_uint32_t val;
+ asm volatile (" sync\n" /* full barrier */
+ " lwz %0,%1\n" /* load */
+ " cmpw 7,%0,%0\n" /* compare (always equal) */
+ " bne- 7,$+4\n" /* goto next in any case */
+ " isync" /* acquire barrier (bc+isync) */
+ : "=r"(val)
+ : "m"(*mem)
+ : "cc", "memory");
+ return val;
}
APR_DECLARE(void) apr_atomic_set32(volatile apr_uint32_t *mem, apr_uint32_t val)
{
- *mem = val;
+ asm volatile (" sync\n" /* full barrier */
+ " stw %1,%0" /* store */
+ : "=m"(*mem)
+ : "r"(val)
+ : "memory");
}
APR_DECLARE(apr_uint32_t) apr_atomic_add32(volatile apr_uint32_t *mem, apr_uint32_t val)
{
apr_uint32_t prev, temp;
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" lwarx %0,0,%3\n" /* load and reserve */
" add %1,%0,%4\n" /* add val and prev */
PPC405_ERR77_SYNC /* ppc405 Erratum 77 */
- " stwcx. %1,0,%3\n" /* store new value */
+ " stwcx. %1,0,%3\n" /* store if still reserved */
" bne- 1b\n" /* loop if lost */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&r" (prev), "=&r" (temp), "=m" (*mem)
: "b" (mem), "r" (val)
: "cc", "memory");
@@ -64,12 +79,14 @@ APR_DECLARE(void) apr_atomic_sub32(volatile apr_uint32_t *mem, apr_uint32_t val)
{
apr_uint32_t temp;
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" lwarx %0,0,%2\n" /* load and reserve */
" subf %0,%3,%0\n" /* subtract val */
PPC405_ERR77_SYNC /* ppc405 Erratum 77 */
" stwcx. %0,0,%2\n" /* store new value */
" bne- 1b\n" /* loop if lost */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&r" (temp), "=m" (*mem)
: "b" (mem), "r" (val)
: "cc", "memory");
@@ -79,13 +96,15 @@ APR_DECLARE(apr_uint32_t) apr_atomic_inc32(volatile apr_uint32_t *mem)
{
apr_uint32_t prev;
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" lwarx %0,0,%2\n" /* load and reserve */
" addi %0,%0,1\n" /* add immediate */
PPC405_ERR77_SYNC /* ppc405 Erratum 77 */
" stwcx. %0,0,%2\n" /* store new value */
" bne- 1b\n" /* loop if lost */
" subi %0,%0,1\n" /* return old value */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&b" (prev), "=m" (*mem)
: "b" (mem), "m" (*mem)
: "cc", "memory");
@@ -97,12 +116,14 @@ APR_DECLARE(int) apr_atomic_dec32(volatile apr_uint32_t *mem)
{
apr_uint32_t prev;
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" lwarx %0,0,%2\n" /* load and reserve */
" subi %0,%0,1\n" /* subtract immediate */
PPC405_ERR77_SYNC /* ppc405 Erratum 77 */
" stwcx. %0,0,%2\n" /* store new value */
" bne- 1b\n" /* loop if lost */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&b" (prev), "=m" (*mem)
: "b" (mem), "m" (*mem)
: "cc", "memory");
@@ -115,7 +136,8 @@ APR_DECLARE(apr_uint32_t) apr_atomic_cas32(volatile apr_uint32_t *mem, apr_uint3
{
apr_uint32_t prev;
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" lwarx %0,0,%1\n" /* load and reserve */
" cmpw %0,%3\n" /* compare operands */
" bne- exit_%=\n" /* skip if not equal */
@@ -123,6 +145,7 @@ APR_DECLARE(apr_uint32_t) apr_atomic_cas32(volatile apr_uint32_t *mem, apr_uint3
" stwcx. %2,0,%1\n" /* store new value */
" bne- 1b\n" /* loop if lost */
"exit_%=:\n" /* not equal */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&r" (prev)
: "b" (mem), "r" (with), "r" (cmp)
: "cc", "memory");
@@ -134,11 +157,13 @@ APR_DECLARE(apr_uint32_t) apr_atomic_xchg32(volatile apr_uint32_t *mem, apr_uint
{
apr_uint32_t prev;
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" lwarx %0,0,%1\n" /* load and reserve */
PPC405_ERR77_SYNC /* ppc405 Erratum 77 */
" stwcx. %2,0,%1\n" /* store new value */
- " bne- 1b" /* loop if lost */
+ " bne- 1b\n" /* loop if lost */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&r" (prev)
: "b" (mem), "r" (val)
: "cc", "memory");
@@ -150,7 +175,8 @@ APR_DECLARE(void*) apr_atomic_casptr(void *volatile *mem, void *with, const void
{
void *prev;
#if APR_SIZEOF_VOIDP == 4
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" lwarx %0,0,%1\n" /* load and reserve */
" cmpw %0,%3\n" /* compare operands */
" bne- 2f\n" /* skip if not equal */
@@ -158,11 +184,13 @@ APR_DECLARE(void*) apr_atomic_casptr(void *volatile *mem, void *with, const void
" stwcx. %2,0,%1\n" /* store new value */
" bne- 1b\n" /* loop if lost */
"2:\n" /* not equal */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&r" (prev)
: "b" (mem), "r" (with), "r" (cmp)
: "cc", "memory");
#elif APR_SIZEOF_VOIDP == 8
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" ldarx %0,0,%1\n" /* load and reserve */
" cmpd %0,%3\n" /* compare operands */
" bne- 2f\n" /* skip if not equal */
@@ -170,6 +198,7 @@ APR_DECLARE(void*) apr_atomic_casptr(void *volatile *mem, void *with, const void
" stdcx. %2,0,%1\n" /* store new value */
" bne- 1b\n" /* loop if lost */
"2:\n" /* not equal */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&r" (prev)
: "b" (mem), "r" (with), "r" (cmp)
: "cc", "memory");
@@ -183,22 +212,24 @@ APR_DECLARE(void*) apr_atomic_xchgptr(void *volatile *mem, void *with)
{
void *prev;
#if APR_SIZEOF_VOIDP == 4
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" lwarx %0,0,%1\n" /* load and reserve */
PPC405_ERR77_SYNC /* ppc405 Erratum 77 */
" stwcx. %2,0,%1\n" /* store new value */
" bne- 1b\n" /* loop if lost */
- " isync\n" /* memory barrier */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&r" (prev)
: "b" (mem), "r" (with)
: "cc", "memory");
#elif APR_SIZEOF_VOIDP == 8
- asm volatile ("1:\n" /* lost reservation */
+ asm volatile (" sync\n" /* full barrier */
+ "1:\n" /* lost reservation */
" ldarx %0,0,%1\n" /* load and reserve */
PPC405_ERR77_SYNC /* ppc405 Erratum 77 */
" stdcx. %2,0,%1\n" /* store new value */
" bne- 1b\n" /* loop if lost */
- " isync\n" /* memory barrier */
+ " isync\n" /* acquire barrier (bc+isync) */
: "=&r" (prev)
: "b" (mem), "r" (with)
: "cc", "memory");