author     Marko Mäkelä <marko.makela@mariadb.com>    2021-09-28 17:17:59 +0300
committer  Marko Mäkelä <marko.makela@mariadb.com>    2021-09-28 17:17:59 +0300
commit     35f59bc4e1ad8b54c080932387c582bf86b5a77f (patch)
tree       faef5b9034f002c2f4b9f519f383ce303c89a864
parent     0d68b0a2d6e52cbbbd971cd66dab8989dd1e48ac (diff)
download   mariadb-git-35f59bc4e1ad8b54c080932387c582bf86b5a77f.tar.gz
MDEV-26467: More cache friendliness
srw_mutex_impl<bool>::wait_and_lock(): In commit a73eedbf3fabd19ca7183b738056c30e3f7bbe35 we introduced an std::atomic::fetch_or() in a loop. Alas, on IA-32 and AMD64, that was being translated into a loop around LOCK CMPXCHG. To avoid a nested loop, it is better to explicitly invoke std::atomic::compare_exchange_weak() in the loop, but only if the attempt has a chance to succeed (the HOLDER flag is not set).

It is even more efficient to use LOCK BTS, but contemporary compilers fail to translate std::atomic::fetch_or(x) & x into that when x is a single-bit constant. On GCC-compatible compilers, we will use inline assembler to achieve that. On other ISA than IA-32 and AMD64, we will continue to use std::atomic::fetch_or().

ssux_lock_impl<spinloop>::rd_wait(): Use rd_lock_try(). A loop around std::atomic::compare_exchange_weak() should be cheaper than fetch_add(), fetch_sub() and a wakeup system call.

These deficiencies were pointed out and the use of LOCK BTS was suggested by Thiago Macieira.
 -rw-r--r--   storage/innobase/sync/srw_lock.cc   101
 1 file changed, 72 insertions(+), 29 deletions(-)
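
To illustrate the trade-off the message describes, here is a minimal self-contained C++ sketch (illustration only, not MariaDB source; the names lock_word, try_set_holder_*() and spin_lock_cas() are invented for this example) of the three ways to set a single-bit HOLDER flag: std::atomic::fetch_or(), whose tested result is typically compiled on IA-32/AMD64 into a LOCK CMPXCHG retry loop; std::atomic::compare_exchange_weak() attempted only while HOLDER has been observed clear; and a GCC-style asm goto around LOCK BTS in the same shape as the hunk below.

#include <atomic>
#include <cstdint>

constexpr uint32_t HOLDER= 1U << 31;
std::atomic<uint32_t> lock_word{0};

/* (1) fetch_or(): when the result is also tested, IA-32/AMD64 compilers
   typically emit a LOCK CMPXCHG retry loop instead of a single LOCK BTS. */
bool try_set_holder_fetch_or()
{
  return !(lock_word.fetch_or(HOLDER, std::memory_order_acquire) & HOLDER);
}

/* (2) compare_exchange_weak(): one attempt, made only after HOLDER has been
   observed clear, so the hardware CAS is not wasted on attempts that must
   fail; on failure, lk is refreshed with the current value. */
bool try_set_holder_cas(uint32_t &lk)
{
  return lock_word.compare_exchange_weak(lk, lk | HOLDER,
                                         std::memory_order_acquire,
                                         std::memory_order_relaxed);
}

#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
/* (3) LOCK BTS via asm goto, in the same form as the patch: set bit 31 and
   branch on its previous value, with no compare-and-swap retry loop. */
bool try_set_holder_bts()
{
  __asm__ goto("lock btsl $31, %0\n\t"
               "jc %l1"
               : : "m" (lock_word) : "cc", "memory" : already_held);
  std::atomic_thread_fence(std::memory_order_acquire);
  return true;
already_held:
  return false;
}
#endif

/* Spin using approach (2): reload while HOLDER is set, and attempt the CAS
   only when it has a chance to succeed (pause/futex wait elided). */
void spin_lock_cas()
{
  uint32_t lk= lock_word.load(std::memory_order_relaxed);
  for (;;)
  {
    if (lk & HOLDER)
      lk= lock_word.load(std::memory_order_relaxed);
    else if (try_set_holder_cas(lk))
      return;
  }
}

int main()
{
  spin_lock_cas();                                          /* acquire */
  lock_word.fetch_and(~HOLDER, std::memory_order_release);  /* release */
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
  return try_set_holder_bts() ? 0 : 1;                      /* reacquire */
#else
  return try_set_holder_fetch_or() ? 0 : 1;
#endif
}

The guarded CAS avoids the nested loop: the outer spin only reaches the LOCK CMPXCHG (or LOCK BTS) after the bit was just seen clear, so a contended attempt costs a plain load instead of a failed atomic read-modify-write.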
diff --git a/storage/innobase/sync/srw_lock.cc b/storage/innobase/sync/srw_lock.cc
index f3c0afd64e3..542037bc0a2 100644
--- a/storage/innobase/sync/srw_lock.cc
+++ b/storage/innobase/sync/srw_lock.cc
@@ -294,57 +294,107 @@ void srw_mutex_impl<true>::wait_and_lock()
DBUG_ASSERT(~HOLDER & lk);
if (lk & HOLDER)
lk= lock.load(std::memory_order_relaxed);
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+#elif defined _M_IX86||defined _M_X64||defined __i386__||defined __x86_64__
+ else if (lock.compare_exchange_weak(lk, lk | HOLDER,
+ std::memory_order_acquire,
+ std::memory_order_relaxed))
+ return;
+#else
+ else if (!((lk= lock.fetch_or(HOLDER, std::memory_order_relaxed)) &
+ HOLDER))
+ goto acquired;
+#endif
else
{
- lk= lock.fetch_or(HOLDER, std::memory_order_relaxed);
- if (!(lk & HOLDER))
- goto acquired;
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+ static_assert(HOLDER == (1U << 31), "compatibility");
+ __asm__ goto("lock btsl $31, %0\n\t"
+ "jnc %l1" : : "m" (*this) : "cc", "memory" : acquired);
+ lk|= HOLDER;
+#endif
+ srw_pause(delay);
}
- srw_pause(delay);
if (!--spin)
break;
}
- for (;; wait(lk))
+ for (;;)
{
+ DBUG_ASSERT(~HOLDER & lk);
if (lk & HOLDER)
{
+ wait(lk);
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+reload:
+#endif
lk= lock.load(std::memory_order_relaxed);
- if (lk & HOLDER)
- continue;
}
- lk= lock.fetch_or(HOLDER, std::memory_order_relaxed);
- if (!(lk & HOLDER))
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+ else
+ {
+ static_assert(HOLDER == (1U << 31), "compatibility");
+ __asm__ goto("lock btsl $31, %0\n\t"
+ "jc %l1" : : "m" (*this) : "cc", "memory" : reload);
+acquired:
+ std::atomic_thread_fence(std::memory_order_acquire);
+ return;
+ }
+#elif defined _M_IX86||defined _M_X64||defined __i386__||defined __x86_64__
+ else if (lock.compare_exchange_weak(lk, lk | HOLDER,
+ std::memory_order_acquire,
+ std::memory_order_relaxed))
+ return;
+#else
+ else if (!((lk= lock.fetch_or(HOLDER, std::memory_order_relaxed)) &
+ HOLDER))
{
acquired:
DBUG_ASSERT(lk);
std::atomic_thread_fence(std::memory_order_acquire);
return;
}
- DBUG_ASSERT(lk > HOLDER);
+#endif
}
}
template<>
void srw_mutex_impl<false>::wait_and_lock()
{
- uint32_t lk= 1 + lock.fetch_add(1, std::memory_order_relaxed);
- for (;; wait(lk))
+ for (uint32_t lk= 1 + lock.fetch_add(1, std::memory_order_relaxed);;)
{
+ DBUG_ASSERT(~HOLDER & lk);
if (lk & HOLDER)
{
+ wait(lk);
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+reload:
+#endif
lk= lock.load(std::memory_order_relaxed);
- if (lk & HOLDER)
- continue;
}
- lk= lock.fetch_or(HOLDER, std::memory_order_relaxed);
- if (!(lk & HOLDER))
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+ else
+ {
+ static_assert(HOLDER == (1U << 31), "compatibility");
+ __asm__ goto("lock btsl $31, %0\n\t"
+ "jc %l1" : : "m" (*this) : "cc", "memory" : reload);
+ std::atomic_thread_fence(std::memory_order_acquire);
+ return;
+ }
+#elif defined _M_IX86||defined _M_X64||defined __i386__||defined __x86_64__
+ else if (lock.compare_exchange_weak(lk, lk | HOLDER,
+ std::memory_order_acquire,
+ std::memory_order_relaxed))
+ return;
+#else
+ else if (!((lk= lock.fetch_or(HOLDER, std::memory_order_relaxed)) &
+ HOLDER))
{
DBUG_ASSERT(lk);
std::atomic_thread_fence(std::memory_order_acquire);
return;
}
- DBUG_ASSERT(lk > HOLDER);
+#endif
}
}
@@ -373,19 +423,12 @@ void ssux_lock_impl<spinloop>::rd_wait()
for (;;)
{
writer.wr_lock();
- uint32_t lk= readers.fetch_add(1, std::memory_order_acquire);
- if (UNIV_UNLIKELY(lk == WRITER))
- {
- readers.fetch_sub(1, std::memory_order_relaxed);
- wake();
- writer.wr_unlock();
- pthread_yield();
- continue;
- }
- DBUG_ASSERT(!(lk & WRITER));
- break;
+ bool acquired= rd_lock_try();
+ writer.wr_unlock();
+ if (acquired)
+ break;
+ std::this_thread::yield();
}
- writer.wr_unlock();
}
template void ssux_lock_impl<true>::rd_wait();
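
rd_lock_try() itself is not shown in this diff; the sketch below is an assumption based on the commit message, not the actual MariaDB definition. It shows the kind of compare_exchange_weak() loop the message describes: the attempt gives up as soon as the WRITER bit is observed, so a failed read-lock attempt never needs the fetch_sub() roll-back and wake() call that the removed code above performed. The names readers, WRITER, rd_lock_try_sketch() and rd_wait_sketch() are illustrative.

#include <atomic>
#include <cstdint>
#include <thread>

constexpr uint32_t WRITER= 1U << 31;
std::atomic<uint32_t> readers{0};

/* Try to register one reader: retry the CAS while the counter moves, but
   bail out without modifying it as soon as the WRITER bit is seen. */
bool rd_lock_try_sketch()
{
  uint32_t lk= readers.load(std::memory_order_relaxed);
  do
    if (lk & WRITER)
      return false;              /* writer present: fail, nothing to undo */
  while (!readers.compare_exchange_weak(lk, lk + 1,
                                        std::memory_order_acquire,
                                        std::memory_order_relaxed));
  return true;                   /* reader count incremented */
}

/* Retry loop in the shape of the new rd_wait(): the writer.wr_lock()/
   wr_unlock() bracketing from the hunk above is elided here. */
void rd_wait_sketch()
{
  while (!rd_lock_try_sketch())
    std::this_thread::yield();
}

int main()
{
  rd_wait_sketch();                                    /* acquire shared */
  readers.fetch_sub(1, std::memory_order_release);     /* release shared */
}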