author     Marko Mäkelä <marko.makela@mariadb.com>  2021-09-28 17:17:59 +0300
committer  Marko Mäkelä <marko.makela@mariadb.com>  2021-09-28 17:17:59 +0300
commit     35f59bc4e1ad8b54c080932387c582bf86b5a77f (patch)
tree       faef5b9034f002c2f4b9f519f383ce303c89a864
parent     0d68b0a2d6e52cbbbd971cd66dab8989dd1e48ac (diff)
MDEV-26467: More cache friendliness
srw_mutex_impl<bool>::wait_and_lock(): In
commit a73eedbf3fabd19ca7183b738056c30e3f7bbe35 we introduced
an std::atomic::fetch_or() in a loop. Alas, on IA-32 and AMD64
that was being translated into a loop around LOCK CMPXCHG.
To avoid a nested loop, it is better to explicitly invoke
std::atomic::compare_exchange_weak() in the loop, but only if
the attempt has a chance to succeed (the HOLDER flag is not set).
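
For illustration, a minimal sketch of that pattern (the function name
and the stand-alone lock word are illustrative assumptions, not the
exact MariaDB code; the HOLDER bit position matches the static_assert
in the patch below):

  #include <atomic>
  #include <cstdint>

  static constexpr uint32_t HOLDER= 1U << 31;

  /* One acquisition attempt: issue the atomic read-modify-write
     (LOCK CMPXCHG on IA-32 and AMD64) only while the flag is clear;
     otherwise merely reload the word. */
  static bool try_acquire(std::atomic<uint32_t> &lock, uint32_t &lk)
  {
    if (lk & HOLDER)
    {
      lk= lock.load(std::memory_order_relaxed);
      return false;
    }
    /* On failure, compare_exchange_weak() refreshes lk for us. */
    return lock.compare_exchange_weak(lk, lk | HOLDER,
                                      std::memory_order_acquire,
                                      std::memory_order_relaxed);
  }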
It is even more efficient to use LOCK BTS, but contemporary compilers
fail to translate std::atomic::fetch_or(x) & x into that when x is
a single-bit constant. On GCC-compatible compilers, we will use
inline assembler to achieve that.
On ISAs other than IA-32 and AMD64, we will continue to use
std::atomic::fetch_or().
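
A sketch of the LOCK BTS approach described above, assuming a
GCC-compatible compiler on IA-32 or AMD64 and a stand-alone 32-bit
atomic whose bit 31 is the HOLDER flag (the function name is
hypothetical; the real code operates on the mutex object itself):

  #include <atomic>
  #include <cstdint>

  /* LOCK BTS sets bit 31 and leaves the bit's previous value in the
     carry flag, so no compare-and-swap retry loop is needed. */
  static bool try_acquire_bts(std::atomic<uint32_t> &lock)
  {
    __asm__ goto("lock btsl $31, %0\n\t"
                 "jc %l1" : : "m" (lock) : "cc", "memory" : busy);
    std::atomic_thread_fence(std::memory_order_acquire);
    return true;
  busy:
    return false;
  }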
ssux_lock_impl<spinloop>::rd_wait(): Use rd_lock_try().
A loop around std::atomic::compare_exchange_weak() should be
cheaper than fetch_add(), fetch_sub() and a wakeup system call.
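
A hypothetical sketch of such an rd_lock_try() style attempt, assuming
a reader counter word whose WRITER bit excludes readers (names and
layout are assumptions for illustration, not the actual member
function):

  #include <atomic>
  #include <cstdint>

  static constexpr uint32_t WRITER= 1U << 31;

  /* Increment the reader count only while no writer holds the latch;
     this avoids a fetch_add() that may have to be undone by a
     compensating fetch_sub() and a wakeup. */
  static bool rd_lock_try_sketch(std::atomic<uint32_t> &readers)
  {
    uint32_t lk= readers.load(std::memory_order_relaxed);
    while (!(lk & WRITER))
      if (readers.compare_exchange_weak(lk, lk + 1,
                                        std::memory_order_acquire,
                                        std::memory_order_relaxed))
        return true;
    return false;
  }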
These deficiencies were pointed out and the use of LOCK BTS was
suggested by Thiago Macieira.
-rw-r--r--  storage/innobase/sync/srw_lock.cc  101
1 file changed, 72 insertions(+), 29 deletions(-)
diff --git a/storage/innobase/sync/srw_lock.cc b/storage/innobase/sync/srw_lock.cc
index f3c0afd64e3..542037bc0a2 100644
--- a/storage/innobase/sync/srw_lock.cc
+++ b/storage/innobase/sync/srw_lock.cc
@@ -294,57 +294,107 @@ void srw_mutex_impl<true>::wait_and_lock()
     DBUG_ASSERT(~HOLDER & lk);
     if (lk & HOLDER)
       lk= lock.load(std::memory_order_relaxed);
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+#elif defined _M_IX86||defined _M_X64||defined __i386__||defined __x86_64__
+    else if (lock.compare_exchange_weak(lk, lk | HOLDER,
+                                        std::memory_order_acquire,
+                                        std::memory_order_relaxed))
+      return;
+#else
+    else if (!((lk= lock.fetch_or(HOLDER, std::memory_order_relaxed)) &
+               HOLDER))
+      goto acquired;
+#endif
     else
     {
-      lk= lock.fetch_or(HOLDER, std::memory_order_relaxed);
-      if (!(lk & HOLDER))
-        goto acquired;
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+      static_assert(HOLDER == (1U << 31), "compatibility");
+      __asm__ goto("lock btsl $31, %0\n\t"
+                   "jnc %l1" : : "m" (*this) : "cc", "memory" : acquired);
+      lk|= HOLDER;
+#endif
+      srw_pause(delay);
     }
-    srw_pause(delay);
     if (!--spin)
       break;
   }
 
-  for (;; wait(lk))
+  for (;;)
   {
+    DBUG_ASSERT(~HOLDER & lk);
     if (lk & HOLDER)
     {
+      wait(lk);
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+reload:
+#endif
       lk= lock.load(std::memory_order_relaxed);
-      if (lk & HOLDER)
-        continue;
     }
-    lk= lock.fetch_or(HOLDER, std::memory_order_relaxed);
-    if (!(lk & HOLDER))
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+    else
+    {
+      static_assert(HOLDER == (1U << 31), "compatibility");
+      __asm__ goto("lock btsl $31, %0\n\t"
+                   "jc %l1" : : "m" (*this) : "cc", "memory" : reload);
+acquired:
+      std::atomic_thread_fence(std::memory_order_acquire);
+      return;
+    }
+#elif defined _M_IX86||defined _M_X64||defined __i386__||defined __x86_64__
+    else if (lock.compare_exchange_weak(lk, lk | HOLDER,
+                                        std::memory_order_acquire,
+                                        std::memory_order_relaxed))
+      return;
+#else
+    else if (!((lk= lock.fetch_or(HOLDER, std::memory_order_relaxed)) &
+               HOLDER))
     {
 acquired:
       DBUG_ASSERT(lk);
       std::atomic_thread_fence(std::memory_order_acquire);
       return;
     }
-    DBUG_ASSERT(lk > HOLDER);
+#endif
   }
 }
 
 template<>
 void srw_mutex_impl<false>::wait_and_lock()
 {
-  uint32_t lk= 1 + lock.fetch_add(1, std::memory_order_relaxed);
-  for (;; wait(lk))
+  for (uint32_t lk= 1 + lock.fetch_add(1, std::memory_order_relaxed);;)
   {
+    DBUG_ASSERT(~HOLDER & lk);
     if (lk & HOLDER)
     {
+      wait(lk);
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+reload:
+#endif
       lk= lock.load(std::memory_order_relaxed);
-      if (lk & HOLDER)
-        continue;
     }
-    lk= lock.fetch_or(HOLDER, std::memory_order_relaxed);
-    if (!(lk & HOLDER))
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+    else
+    {
+      static_assert(HOLDER == (1U << 31), "compatibility");
+      __asm__ goto("lock btsl $31, %0\n\t"
+                   "jc %l1" : : "m" (*this) : "cc", "memory" : reload);
+      std::atomic_thread_fence(std::memory_order_acquire);
+      return;
+    }
+#elif defined _M_IX86||defined _M_X64||defined __i386__||defined __x86_64__
+    else if (lock.compare_exchange_weak(lk, lk | HOLDER,
+                                        std::memory_order_acquire,
+                                        std::memory_order_relaxed))
+      return;
+#else
+    else if (!((lk= lock.fetch_or(HOLDER, std::memory_order_relaxed)) &
+               HOLDER))
     {
       DBUG_ASSERT(lk);
       std::atomic_thread_fence(std::memory_order_acquire);
      return;
     }
-    DBUG_ASSERT(lk > HOLDER);
+#endif
   }
 }
 
@@ -373,19 +423,12 @@ void ssux_lock_impl<spinloop>::rd_wait()
   for (;;)
   {
     writer.wr_lock();
-    uint32_t lk= readers.fetch_add(1, std::memory_order_acquire);
-    if (UNIV_UNLIKELY(lk == WRITER))
-    {
-      readers.fetch_sub(1, std::memory_order_relaxed);
-      wake();
-      writer.wr_unlock();
-      pthread_yield();
-      continue;
-    }
-    DBUG_ASSERT(!(lk & WRITER));
-    break;
+    bool acquired= rd_lock_try();
+    writer.wr_unlock();
+    if (acquired)
+      break;
+    std::this_thread::yield();
   }
-  writer.wr_unlock();
 }
 
 template void ssux_lock_impl<true>::rd_wait();