summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Cahill <michael.cahill@mongodb.com> 2016-11-04 17:09:42 +1100
committer: GitHub <noreply@github.com> 2016-11-04 17:09:42 +1100
commit: f418cea41b61f47fda740751495f9f5d39d814fd (patch)
tree: 49cdb6b1f1b6b200701638ba4c765717c87f361e
parent: f3517c27c2c5ff9bda28a24e44d3cc9ad7b4be3a (diff)
download: mongo-f418cea41b61f47fda740751495f9f5d39d814fd.tar.gz
SERVER-26753 Add a call to spin on rwlocks with no active writers (#3126)
-rw-r--r-- src/include/extern.h  |  1
-rw-r--r-- src/include/mutex.h   |  2
-rw-r--r-- src/support/mtx_rw.c  | 44
-rw-r--r-- src/txn/txn.c         |  4
4 files changed, 36 insertions(+), 15 deletions(-)
diff --git a/src/include/extern.h b/src/include/extern.h
index 69140d87eaa..6234f2f6bc5 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -667,6 +667,7 @@ extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, cons
extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_rwlock_alloc( WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_readlock_spin(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock);
extern void __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock);
extern void __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock);
extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/include/mutex.h b/src/include/mutex.h
index b736d6ee9fb..84f015d6b67 100644
--- a/src/include/mutex.h
+++ b/src/include/mutex.h
@@ -43,7 +43,7 @@ typedef union { /* Read/write lock */
uint16_t writers; /* Now serving for writers */
uint16_t readers; /* Now serving for readers */
uint16_t next; /* Next available ticket number */
- uint16_t __notused; /* Padding */
+ uint16_t writers_active;/* Count of active writers */
} s;
} wt_rwlock_t;
diff --git a/src/support/mtx_rw.c b/src/support/mtx_rw.c
index 4a2d596c994..ea18f556257 100644
--- a/src/support/mtx_rw.c
+++ b/src/support/mtx_rw.c
@@ -167,13 +167,39 @@ __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
}
/*
+ * __wt_readlock_spin --
+ * Spin to get a read lock: only yield the CPU if the lock is held
+ * exclusive.
+ */
+void
+__wt_readlock_spin(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
+{
+ wt_rwlock_t *l;
+
+ l = &rwlock->rwlock;
+
+ /*
+ * Try to get the lock in a single operation if it is available to
+ * readers. This avoids the situation where multiple readers arrive
+ * concurrently and have to line up in order to enter the lock. For
+ * read-heavy workloads it can make a significant difference.
+ */
+ while (__wt_try_readlock(session, rwlock) != 0) {
+ if (l->s.writers_active > 0)
+ __wt_yield();
+ else
+ WT_PAUSE();
+ }
+}
+
+/*
* __wt_readlock --
* Get a shared lock.
*/
void
__wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
{
- wt_rwlock_t *l, old;
+ wt_rwlock_t *l;
uint16_t ticket;
int pause_cnt;
@@ -183,15 +209,6 @@ __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
l = &rwlock->rwlock;
- /* Be optimistic when lock is available to readers. */
- old = *l;
- while (old.s.readers == old.s.next) {
- if (__wt_try_readlock(session, rwlock) == 0)
- return;
- WT_PAUSE();
- old = *l;
- }
-
/*
* Possibly wrap: if we have more than 64K lockers waiting, the ticket
* value will wrap and two lockers will simultaneously be granted the
@@ -270,6 +287,7 @@ __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
/* The replacement lock value is a result of allocating a new ticket. */
++new.s.next;
+ ++new.s.writers_active;
return (__wt_atomic_cas64(&l->u, old.u, new.u) ? 0 : EBUSY);
}
@@ -294,6 +312,7 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
* lock.
*/
ticket = __wt_atomic_fetch_add16(&l->s.next, 1);
+ (void)__wt_atomic_add16(&l->s.writers_active, 1);
for (pause_cnt = 0; ticket != l->s.writers;) {
/*
* We failed to get the lock; pause before retrying and if we've
@@ -325,14 +344,15 @@ __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
WT_UNUSED(session);
+ l = &rwlock->rwlock;
+ (void)__wt_atomic_sub16(&l->s.writers_active, 1);
+
/*
* Ensure that all updates made while the lock was held are visible to
* the next thread to acquire the lock.
*/
WT_WRITE_BARRIER();
- l = &rwlock->rwlock;
-
new = *l;
/*
diff --git a/src/txn/txn.c b/src/txn/txn.c
index 3f128637970..1c47f94bb60 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -126,7 +126,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
n = 0;
/* We're going to scan the table: wait for the lock. */
- __wt_readlock(session, txn_global->scan_rwlock);
+ __wt_readlock_spin(session, txn_global->scan_rwlock);
current_id = pinned_id = txn_global->current;
prev_oldest_id = txn_global->oldest_id;
@@ -285,7 +285,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags)
/* First do a read-only scan. */
if (wait)
- __wt_readlock(session, txn_global->scan_rwlock);
+ __wt_readlock_spin(session, txn_global->scan_rwlock);
else if ((ret =
__wt_try_readlock(session, txn_global->scan_rwlock)) != 0)
return (ret == EBUSY ? 0 : ret);