summaryrefslogtreecommitdiff
path: root/src/txn/txn.c
diff options
context:
space:
mode:
authorMichael Cahill <michael.cahill@mongodb.com>2017-06-02 12:08:34 +1000
committerGitHub <noreply@github.com>2017-06-02 12:08:34 +1000
commit42daa132f21c1391ae2b2b3d789df85878aca471 (patch)
tree385f10e3f035a7bea711ed18d6ffdbd77c601160 /src/txn/txn.c
parentb3ff7c4ab91d3c5fda64381d8ab5957cb697167d (diff)
downloadmongo-42daa132f21c1391ae2b2b3d789df85878aca471.tar.gz
WT-3345 Tune WiredTiger's read/write locks. (#3446)
* Add a workload that stresses rwlock performance under various conditions (including `threads >> cores`), tune read and write lock operations to only spin when it is likely to help, and to back off to a condition variable when there is heavy contention. * New rwlock implementation: queue readers and writers separately, don't enforce fairness among readers or if the lock is overwhelmed. * Switch to a spinlock whenever we need to lock a page. Previously we had a read/write lock in the __wt_page structure that was only ever acquired in write mode, plus a spinlock in the page->modify structure. Switch to using the spinlock for everything. One slight downside of this change is that we can no longer precisely determine whether a page is locked based on the status of the spinlock (since another page sharing the same lock could be holding it in the places where we used to check). Since that was only ever used for diagnostic / debugging purposes, I think the benefit of the change outweighs this issue. * Fix a bug where a failure during `__wt_curfile_create` caused a data handle to be released twice. This is caught by the sanity checking assertions in the new read/write lock code. * Split may be holding a page lock when restoring update. Tell the restore code we have the page exclusive and no further locking is required. * Allocate a spinlock for each modified page. Using shared page locks for mulitple operations that need to lock a page (including inserts and reconciliation) resulted in self-deadlock when the lookaside table was used. That's because reconciliation held a page lock, then caused inserts to the lookaside table, which acquired the page lock for a page in the lookaside table. With a shared set of page locks, they could both be the same lock. Switch (back?) to allocating a spinlock per modified page. Earlier in this ticket we saved some space in __wt_page, so growing __wt_page_modify is unlikely to be noticeable. * Tweak padding and position of the spinlock in WT_PAGE_MODIFY to claw back some bytes. Move evict_pass_gen to the end of WT_PAGE: on inspection, it should be a cold field relative to the others, which now fit in one x86 cache line.
Diffstat (limited to 'src/txn/txn.c')
-rw-r--r--src/txn/txn.c8
1 files changed, 4 insertions, 4 deletions
diff --git a/src/txn/txn.c b/src/txn/txn.c
index d9edbb80564..fb77ab4e860 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -126,7 +126,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
n = 0;
/* We're going to scan the table: wait for the lock. */
- __wt_readlock_spin(session, &txn_global->scan_rwlock);
+ __wt_readlock(session, &txn_global->scan_rwlock);
current_id = pinned_id = txn_global->current;
prev_oldest_id = txn_global->oldest_id;
@@ -293,7 +293,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags)
/* First do a read-only scan. */
if (wait)
- __wt_readlock_spin(session, &txn_global->scan_rwlock);
+ __wt_readlock(session, &txn_global->scan_rwlock);
else if ((ret =
__wt_try_readlock(session, &txn_global->scan_rwlock)) != 0)
return (ret == EBUSY ? 0 : ret);
@@ -782,8 +782,8 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
WT_RET(__wt_spin_init(session,
&txn_global->id_lock, "transaction id lock"));
- __wt_rwlock_init(session, &txn_global->scan_rwlock);
- __wt_rwlock_init(session, &txn_global->nsnap_rwlock);
+ WT_RET(__wt_rwlock_init(session, &txn_global->scan_rwlock));
+ WT_RET(__wt_rwlock_init(session, &txn_global->nsnap_rwlock));
txn_global->nsnap_oldest_id = WT_TXN_NONE;
TAILQ_INIT(&txn_global->nsnaph);