author		Keith Bostic <keith.bostic@mongodb.com>	2016-08-17 10:04:02 -0400
committer	GitHub <noreply@github.com>			2016-08-17 10:04:02 -0400
commit		36c2663b813216f9e10052ceeb2516fca5586496 (patch)
tree		adaf8f3304d8736b70a8e9040f63947c3e0018f5 /src
parent		b9a98e44bee8692808ebe51ce29d5a774307494c (diff)
download	mongo-36c2663b813216f9e10052ceeb2516fca5586496.tar.gz
WT-2847 Merge fair locks into read/write locks. (#2966)
* WT-2847 Merge fair locks into read/write locks.
Merge the fair-lock implementation into the read/write lock implementation
(fair locks were the same as the read/write lock's write lock), and add a new
read/write "is locked?" function for diagnostic tests (see the sketch after
this message).
* Rename the WT_RWLOCK "users" field to "next"; it makes the code
easier to read (and we already depend on "next" not being a keyword,
e.g. WT_CURSOR.next).
* Extend a comment.
* aspell does not know amongst!
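For context on the shared algorithm, here is a minimal standalone sketch of the
ticket-based read/write lock this change standardizes on. It is not the
WiredTiger implementation (that lives in src/support/mtx_rw.c and
src/include/mutex.h): the sketch_* names are hypothetical, GCC/Clang __atomic
builtins stand in for WiredTiger's atomic wrappers, and it busy-waits where the
real code pauses and then sleeps.

#include <stdint.h>

typedef union {
        uint64_t u;                     /* Whole-lock value, for snapshots */
        struct {
                uint16_t writers;       /* Now serving for writers */
                uint16_t readers;       /* Now serving for readers */
                uint16_t next;          /* Next available ticket number */
                uint16_t __notused;     /* Padding */
        } s;
} sketch_rwlock_t;

/* Write lock: take a ticket, spin until it is "now serving" for writers. */
static void
sketch_writelock(sketch_rwlock_t *l)
{
        uint16_t ticket;

        ticket = __atomic_fetch_add(&l->s.next, 1, __ATOMIC_RELAXED);
        while (__atomic_load_n(&l->s.writers, __ATOMIC_ACQUIRE) != ticket)
                ;                       /* Real code pauses, then sleeps */
}

/* Write unlock: serve the next ticket holder, whether reader or writer. */
static void
sketch_writeunlock(sketch_rwlock_t *l)
{
        __atomic_fetch_add(&l->s.readers, 1, __ATOMIC_RELAXED);
        __atomic_fetch_add(&l->s.writers, 1, __ATOMIC_RELEASE);
}

/* Read lock: once served, immediately admit the next waiting reader. */
static void
sketch_readlock(sketch_rwlock_t *l)
{
        uint16_t ticket;

        ticket = __atomic_fetch_add(&l->s.next, 1, __ATOMIC_RELAXED);
        while (__atomic_load_n(&l->s.readers, __ATOMIC_ACQUIRE) != ticket)
                ;
        ++l->s.readers;                 /* Only the served thread stores here */
}

/* Read unlock: advance "now serving" for writers past this ticket. */
static void
sketch_readunlock(sketch_rwlock_t *l)
{
        __atomic_fetch_add(&l->s.writers, 1, __ATOMIC_RELEASE);
}

Because tickets are granted strictly in arrival order, the write-lock path above
behaves like the old fair lock, which is why the fair lock can be deleted rather
than kept as a separate primitive.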
Diffstat (limited to 'src')
-rw-r--r--	src/btree/bt_compact.c		 4
-rw-r--r--	src/btree/bt_debug.c		 2
-rw-r--r--	src/btree/bt_discard.c		 2
-rw-r--r--	src/btree/bt_split.c		10
-rw-r--r--	src/include/btmem.h		 5
-rw-r--r--	src/include/extern.h		 1
-rw-r--r--	src/include/mutex.h		20
-rw-r--r--	src/include/mutex.i		96
-rw-r--r--	src/include/serial.i		 4
-rw-r--r--	src/include/wt_internal.h	 2
-rw-r--r--	src/reconcile/rec_write.c	 6
-rw-r--r--	src/support/mtx_rw.c		44
12 files changed, 50 insertions, 146 deletions
diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c
index d52afbe4280..bb1261d94b0 100644
--- a/src/btree/bt_compact.c
+++ b/src/btree/bt_compact.c
@@ -60,7 +60,7 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
 	 */
 	if (mod->rec_result == WT_PM_REC_REPLACE ||
 	    mod->rec_result == WT_PM_REC_MULTIBLOCK)
-		__wt_fair_lock(session, &page->page_lock);
+		__wt_writelock(session, &page->page_lock);
 
 	if (mod->rec_result == WT_PM_REC_REPLACE)
 		ret = bm->compact_page_skip(bm, session,
@@ -80,7 +80,7 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
 	if (mod->rec_result == WT_PM_REC_REPLACE ||
 	    mod->rec_result == WT_PM_REC_MULTIBLOCK)
-		__wt_fair_unlock(session, &page->page_lock);
+		__wt_writeunlock(session, &page->page_lock);
 
 	return (ret);
 }
diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c
index d4b1362fbfe..c1560150435 100644
--- a/src/btree/bt_debug.c
+++ b/src/btree/bt_debug.c
@@ -682,7 +682,7 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref)
 	    "\t" "disk %p, entries %" PRIu32, (void *)page->dsk, entries));
 	WT_RET(ds->f(ds,
 	    ", %s", __wt_page_is_modified(page) ? "dirty" : "clean"));
-	WT_RET(ds->f(ds, ", %s", __wt_fair_islocked(
+	WT_RET(ds->f(ds, ", %s", __wt_rwlock_islocked(
 	    session, &page->page_lock) ? "locked" : "unlocked"));
 
 	if (F_ISSET_ATOMIC(page, WT_PAGE_BUILD_KEYS))
diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c
index 4b132e311c5..162bc07a1c2 100644
--- a/src/btree/bt_discard.c
+++ b/src/btree/bt_discard.c
@@ -61,7 +61,7 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
 	 */
 	WT_ASSERT(session, !__wt_page_is_modified(page));
 	WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU));
-	WT_ASSERT(session, !__wt_fair_islocked(session, &page->page_lock));
+	WT_ASSERT(session, !__wt_rwlock_islocked(session, &page->page_lock));
 
 #ifdef HAVE_DIAGNOSTIC
 	{
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index 5b15a1224ea..700c2f3b192 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -1287,12 +1287,12 @@ __split_internal_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool trylock,
 			return (EBUSY);
 
 		if (trylock)
-			WT_RET(__wt_fair_trylock(session, &parent->page_lock));
+			WT_RET(__wt_try_writelock(session, &parent->page_lock));
 		else
-			__wt_fair_lock(session, &parent->page_lock);
+			__wt_writelock(session, &parent->page_lock);
 		if (parent == ref->home)
 			break;
-		__wt_fair_unlock(session, &parent->page_lock);
+		__wt_writeunlock(session, &parent->page_lock);
 	}
 
 	/*
@@ -1315,7 +1315,7 @@ __split_internal_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool trylock,
 	*parentp = parent;
 	return (0);
 
-err:	__wt_fair_unlock(session, &parent->page_lock);
+err:	__wt_writeunlock(session, &parent->page_lock);
 	return (ret);
 }
 
@@ -1331,7 +1331,7 @@ __split_internal_unlock(WT_SESSION_IMPL *session, WT_PAGE *parent, bool hazard)
 	if (hazard)
 		ret = __wt_hazard_clear(session, parent);
 
-	__wt_fair_unlock(session, &parent->page_lock);
+	__wt_writeunlock(session, &parent->page_lock);
 	return (ret);
 }
diff --git a/src/include/btmem.h b/src/include/btmem.h
index 817ce892952..a9d190c3b09 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -584,9 +584,10 @@ struct __wt_page {
 	/*
 	 * Used to protect and co-ordinate splits for internal pages and
-	 * reconciliation for all pages.
+	 * reconciliation for all pages. Only used to co-ordinate among the
+	 * uncommon cases that require exclusive access to a page.
 	 */
-	WT_FAIR_LOCK page_lock;
+	WT_RWLOCK page_lock;
 
 	/*
 	 * The page's read generation acts as an LRU value for each page in the
diff --git a/src/include/extern.h b/src/include/extern.h
index a3916a31542..796cf9a5f71 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -667,6 +667,7 @@ extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) WT_GC
 extern void __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock);
 extern void __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock);
 extern void __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp);
+extern bool __wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock);
 extern uint32_t __wt_nlpo2_round(uint32_t v);
 extern uint32_t __wt_nlpo2(uint32_t v);
 extern uint32_t __wt_log2_int(uint32_t n);
diff --git a/src/include/mutex.h b/src/include/mutex.h
index 04679884930..f0f8173bad4 100644
--- a/src/include/mutex.h
+++ b/src/include/mutex.h
@@ -42,7 +42,7 @@ typedef union {				/* Read/write lock */
 	struct {
 		uint16_t writers;	/* Now serving for writers */
 		uint16_t readers;	/* Now serving for readers */
-		uint16_t users;		/* Next available ticket number */
+		uint16_t next;		/* Next available ticket number */
 		uint16_t __notused;	/* Padding */
 	} s;
 } wt_rwlock_t;
@@ -59,24 +59,6 @@ struct __wt_rwlock {
 };
 
 /*
- * A light weight lock that can be used to replace spinlocks if fairness is
- * necessary. Implements a ticket-based back off spin lock.
- * The fields are available as a union to allow for atomically setting
- * the state of the entire lock.
- */
-struct __wt_fair_lock {
-	union {
-		uint32_t lock;
-		struct {
-			uint16_t owner;	/* Ticket for current owner */
-			uint16_t waiter; /* Last allocated ticket */
-		} s;
-	} u;
-#define	fair_lock_owner u.s.owner
-#define	fair_lock_waiter u.s.waiter
-};
-
-/*
  * Spin locks:
  *
  * WiredTiger uses spinlocks for fast mutual exclusion (where operations done
diff --git a/src/include/mutex.i b/src/include/mutex.i
index e7c78fdb484..cb1847d9991 100644
--- a/src/include/mutex.i
+++ b/src/include/mutex.i
@@ -255,99 +255,3 @@ __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
 #error Unknown spinlock type
 #endif
-
-/*
- * __wt_fair_trylock --
- *	Try to get a lock - give up if it is not immediately available.
- */
-static inline int
-__wt_fair_trylock(WT_SESSION_IMPL *session, WT_FAIR_LOCK *lock)
-{
-	WT_FAIR_LOCK new, old;
-
-	WT_UNUSED(session);
-
-	old = new = *lock;
-
-	/* Exit early if there is no chance we can get the lock. */
-	if (old.fair_lock_waiter != old.fair_lock_owner)
-		return (EBUSY);
-
-	/* The replacement lock value is a result of allocating a new ticket. */
-	++new.fair_lock_waiter;
-	return (__wt_atomic_cas32(
-	    &lock->u.lock, old.u.lock, new.u.lock) ? 0 : EBUSY);
-}
-
-/*
- * __wt_fair_lock --
- *	Get a lock.
- */
-static inline void
-__wt_fair_lock(WT_SESSION_IMPL *session, WT_FAIR_LOCK *lock)
-{
-	uint16_t ticket;
-	int pause_cnt;
-
-	WT_UNUSED(session);
-
-	/*
-	 * Possibly wrap: if we have more than 64K lockers waiting, the ticket
-	 * value will wrap and two lockers will simultaneously be granted the
-	 * lock.
-	 */
-	ticket = __wt_atomic_fetch_add16(&lock->fair_lock_waiter, 1);
-	for (pause_cnt = 0; ticket != lock->fair_lock_owner;) {
-		/*
-		 * We failed to get the lock; pause before retrying and if we've
-		 * paused enough, sleep so we don't burn CPU to no purpose. This
-		 * situation happens if there are more threads than cores in the
-		 * system and we're thrashing on shared resources.
-		 */
-		if (++pause_cnt < WT_THOUSAND)
-			WT_PAUSE();
-		else
-			__wt_sleep(0, 10);
-	}
-
-	/*
-	 * Applications depend on a barrier here so that operations holding the
-	 * lock see consistent data.
-	 */
-	WT_READ_BARRIER();
-}
-
-/*
- * __wt_fair_unlock --
- *	Release a shared lock.
- */
-static inline void
-__wt_fair_unlock(WT_SESSION_IMPL *session, WT_FAIR_LOCK *lock)
-{
-	WT_UNUSED(session);
-
-	/*
-	 * Ensure that all updates made while the lock was held are visible to
-	 * the next thread to acquire the lock.
-	 */
-	WT_WRITE_BARRIER();
-
-	/*
-	 * We have exclusive access - the update does not need to be atomic.
-	 */
-	++lock->fair_lock_owner;
-}
-
-#ifdef HAVE_DIAGNOSTIC
-/*
- * __wt_fair_islocked --
- *	Test whether the lock is currently held.
- */
-static inline bool
-__wt_fair_islocked(WT_SESSION_IMPL *session, WT_FAIR_LOCK *lock)
-{
-	WT_UNUSED(session);
-
-	return (lock->fair_lock_waiter != lock->fair_lock_owner);
-}
-#endif
diff --git a/src/include/serial.i b/src/include/serial.i
index ddbf3c17bb8..982f196b0b8 100644
--- a/src/include/serial.i
+++ b/src/include/serial.i
@@ -316,11 +316,11 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
 	}
 
 	/* If we can't lock it, don't scan, that's okay. */
-	if (__wt_fair_trylock(session, &page->page_lock) != 0)
+	if (__wt_try_writelock(session, &page->page_lock) != 0)
 		return (0);
 
 	obsolete = __wt_update_obsolete_check(session, page, upd->next);
-	__wt_fair_unlock(session, &page->page_lock);
+	__wt_writeunlock(session, &page->page_lock);
 	if (obsolete != NULL)
 		__wt_update_obsolete_free(session, page, obsolete);
diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h
index c5337967f22..9b3460d0f9e 100644
--- a/src/include/wt_internal.h
+++ b/src/include/wt_internal.h
@@ -176,8 +176,6 @@ struct __wt_ext;
 typedef struct __wt_ext WT_EXT;
 struct __wt_extlist;
 typedef struct __wt_extlist WT_EXTLIST;
-struct __wt_fair_lock;
-typedef struct __wt_fair_lock WT_FAIR_LOCK;
 struct __wt_fh;
 typedef struct __wt_fh WT_FH;
 struct __wt_file_handle_inmem;
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index 4e5537dbcdf..9c742476910 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -372,7 +372,7 @@ __wt_reconcile(WT_SESSION_IMPL *session,
 	 * In-memory splits: reconciliation of an internal page cannot handle
 	 * a child page splitting during the reconciliation.
 	 */
-	__wt_fair_lock(session, &page->page_lock);
+	__wt_writelock(session, &page->page_lock);
 
 	/*
 	 * Check that transaction time always moves forward for a given page.
@@ -386,7 +386,7 @@ __wt_reconcile(WT_SESSION_IMPL *session,
 	/* Initialize the reconciliation structure for each new run. */
 	if ((ret = __rec_write_init(
 	    session, ref, flags, salvage, &session->reconcile)) != 0) {
-		__wt_fair_unlock(session, &page->page_lock);
+		__wt_writeunlock(session, &page->page_lock);
 		return (ret);
 	}
 	r = session->reconcile;
@@ -427,7 +427,7 @@ __wt_reconcile(WT_SESSION_IMPL *session,
 	WT_TRET(__rec_write_wrapup_err(session, r, page));
 
 	/* Release the reconciliation lock. */
-	__wt_fair_unlock(session, &page->page_lock);
+	__wt_writeunlock(session, &page->page_lock);
 
 	/* Update statistics. */
 	WT_STAT_FAST_CONN_INCR(session, rec_pages);
diff --git a/src/support/mtx_rw.c b/src/support/mtx_rw.c
index 6298a9dc52b..0bdde81c32f 100644
--- a/src/support/mtx_rw.c
+++ b/src/support/mtx_rw.c
@@ -45,19 +45,19 @@
 *	struct {
 *		uint16_t writers;	Now serving for writers
 *		uint16_t readers;	Now serving for readers
-*		uint16_t users;		Next available ticket number
+*		uint16_t next;		Next available ticket number
 *		uint16_t __notused;	Padding
 *	}
 *
 * First, imagine a store's 'take a number' ticket algorithm. A customer takes
 * a unique ticket number and customers are served in ticket order. In the data
 * structure, 'writers' is the next writer to be served, 'readers' is the next
-* reader to be served, and 'users' is the next available ticket number.
+* reader to be served, and 'next' is the next available ticket number.
 *
 * Next, consider exclusive (write) locks. The 'now serving' number for writers
 * is 'writers'. To lock, 'take a number' and wait until that number is being
 * served; more specifically, atomically copy and increment the current value of
-* 'users', and then wait until 'writers' equals that copied number.
+* 'next', and then wait until 'writers' equals that copied number.
 *
 * Shared (read) locks are similar. Like writers, readers atomically get the
 * next number available. However, instead of waiting for 'writers' to equal
@@ -74,7 +74,7 @@
 *
 * For example, consider the following read (R) and write (W) lock requests:
 *
-*					writers	readers	users
+*					writers	readers	next
 *					   0	   0	  0
 * R: ticket 0, readers match	OK	   0	   1	  1
 * R: ticket 1, readers match	OK	   0	   2	  2
@@ -92,7 +92,7 @@
 * and the next ticket holder (reader or writer) will unblock when the writer
 * unlocks. An example, continuing from the last line of the above example:
 *
-*					writers	readers	users
+*					writers	readers	next
 * W: ticket 3, writers match	OK	   3	   3	  4
 * R: ticket 4, readers no match block	   3	   3	  5
 * R: ticket 5, readers no match block	   3	   3	  6
@@ -101,8 +101,8 @@
 * R: ticket 4, readers match	OK	   4	   5	  7
 * R: ticket 5, readers match	OK	   4	   6	  7
 *
-* The 'users' field is a 2-byte value so the available ticket number wraps at
-* 64K requests. If a thread's lock request is not granted until the 'users'
+* The 'next' field is a 2-byte value so the available ticket number wraps at
+* 64K requests. If a thread's lock request is not granted until the 'next'
 * field cycles and the same ticket is taken by another thread, we could grant
 * a lock to two separate threads at the same time, and bad things happen: two
 * writer threads or a reader thread and a writer thread would run in parallel,
@@ -155,14 +155,14 @@ __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
 	 * Do the cheap test to see if this can possibly succeed (and confirm
 	 * the lock is in the correct state to grant this read lock).
 	 */
-	if (old.s.readers != old.s.users)
+	if (old.s.readers != old.s.next)
 		return (EBUSY);
 
 	/*
 	 * The replacement lock value is a result of allocating a new ticket and
 	 * incrementing the reader value to match it.
 	 */
-	new.s.readers = new.s.users = old.s.users + 1;
+	new.s.readers = new.s.next = old.s.next + 1;
 	return (__wt_atomic_cas64(&l->u, old.u, new.u) ? 0 : EBUSY);
 }
@@ -188,7 +188,7 @@ __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
 	 * value will wrap and two lockers will simultaneously be granted the
 	 * lock.
 	 */
-	ticket = __wt_atomic_fetch_add16(&l->s.users, 1);
+	ticket = __wt_atomic_fetch_add16(&l->s.next, 1);
 	for (pause_cnt = 0; ticket != l->s.readers;) {
 		/*
 		 * We failed to get the lock; pause before retrying and if we've
@@ -259,11 +259,11 @@ __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
 	 * Do the cheap test to see if this can possibly succeed (and confirm
 	 * the lock is in the correct state to grant this write lock).
 	 */
-	if (old.s.writers != old.s.users)
+	if (old.s.writers != old.s.next)
 		return (EBUSY);
 
 	/* The replacement lock value is a result of allocating a new ticket. */
-	++new.s.users;
+	++new.s.next;
 	return (__wt_atomic_cas64(&l->u, old.u, new.u) ? 0 : EBUSY);
 }
@@ -287,7 +287,7 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
 	 * value will wrap and two lockers will simultaneously be granted the
 	 * lock.
 	 */
-	ticket = __wt_atomic_fetch_add16(&l->s.users, 1);
+	ticket = __wt_atomic_fetch_add16(&l->s.next, 1);
 	for (pause_cnt = 0; ticket != l->s.writers;) {
 		/*
 		 * We failed to get the lock; pause before retrying and if we've
@@ -361,3 +361,21 @@ __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp)
 
 	__wt_free(session, rwlock);
 }
+
+#ifdef HAVE_DIAGNOSTIC
+/*
+ * __wt_rwlock_islocked --
+ *	Return if a read/write lock is currently locked for reading or writing.
+ */
+bool
+__wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
+{
+	wt_rwlock_t *l;
+
+	WT_UNUSED(session);
+
+	l = &rwlock->rwlock;
+
+	return (l->s.writers != l->s.next || l->s.readers != l->s.next);
+}
+#endif
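Continuing the sketch from the commit message (same hypothetical sketch_*
names), the new diagnostic predicate falls out of the ticket fields directly:
the lock is idle only when both 'now serving' counters have caught up with
'next', i.e. every ticket handed out has been served and retired.

/* Locked (or waited on) unless both serving counters have caught up. */
static int
sketch_rwlock_islocked(sketch_rwlock_t *l)
{
        return (l->s.writers != l->s.next || l->s.readers != l->s.next);
}

Like __wt_rwlock_islocked() above, this is a diagnostic aid rather than a
synchronization primitive, which is why WiredTiger compiles it under
HAVE_DIAGNOSTIC and uses it in assertions such as the one in
src/btree/bt_discard.c.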