summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Alex Gorrod <alexander.gorrod@mongodb.com> 2017-02-01 12:11:48 +1100
committer: GitHub <noreply@github.com> 2017-02-01 12:11:48 +1100
commit: 1e24579efee68f6fdb6a4c582275a50d95d7eb81 (patch)
tree: 04dbe334b08a79a3e1cd1294a43602c2fbf59a07
parent: 0a70661a0d33c9705509955baafded2855054a29 (diff)
download: mongo-1e24579efee68f6fdb6a4c582275a50d95d7eb81.tar.gz
WT-3115 Convert the dhandle list lock into a read/write lock. (#3236)
The handle list lock was a spinlock, but most acquirers only need shared access, and it can be a contention point in many-table workloads. Split uses of the handle list lock into small operations: in particular, only hold the lock to get the "next" handle, not across loops over all the handles in the system. Update the statistics around the handle list lock and the corresponding documentation.
-rw-r--r-- dist/flags.py 3
-rwxr-xr-x dist/s_stat 3
-rw-r--r-- dist/stat_data.py 4
-rw-r--r-- src/conn/conn_dhandle.c 55
-rw-r--r-- src/conn/conn_handle.c 4
-rw-r--r-- src/conn/conn_stat.c 8
-rw-r--r-- src/conn/conn_sweep.c 2
-rw-r--r-- src/cursor/cur_backup.c 8
-rw-r--r-- src/docs/upgrading.dox 6
-rw-r--r-- src/evict/evict_lru.c 50
-rw-r--r-- src/evict/evict_stat.c 2
-rw-r--r-- src/include/cache.i 2
-rw-r--r-- src/include/connection.h 6
-rw-r--r-- src/include/dhandle.h 18
-rw-r--r-- src/include/extern.h 1
-rw-r--r-- src/include/flags.h 33
-rw-r--r-- src/include/schema.h 72
-rw-r--r-- src/include/stat.h 4
-rw-r--r-- src/include/wiredtiger.in 218
-rw-r--r-- src/lsm/lsm_cursor.c 4
-rw-r--r-- src/lsm/lsm_manager.c 12
-rw-r--r-- src/lsm/lsm_stat.c 4
-rw-r--r-- src/lsm/lsm_tree.c 63
-rw-r--r-- src/lsm/lsm_work_unit.c 4
-rw-r--r-- src/schema/schema_drop.c 2
-rw-r--r-- src/schema/schema_rename.c 2
-rw-r--r-- src/schema/schema_worker.c 2
-rw-r--r-- src/session/session_dhandle.c 43
-rw-r--r-- src/support/stat.c 16
-rw-r--r-- src/txn/txn_ckpt.c 5
30 files changed, 359 insertions, 297 deletions
diff --git a/dist/flags.py b/dist/flags.py
index 55ce233e60d..216f7c29e0a 100644
--- a/dist/flags.py
+++ b/dist/flags.py
@@ -117,7 +117,8 @@ flags = {
'SESSION_CAN_WAIT',
'SESSION_INTERNAL',
'SESSION_LOCKED_CHECKPOINT',
- 'SESSION_LOCKED_HANDLE_LIST',
+ 'SESSION_LOCKED_HANDLE_LIST_READ',
+ 'SESSION_LOCKED_HANDLE_LIST_WRITE',
'SESSION_LOCKED_METADATA',
'SESSION_LOCKED_PASS',
'SESSION_LOCKED_SCHEMA',
diff --git a/dist/s_stat b/dist/s_stat
index 5d5937e1833..6aeeca6faa6 100755
--- a/dist/s_stat
+++ b/dist/s_stat
@@ -25,9 +25,6 @@ cat << UNUSED_STAT_FIELDS
lock_checkpoint_count
lock_checkpoint_wait_application
lock_checkpoint_wait_internal
-lock_handle_list_count
-lock_handle_list_wait_application
-lock_handle_list_wait_internal
lock_metadata_count
lock_metadata_wait_application
lock_metadata_wait_internal
diff --git a/dist/stat_data.py b/dist/stat_data.py
index 0af5d6d017e..a4d92345f88 100644
--- a/dist/stat_data.py
+++ b/dist/stat_data.py
@@ -288,9 +288,7 @@ connection_stats = [
LockStat('lock_checkpoint_count', 'checkpoint lock acquisitions'),
LockStat('lock_checkpoint_wait_application', 'checkpoint lock application thread wait time (usecs)'),
LockStat('lock_checkpoint_wait_internal', 'checkpoint lock internal thread wait time (usecs)'),
- LockStat('lock_handle_list_count', 'handle-list lock acquisitions'),
- LockStat('lock_handle_list_wait_application', 'handle-list lock application thread wait time (usecs)'),
- LockStat('lock_handle_list_wait_internal', 'handle-list lock internal thread wait time (usecs)'),
+ LockStat('lock_handle_list_wait_eviction', 'handle-list lock eviction thread wait time (usecs)'),
LockStat('lock_metadata_count', 'metadata lock acquisitions'),
LockStat('lock_metadata_wait_application', 'metadata lock application thread wait time (usecs)'),
LockStat('lock_metadata_wait_internal', 'metadata lock internal thread wait time (usecs)'),
diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c
index b2f4bb04ce4..866b8633f71 100644
--- a/src/conn/conn_dhandle.c
+++ b/src/conn/conn_dhandle.c
@@ -25,21 +25,19 @@ __conn_dhandle_destroy(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle)
}
/*
- * __conn_dhandle_alloc --
+ * __wt_conn_dhandle_alloc --
* Allocate a new data handle and return it linked into the connection's
* list.
*/
-static int
-__conn_dhandle_alloc(WT_SESSION_IMPL *session,
- const char *uri, const char *checkpoint, WT_DATA_HANDLE **dhandlep)
+int
+__wt_conn_dhandle_alloc(
+ WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
{
WT_BTREE *btree;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
uint64_t bucket;
- *dhandlep = NULL;
-
WT_RET(__wt_calloc_one(session, &dhandle));
__wt_rwlock_init(session, &dhandle->rwlock);
@@ -75,7 +73,7 @@ __conn_dhandle_alloc(WT_SESSION_IMPL *session,
bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE;
WT_CONN_DHANDLE_INSERT(S2C(session), dhandle, bucket);
- *dhandlep = dhandle;
+ session->dhandle = dhandle;
return (0);
err: __conn_dhandle_destroy(session, dhandle);
@@ -122,10 +120,7 @@ __wt_conn_dhandle_find(
}
}
- WT_RET(__conn_dhandle_alloc(session, uri, checkpoint, &dhandle));
-
- session->dhandle = dhandle;
- return (0);
+ return (WT_NOTFOUND);
}
/*
@@ -419,12 +414,11 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session, const char *uri,
{
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
uint64_t bucket;
conn = S2C(session);
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));
-
/*
* If we're given a URI, then we walk only the hash list for that
* name. If we don't have a URI we walk the entire dhandle list.
@@ -432,29 +426,42 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session, const char *uri,
if (uri != NULL) {
bucket =
__wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
- TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) {
+
+ for (dhandle = NULL;;) {
+ WT_WITH_HANDLE_LIST_READ_LOCK(session,
+ WT_DHANDLE_NEXT(session, dhandle,
+ &conn->dhhash[bucket], hashq));
+ if (dhandle == NULL)
+ return (0);
+
if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
dhandle->checkpoint != NULL ||
strcmp(uri, dhandle->name) != 0)
continue;
- WT_RET(__conn_btree_apply_internal(
- session, dhandle, file_func, name_func, cfg));
+ WT_ERR(__conn_btree_apply_internal(session,
+ dhandle, file_func, name_func, cfg));
}
} else {
- TAILQ_FOREACH(dhandle, &conn->dhqh, q) {
+ for (dhandle = NULL;;) {
+ WT_WITH_HANDLE_LIST_READ_LOCK(session,
+ WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q));
+ if (dhandle == NULL)
+ return (0);
+
if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
dhandle->checkpoint != NULL ||
!WT_PREFIX_MATCH(dhandle->name, "file:") ||
WT_IS_METADATA(dhandle))
continue;
- WT_RET(__conn_btree_apply_internal(
- session, dhandle, file_func, name_func, cfg));
+ WT_ERR(__conn_btree_apply_internal(session,
+ dhandle, file_func, name_func, cfg));
}
}
- return (0);
+err: WT_DHANDLE_RELEASE(dhandle);
+ return (ret);
}
/*
@@ -473,7 +480,8 @@ __wt_conn_dhandle_close_all(
conn = S2C(session);
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));
+ WT_ASSERT(session,
+ F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
WT_ASSERT(session, session->dhandle == NULL);
bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
@@ -534,7 +542,8 @@ __conn_dhandle_remove(WT_SESSION_IMPL *session, bool final)
dhandle = session->dhandle;
bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));
+ WT_ASSERT(session,
+ F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
WT_ASSERT(session, dhandle != conn->cache->evict_file_next);
/* Check if the handle was reacquired by a session while we waited. */
@@ -583,7 +592,7 @@ __wt_conn_dhandle_discard_single(
}
/* Try to remove the handle, protected by the data handle lock. */
- WT_WITH_HANDLE_LIST_LOCK(session,
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
tret = __conn_dhandle_remove(session, final));
if (set_pass_intr)
(void)__wt_atomic_subv32(&S2C(session)->cache->pass_intr, 1);
diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c
index 54bcfd98aba..4f8d89fa9d2 100644
--- a/src/conn/conn_handle.c
+++ b/src/conn/conn_handle.c
@@ -53,7 +53,6 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn)
/* Spinlocks. */
WT_RET(__wt_spin_init(session, &conn->api_lock, "api"));
WT_SPIN_INIT_TRACKED(session, &conn->checkpoint_lock, checkpoint);
- WT_SPIN_INIT_TRACKED(session, &conn->dhandle_lock, handle_list);
WT_RET(__wt_spin_init(session, &conn->encryptor_lock, "encryptor"));
WT_RET(__wt_spin_init(session, &conn->fh_lock, "file list"));
WT_RET(__wt_spin_init(session, &conn->las_lock, "lookaside table"));
@@ -64,6 +63,7 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn)
WT_RET(__wt_spin_init(session, &conn->turtle_lock, "turtle file"));
/* Read-write locks */
+ __wt_rwlock_init(session, &conn->dhandle_lock);
__wt_rwlock_init(session, &conn->hot_backup_lock);
WT_RET(__wt_calloc_def(session, WT_PAGE_LOCKS, &conn->page_lock));
@@ -134,7 +134,7 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn)
__wt_spin_destroy(session, &conn->api_lock);
__wt_spin_destroy(session, &conn->block_lock);
__wt_spin_destroy(session, &conn->checkpoint_lock);
- __wt_spin_destroy(session, &conn->dhandle_lock);
+ __wt_rwlock_destroy(session, &conn->dhandle_lock);
__wt_spin_destroy(session, &conn->encryptor_lock);
__wt_spin_destroy(session, &conn->fh_lock);
__wt_rwlock_destroy(session, &conn->hot_backup_lock);
diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c
index 31dc9c45992..d89392b66c6 100644
--- a/src/conn/conn_stat.c
+++ b/src/conn/conn_stat.c
@@ -409,7 +409,6 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp)
struct timespec ts;
struct tm *tm, _tm;
WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
WT_FSTREAM *log_stream;
conn = S2C(session);
@@ -446,12 +445,9 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp)
* Lock the schema and walk the list of open handles, dumping
* any that match the list of object sources.
*/
- if (conn->stat_sources != NULL) {
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __wt_conn_btree_apply(
+ if (conn->stat_sources != NULL)
+ WT_RET(__wt_conn_btree_apply(
session, NULL, __statlog_apply, NULL, NULL));
- WT_RET(ret);
- }
/*
* Walk the list of open LSM trees, dumping any that match the
diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c
index f9b7305c7d8..8c186c63939 100644
--- a/src/conn/conn_sweep.c
+++ b/src/conn/conn_sweep.c
@@ -233,7 +233,7 @@ __sweep_remove_handles(WT_SESSION_IMPL *session)
if (!WT_DHANDLE_CAN_DISCARD(dhandle))
continue;
- WT_WITH_HANDLE_LIST_LOCK(session,
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
ret = __sweep_remove_one(session, dhandle));
if (ret == 0)
WT_STAT_CONN_INCR(session, dh_sweep_remove);
diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c
index 08b15e6ca5e..61ced8d11e7 100644
--- a/src/cursor/cur_backup.c
+++ b/src/cursor/cur_backup.c
@@ -346,13 +346,9 @@ __backup_stop(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
static int
__backup_all(WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
-
/* Build a list of the file objects that need to be copied. */
- WT_WITH_HANDLE_LIST_LOCK(session, ret =
- __wt_meta_apply_all(session, NULL, __backup_list_uri_append, NULL));
-
- return (ret);
+ return (__wt_meta_apply_all(
+ session, NULL, __backup_list_uri_append, NULL));
}
/*
diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox
index 4a356f7da61..f463e6bc615 100644
--- a/src/docs/upgrading.dox
+++ b/src/docs/upgrading.dox
@@ -7,6 +7,12 @@
The WiredTiger Utility can now \c truncate an object. Removing all contents
from the specified object.
</dd>
+<dt>Handle list lock statistics</dt>
+<dd>
+In the 2.9.1 release we added statistics tracking handle list lock timing. We
+have since switched that lock from a spin lock to a read-write lock, and
+consequently changed the statistics tracking lock-related wait time.
+</dd>
</dl>
@section version_291 Upgrading to Version 2.9.1
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index 48ea1ccb02b..de1cff85816 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -24,40 +24,40 @@ static int __evict_walk_file(
(S2C(s)->evict_threads.current_threads > 1)
/*
- * __evict_lock_dhandle --
- * Try to get the dhandle lock, with yield and sleep back off.
+ * __evict_lock_handle_list --
+ * Try to get the handle list lock, with yield and sleep back off.
* Keep timing statistics overall.
*/
static int
-__evict_lock_dhandle(WT_SESSION_IMPL *session)
+__evict_lock_handle_list(WT_SESSION_IMPL *session)
{
struct timespec enter, leave;
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
- WT_SPINLOCK *dh_lock;
- int64_t **stats;
+ WT_RWLOCK *dh_lock;
u_int spins;
bool dh_stats;
conn = S2C(session);
cache = conn->cache;
dh_lock = &conn->dhandle_lock;
- stats = (int64_t **)conn->stats;
- dh_stats = WT_STAT_ENABLED(session) && dh_lock->stat_count_off != -1;
/*
- * Maintain lock acquisition timing statistics as if this were a
- * regular lock acquisition.
+ * Setup tracking of handle lock acquisition wait time if statistics
+ * are enabled.
*/
+ dh_stats = WT_STAT_ENABLED(session);
+
if (dh_stats)
__wt_epoch(session, &enter);
+
/*
* Use a custom lock acquisition back off loop so the eviction server
* notices any interrupt quickly.
*/
for (spins = 0;
- (ret = __wt_spin_trylock_track(session, dh_lock)) == EBUSY &&
+ (ret = __wt_try_readlock(session, dh_lock)) == EBUSY &&
cache->pass_intr == 0; spins++) {
if (spins < WT_THOUSAND)
__wt_yield();
@@ -70,8 +70,9 @@ __evict_lock_dhandle(WT_SESSION_IMPL *session)
WT_RET(ret);
if (dh_stats) {
__wt_epoch(session, &leave);
- stats[session->stat_bucket][dh_lock->stat_int_usecs_off] +=
- (int64_t)WT_TIMEDIFF_US(leave, enter);
+ WT_STAT_CONN_INCRV(
+ session, lock_handle_list_wait_eviction,
+ (int64_t)WT_TIMEDIFF_US(leave, enter));
}
return (0);
}
@@ -379,18 +380,17 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work)
* otherwise we can block applications evicting large pages.
*/
if (!__wt_cache_stuck(session)) {
-
/*
- * If we gave up acquiring the lock, that indicates a
- * session is waiting for us to clear walks. Do that
- * as part of a normal pass (without the handle list
+ * Try to get the handle list lock: if we give up, that
+ * indicates a session is waiting for us to clear walks. Do
+ * that as part of a normal pass (without the handle list
* lock) to avoid deadlock.
*/
- if ((ret = __evict_lock_dhandle(session)) == EBUSY)
+ if ((ret = __evict_lock_handle_list(session)) == EBUSY)
return (0);
WT_RET(ret);
ret = __evict_clear_all_walks(session);
- __wt_spin_unlock(session, &conn->dhandle_lock);
+ __wt_readunlock(session, &conn->dhandle_lock);
WT_RET(ret);
cache->pages_evicted = 0;
@@ -1321,7 +1321,7 @@ retry: while (slot < max_entries) {
* reference count to keep it alive while we sweep.
*/
if (!dhandle_locked) {
- WT_ERR(__evict_lock_dhandle(session));
+ WT_ERR(__evict_lock_handle_list(session));
dhandle_locked = true;
}
@@ -1400,7 +1400,7 @@ retry: while (slot < max_entries) {
(void)__wt_atomic_addi32(&dhandle->session_inuse, 1);
incr = true;
- __wt_spin_unlock(session, &conn->dhandle_lock);
+ __wt_readunlock(session, &conn->dhandle_lock);
dhandle_locked = false;
/*
@@ -1447,7 +1447,7 @@ retry: while (slot < max_entries) {
}
err: if (dhandle_locked) {
- __wt_spin_unlock(session, &conn->dhandle_lock);
+ __wt_readunlock(session, &conn->dhandle_lock);
dhandle_locked = false;
}
@@ -2319,8 +2319,11 @@ __wt_verbose_dump_cache(WT_SESSION_IMPL *session)
WT_RET(__wt_msg(session, "%s", WT_DIVIDER));
WT_RET(__wt_msg(session, "cache dump"));
- __wt_spin_lock(session, &conn->dhandle_lock);
- TAILQ_FOREACH(dhandle, &conn->dhqh, q) {
+ for (dhandle = NULL;;) {
+ WT_WITH_HANDLE_LIST_READ_LOCK(session,
+ WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q));
+ if (dhandle == NULL)
+ break;
if (!WT_PREFIX_MATCH(dhandle->name, "file:") ||
!F_ISSET(dhandle, WT_DHANDLE_OPEN))
continue;
@@ -2331,7 +2334,6 @@ __wt_verbose_dump_cache(WT_SESSION_IMPL *session)
if (ret != 0)
break;
}
- __wt_spin_unlock(session, &conn->dhandle_lock);
WT_RET(ret);
/*
diff --git a/src/evict/evict_stat.c b/src/evict/evict_stat.c
index 2dd3b1e83a0..7c2d5722a63 100644
--- a/src/evict/evict_stat.c
+++ b/src/evict/evict_stat.c
@@ -134,5 +134,5 @@ __wt_curstat_cache_walk(WT_SESSION_IMPL *session)
WT_STAT_DATA_SET(session,
cache_state_root_size, btree->root.page->memory_footprint);
- WT_WITH_HANDLE_LIST_LOCK(session, __evict_stat_walk(session));
+ __evict_stat_walk(session);
}
diff --git a/src/include/cache.i b/src/include/cache.i
index 17ab39e97d2..d71978ccf35 100644
--- a/src/include/cache.i
+++ b/src/include/cache.i
@@ -364,7 +364,7 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool *didworkp)
* block eviction), we don't want to highjack the thread for eviction.
*/
if (F_ISSET(session, WT_SESSION_NO_EVICTION |
- WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_LOCKED_SCHEMA))
+ WT_SESSION_LOCKED_HANDLE_LIST_WRITE | WT_SESSION_LOCKED_SCHEMA))
return (0);
/* In memory configurations don't block when the cache is full. */
diff --git a/src/include/connection.h b/src/include/connection.h
index 64ac4271db1..3a719e59608 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -123,12 +123,16 @@ struct __wt_named_extractor {
* main queue and the hashed queue.
*/
#define WT_CONN_DHANDLE_INSERT(conn, dhandle, bucket) do { \
+ WT_ASSERT(session, \
+ F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \
TAILQ_INSERT_HEAD(&(conn)->dhqh, dhandle, q); \
TAILQ_INSERT_HEAD(&(conn)->dhhash[bucket], dhandle, hashq); \
++conn->dhandle_count; \
} while (0)
#define WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket) do { \
+ WT_ASSERT(session, \
+ F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \
TAILQ_REMOVE(&(conn)->dhqh, dhandle, q); \
TAILQ_REMOVE(&(conn)->dhhash[bucket], dhandle, hashq); \
--conn->dhandle_count; \
@@ -163,13 +167,13 @@ struct __wt_connection_impl {
WT_SPINLOCK api_lock; /* Connection API spinlock */
WT_SPINLOCK checkpoint_lock; /* Checkpoint spinlock */
- WT_SPINLOCK dhandle_lock; /* Data handle list spinlock */
WT_SPINLOCK fh_lock; /* File handle queue spinlock */
WT_SPINLOCK metadata_lock; /* Metadata update spinlock */
WT_SPINLOCK reconfig_lock; /* Single thread reconfigure */
WT_SPINLOCK schema_lock; /* Schema operation spinlock */
WT_SPINLOCK table_lock; /* Table creation spinlock */
WT_SPINLOCK turtle_lock; /* Turtle file spinlock */
+ WT_RWLOCK dhandle_lock; /* Data handle list lock */
/*
* We distribute the btree page locks across a set of spin locks. Don't
diff --git a/src/include/dhandle.h b/src/include/dhandle.h
index dcc788f0839..4f318e7bccf 100644
--- a/src/include/dhandle.h
+++ b/src/include/dhandle.h
@@ -37,6 +37,24 @@
#define WT_SESSION_META_DHANDLE(s) \
(((WT_CURSOR_BTREE *)((s)->meta_cursor))->btree->dhandle)
+#define WT_DHANDLE_ACQUIRE(dhandle) \
+ (void)__wt_atomic_add32(&dhandle->session_ref, 1)
+
+#define WT_DHANDLE_RELEASE(dhandle) \
+ (void)__wt_atomic_sub32(&dhandle->session_ref, 1)
+
+#define WT_DHANDLE_NEXT(session, dhandle, head, field) do { \
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));\
+ if (dhandle == NULL) \
+ dhandle = TAILQ_FIRST(head); \
+ else { \
+ WT_DHANDLE_RELEASE(dhandle); \
+ dhandle = TAILQ_NEXT(dhandle, field); \
+ } \
+ if (dhandle != NULL) \
+ WT_DHANDLE_ACQUIRE(dhandle); \
+} while (0)
+
/*
* WT_DATA_HANDLE --
* A handle for a generic named data source.
diff --git a/src/include/extern.h b/src/include/extern.h
index eb2f9a0e784..d7d58c58048 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -254,6 +254,7 @@ extern WT_THREAD_RET __wt_cache_pool_server(void *arg) WT_GCC_FUNC_DECL_ATTRIBUT
extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern void __wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
+extern int __wt_conn_dhandle_alloc( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern int __wt_conn_dhandle_find( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern int __wt_conn_btree_open( WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
diff --git a/src/include/flags.h b/src/include/flags.h
index 0b92a12c686..5219bf33ed6 100644
--- a/src/include/flags.h
+++ b/src/include/flags.h
@@ -53,22 +53,23 @@
#define WT_SESSION_CAN_WAIT 0x00000001
#define WT_SESSION_INTERNAL 0x00000002
#define WT_SESSION_LOCKED_CHECKPOINT 0x00000004
-#define WT_SESSION_LOCKED_HANDLE_LIST 0x00000008
-#define WT_SESSION_LOCKED_METADATA 0x00000010
-#define WT_SESSION_LOCKED_PASS 0x00000020
-#define WT_SESSION_LOCKED_SCHEMA 0x00000040
-#define WT_SESSION_LOCKED_SLOT 0x00000080
-#define WT_SESSION_LOCKED_TABLE 0x00000100
-#define WT_SESSION_LOCKED_TURTLE 0x00000200
-#define WT_SESSION_LOGGING_INMEM 0x00000400
-#define WT_SESSION_LOOKASIDE_CURSOR 0x00000800
-#define WT_SESSION_NO_CACHE 0x00001000
-#define WT_SESSION_NO_DATA_HANDLES 0x00002000
-#define WT_SESSION_NO_EVICTION 0x00004000
-#define WT_SESSION_NO_LOGGING 0x00008000
-#define WT_SESSION_NO_SCHEMA_LOCK 0x00010000
-#define WT_SESSION_QUIET_CORRUPT_FILE 0x00020000
-#define WT_SESSION_SERVER_ASYNC 0x00040000
+#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000008
+#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00000010
+#define WT_SESSION_LOCKED_METADATA 0x00000020
+#define WT_SESSION_LOCKED_PASS 0x00000040
+#define WT_SESSION_LOCKED_SCHEMA 0x00000080
+#define WT_SESSION_LOCKED_SLOT 0x00000100
+#define WT_SESSION_LOCKED_TABLE 0x00000200
+#define WT_SESSION_LOCKED_TURTLE 0x00000400
+#define WT_SESSION_LOGGING_INMEM 0x00000800
+#define WT_SESSION_LOOKASIDE_CURSOR 0x00001000
+#define WT_SESSION_NO_CACHE 0x00002000
+#define WT_SESSION_NO_DATA_HANDLES 0x00004000
+#define WT_SESSION_NO_EVICTION 0x00008000
+#define WT_SESSION_NO_LOGGING 0x00010000
+#define WT_SESSION_NO_SCHEMA_LOCK 0x00020000
+#define WT_SESSION_QUIET_CORRUPT_FILE 0x00040000
+#define WT_SESSION_SERVER_ASYNC 0x00080000
#define WT_STAT_CLEAR 0x00000001
#define WT_STAT_JSON 0x00000002
#define WT_STAT_ON_CLOSE 0x00000004
diff --git a/src/include/schema.h b/src/include/schema.h
index bb116e5cf2f..fff57951c0e 100644
--- a/src/include/schema.h
+++ b/src/include/schema.h
@@ -78,6 +78,11 @@ struct __wt_table {
*/
#define WT_COLGROUPS(t) WT_MAX((t)->ncolgroups, 1)
+/* Make it simple to check a generic locked state on the handle list lock */
+#define WT_SESSION_LOCKED_HANDLE_LIST \
+ (WT_SESSION_LOCKED_HANDLE_LIST_READ | \
+ WT_SESSION_LOCKED_HANDLE_LIST_WRITE)
+
/*
* WT_WITH_LOCK_WAIT --
* Wait for a lock, perform an operation, drop the lock.
@@ -122,16 +127,47 @@ struct __wt_table {
&S2C(session)->checkpoint_lock, WT_SESSION_LOCKED_CHECKPOINT, op)
/*
- * WT_WITH_HANDLE_LIST_LOCK --
- * Acquire the data handle list lock, perform an operation, drop the lock.
+ * WT_WITH_HANDLE_LIST_READ_LOCK --
+ * Acquire the data handle list lock in shared mode, perform an operation,
+ * drop the lock. The handle list lock is a read-write lock so the
+ * implementation is different to the other lock macros.
*
* Note: always waits because some operations need the handle list lock to
* discard handles, and we only expect it to be held across short
* operations.
*/
-#define WT_WITH_HANDLE_LIST_LOCK(session, op) \
- WT_WITH_LOCK_WAIT(session, \
- &S2C(session)->dhandle_lock, WT_SESSION_LOCKED_HANDLE_LIST, op)
+#define WT_WITH_HANDLE_LIST_READ_LOCK(session, op) do { \
+ if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) { \
+ op; \
+ } else { \
+ __wt_readlock(session, &S2C(session)->dhandle_lock); \
+ F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ op; \
+ F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ __wt_readunlock(session, &S2C(session)->dhandle_lock); \
+ } \
+} while (0)
+
+/*
+ * WT_WITH_HANDLE_LIST_WRITE_LOCK --
+ * Acquire the data handle list lock in exclusive mode, perform an
+ * operation, drop the lock. The handle list lock is a read-write lock so
+ * the implementation is different to the other lock macros.
+ * The read lock must not already be held: that is asserted, not upgraded.
+ */
+#define WT_WITH_HANDLE_LIST_WRITE_LOCK(session, op) do { \
+ if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)) { \
+ op; \
+ } else { \
+ WT_ASSERT(session, \
+ !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ));\
+ __wt_writelock(session, &S2C(session)->dhandle_lock); \
+ F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
+ op; \
+ F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
+ __wt_writeunlock(session, &S2C(session)->dhandle_lock); \
+ } \
+} while (0)
/*
* WT_WITH_METADATA_LOCK --
@@ -192,15 +228,21 @@ struct __wt_table {
WT_CONNECTION_IMPL *__conn = S2C(session); \
bool __checkpoint_locked = \
F_ISSET(session, WT_SESSION_LOCKED_CHECKPOINT); \
- bool __handle_locked = \
- F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST); \
+ bool __handle_read_locked = \
+ F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ bool __handle_write_locked = \
+ F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
bool __table_locked = \
F_ISSET(session, WT_SESSION_LOCKED_TABLE); \
bool __schema_locked = \
F_ISSET(session, WT_SESSION_LOCKED_SCHEMA); \
- if (__handle_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST); \
- __wt_spin_unlock(session, &__conn->dhandle_lock); \
+ if (__handle_read_locked) { \
+ F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ __wt_readunlock(session, &__conn->dhandle_lock); \
+ } \
+ if (__handle_write_locked) { \
+ F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
+ __wt_writeunlock(session, &__conn->dhandle_lock); \
} \
if (__table_locked) { \
F_CLR(session, WT_SESSION_LOCKED_TABLE); \
@@ -227,8 +269,12 @@ struct __wt_table {
__wt_spin_lock(session, &__conn->table_lock); \
F_SET(session, WT_SESSION_LOCKED_TABLE); \
} \
- if (__handle_locked) { \
- __wt_spin_lock(session, &__conn->dhandle_lock); \
- F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST); \
+ if (__handle_read_locked) { \
+ __wt_readlock(session, &__conn->dhandle_lock); \
+ F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ } \
+ if (__handle_write_locked) { \
+ __wt_writelock(session, &__conn->dhandle_lock); \
+ F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
} \
} while (0)
diff --git a/src/include/stat.h b/src/include/stat.h
index fd3e3290d95..8b2e78a4ed5 100644
--- a/src/include/stat.h
+++ b/src/include/stat.h
@@ -392,9 +392,7 @@ struct __wt_connection_stats {
int64_t lock_checkpoint_count;
int64_t lock_checkpoint_wait_application;
int64_t lock_checkpoint_wait_internal;
- int64_t lock_handle_list_count;
- int64_t lock_handle_list_wait_application;
- int64_t lock_handle_list_wait_internal;
+ int64_t lock_handle_list_wait_eviction;
int64_t lock_metadata_count;
int64_t lock_metadata_wait_application;
int64_t lock_metadata_wait_internal;
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index f05d3d4ab55..d1e3d383396 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -4595,240 +4595,236 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1133
/*! lock: checkpoint lock internal thread wait time (usecs) */
#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1134
-/*! lock: handle-list lock acquisitions */
-#define WT_STAT_CONN_LOCK_HANDLE_LIST_COUNT 1135
-/*! lock: handle-list lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_APPLICATION 1136
-/*! lock: handle-list lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_INTERNAL 1137
+/*! lock: handle-list lock eviction thread wait time (usecs) */
+#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1135
/*! lock: metadata lock acquisitions */
-#define WT_STAT_CONN_LOCK_METADATA_COUNT 1138
+#define WT_STAT_CONN_LOCK_METADATA_COUNT 1136
/*! lock: metadata lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1139
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1137
/*! lock: metadata lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1140
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1138
/*! lock: schema lock acquisitions */
-#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1141
+#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1139
/*! lock: schema lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1142
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1140
/*! lock: schema lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1143
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1141
/*! lock: table lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_COUNT 1144
+#define WT_STAT_CONN_LOCK_TABLE_COUNT 1142
/*!
* lock: table lock application thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1145
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1143
/*!
* lock: table lock internal thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1146
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1144
/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1147
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1145
/*! log: consolidated slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1148
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1146
/*! log: consolidated slot join races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1149
+#define WT_STAT_CONN_LOG_SLOT_RACES 1147
/*! log: consolidated slot join transitions */
-#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1150
+#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1148
/*! log: consolidated slot joins */
-#define WT_STAT_CONN_LOG_SLOT_JOINS 1151
+#define WT_STAT_CONN_LOG_SLOT_JOINS 1149
/*! log: consolidated slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1152
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1150
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1153
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1151
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1154
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1152
/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1155
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1153
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1156
+#define WT_STAT_CONN_LOG_FLUSH 1154
/*! log: log force write operations */
-#define WT_STAT_CONN_LOG_FORCE_WRITE 1157
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1155
/*! log: log force write operations skipped */
-#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1158
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1156
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1159
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1157
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1160
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1158
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1161
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1159
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1162
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1160
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1163
+#define WT_STAT_CONN_LOG_SCANS 1161
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1164
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1162
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1165
+#define WT_STAT_CONN_LOG_WRITE_LSN 1163
/*! log: log server thread write LSN walk skipped */
-#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1166
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1164
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1167
+#define WT_STAT_CONN_LOG_SYNC 1165
/*! log: log sync time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DURATION 1168
+#define WT_STAT_CONN_LOG_SYNC_DURATION 1166
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1169
+#define WT_STAT_CONN_LOG_SYNC_DIR 1167
/*! log: log sync_dir time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1170
+#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1168
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1171
+#define WT_STAT_CONN_LOG_WRITES 1169
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1172
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1170
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1173
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1171
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1174
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1172
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1175
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1173
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1176
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1174
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1177
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1175
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1178
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1176
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1179
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1177
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1180
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1178
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1181
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1179
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1182
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1180
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1183
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1181
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1184
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1182
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1185
+#define WT_STAT_CONN_REC_PAGES 1183
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1186
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1184
/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1187
+#define WT_STAT_CONN_REC_PAGE_DELETE 1185
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1188
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1186
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1189
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1187
/*! session: open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1190
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1188
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1191
+#define WT_STAT_CONN_SESSION_OPEN 1189
/*! session: table alter failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1192
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1190
/*! session: table alter successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1193
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1191
/*! session: table alter unchanged and skipped */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1194
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1192
/*! session: table compact failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1195
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1193
/*! session: table compact successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1196
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1194
/*! session: table create failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1197
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1195
/*! session: table create successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1198
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1196
/*! session: table drop failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1199
+#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1197
/*! session: table drop successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1200
+#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1198
/*! session: table rebalance failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1201
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1199
/*! session: table rebalance successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1202
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1200
/*! session: table rename failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1203
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1201
/*! session: table rename successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1204
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1202
/*! session: table salvage failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1205
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1203
/*! session: table salvage successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1206
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1204
/*! session: table truncate failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1207
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1205
/*! session: table truncate successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1208
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1206
/*! session: table verify failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1209
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1207
/*! session: table verify successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1210
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1208
/*! thread-state: active filesystem fsync calls */
-#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1211
+#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1209
/*! thread-state: active filesystem read calls */
-#define WT_STAT_CONN_THREAD_READ_ACTIVE 1212
+#define WT_STAT_CONN_THREAD_READ_ACTIVE 1210
/*! thread-state: active filesystem write calls */
-#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1213
+#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1211
/*! thread-yield: application thread time evicting (usecs) */
-#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1214
+#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1212
/*! thread-yield: application thread time waiting for cache (usecs) */
-#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1215
+#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1213
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1216
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1214
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1217
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1215
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1218
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1216
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1219
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1217
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1220
+#define WT_STAT_CONN_PAGE_SLEEP 1218
/*! transaction: number of named snapshots created */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1221
+#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1219
/*! transaction: number of named snapshots dropped */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1222
+#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1220
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1223
+#define WT_STAT_CONN_TXN_BEGIN 1221
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1224
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1222
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1225
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1223
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1226
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1224
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1227
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1225
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1228
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1226
/*! transaction: transaction checkpoint scrub dirty target */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1229
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1227
/*! transaction: transaction checkpoint scrub time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1230
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1228
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1231
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1229
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1232
+#define WT_STAT_CONN_TXN_CHECKPOINT 1230
/*!
* transaction: transaction checkpoints skipped because database was
* clean
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1233
+#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1231
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1234
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1232
/*!
* transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1235
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1233
/*!
* transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1236
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1234
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1237
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1235
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1238
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1236
/*!
* transaction: transaction range of IDs currently pinned by named
* snapshots
*/
-#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1239
+#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1237
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1240
+#define WT_STAT_CONN_TXN_SYNC 1238
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1241
+#define WT_STAT_CONN_TXN_COMMIT 1239
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1242
+#define WT_STAT_CONN_TXN_ROLLBACK 1240
/*!
* @}
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c
index a2511f48e2b..60afbc99ade 100644
--- a/src/lsm/lsm_cursor.c
+++ b/src/lsm/lsm_cursor.c
@@ -1692,8 +1692,8 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
bulk = cval.val != 0;
/* Get the LSM tree. */
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __wt_lsm_tree_get(session, uri, bulk, &lsm_tree));
+ ret = __wt_lsm_tree_get(session, uri, bulk, &lsm_tree);
+
/*
* Check whether the exclusive open for a bulk load succeeded, and
* if it did ensure that it's safe to bulk load into the tree.
diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c
index cbd83a5cd30..6dc06146179 100644
--- a/src/lsm/lsm_manager.c
+++ b/src/lsm/lsm_manager.c
@@ -387,8 +387,8 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
__wt_sleep(0, 10000);
if (TAILQ_EMPTY(&conn->lsmqh))
continue;
- __wt_spin_lock(session, &conn->dhandle_lock);
- F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST);
+ __wt_readlock(session, &conn->dhandle_lock);
+ F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ);
dhandle_locked = true;
TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) {
if (!lsm_tree->active)
@@ -448,14 +448,14 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
session, WT_LSM_WORK_MERGE, 0, lsm_tree));
}
}
- __wt_spin_unlock(session, &conn->dhandle_lock);
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST);
+ __wt_readunlock(session, &conn->dhandle_lock);
+ F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ);
dhandle_locked = false;
}
err: if (dhandle_locked) {
- __wt_spin_unlock(session, &conn->dhandle_lock);
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST);
+ __wt_readunlock(session, &conn->dhandle_lock);
+ F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ);
}
return (ret);
}
diff --git a/src/lsm/lsm_stat.c b/src/lsm/lsm_stat.c
index 150de968722..21e8991be94 100644
--- a/src/lsm/lsm_stat.c
+++ b/src/lsm/lsm_stat.c
@@ -33,9 +33,7 @@ __curstat_lsm_init(
"checkpoint=" WT_CHECKPOINT, NULL, NULL };
locked = false;
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree));
- WT_RET(ret);
+ WT_RET(__wt_lsm_tree_get(session, uri, false, &lsm_tree));
WT_ERR(__wt_scr_alloc(session, 0, &uribuf));
/* Propagate all, fast and/or clear to the cursors we open. */
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index 71a981a6284..a9275976023 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -38,7 +38,7 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
/* We may be destroying an lsm_tree before it was added. */
if (F_ISSET(lsm_tree, WT_LSM_TREE_OPEN)) {
WT_ASSERT(session, final ||
- F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));
+ F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
TAILQ_REMOVE(&S2C(session)->lsmqh, lsm_tree, q);
}
@@ -321,9 +321,7 @@ __wt_lsm_tree_create(WT_SESSION_IMPL *session,
metadata = NULL;
/* If the tree can be opened, it already exists. */
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree));
- if (ret == 0) {
+ if ((ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree)) == 0) {
__wt_lsm_tree_release(session, lsm_tree);
return (exclusive ? EEXIST : 0);
}
@@ -339,7 +337,7 @@ __wt_lsm_tree_create(WT_SESSION_IMPL *session,
* error: the returned handle is NULL on error, and the metadata
* tracking macros handle cleaning up on failure.
*/
- WT_WITH_HANDLE_LIST_LOCK(session,
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
ret = __lsm_tree_open(session, uri, true, &lsm_tree));
if (ret == 0)
__wt_lsm_tree_release(session, lsm_tree);
@@ -404,6 +402,9 @@ __lsm_tree_find(WT_SESSION_IMPL *session,
}
*treep = lsm_tree;
+
+ WT_ASSERT(session, lsm_tree->excl_session ==
+ (exclusive ? session : NULL));
return (0);
}
@@ -456,7 +457,8 @@ __lsm_tree_open(WT_SESSION_IMPL *session,
conn = S2C(session);
lsm_tree = NULL;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));
+ WT_ASSERT(session,
+ F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
/* Start the LSM manager thread if it isn't running. */
if (__wt_atomic_cas32(&conn->lsm_manager.lsm_workers, 0, 1))
@@ -520,14 +522,21 @@ __wt_lsm_tree_get(WT_SESSION_IMPL *session,
{
WT_DECL_RET;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));
-
- ret = __lsm_tree_find(session, uri, exclusive, treep);
+ /*
+ * Dropping and re-acquiring the lock is safe here, since the tree open
+ * call checks to see if another thread beat it to opening the tree
+ * before proceeding.
+ */
+ if (exclusive)
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
+ ret = __lsm_tree_find(session, uri, exclusive, treep));
+ else
+ WT_WITH_HANDLE_LIST_READ_LOCK(session,
+ ret = __lsm_tree_find(session, uri, exclusive, treep));
if (ret == WT_NOTFOUND)
- ret = __lsm_tree_open(session, uri, exclusive, treep);
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
+ ret = __lsm_tree_open(session, uri, exclusive, treep));
- WT_ASSERT(session, ret != 0 ||
- (*treep)->excl_session == (exclusive ? session : NULL));
return (ret);
}
@@ -857,9 +866,7 @@ __wt_lsm_tree_alter(
locked = false;
/* Get the LSM tree. */
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree));
- WT_RET(ret);
+ WT_RET(__wt_lsm_tree_get(session, uri, false, &lsm_tree));
/* Prevent any new opens. */
__wt_lsm_tree_writelock(session, lsm_tree);
@@ -899,9 +906,7 @@ __wt_lsm_tree_drop(
locked = false;
/* Get the LSM tree. */
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __wt_lsm_tree_get(session, name, true, &lsm_tree));
- WT_RET(ret);
+ WT_RET(__wt_lsm_tree_get(session, name, true, &lsm_tree));
WT_ASSERT(session, !lsm_tree->active);
/* Prevent any new opens. */
@@ -934,7 +939,7 @@ __wt_lsm_tree_drop(
WT_ASSERT(session, !lsm_tree->active);
err: if (locked)
__wt_lsm_tree_writeunlock(session, lsm_tree);
- WT_WITH_HANDLE_LIST_LOCK(session,
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
tret = __lsm_tree_discard(session, lsm_tree, false));
WT_TRET(tret);
return (ret);
@@ -960,9 +965,7 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session,
locked = false;
/* Get the LSM tree. */
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __wt_lsm_tree_get(session, olduri, true, &lsm_tree));
- WT_RET(ret);
+ WT_RET(__wt_lsm_tree_get(session, olduri, true, &lsm_tree));
/* Prevent any new opens. */
__wt_lsm_tree_writelock(session, lsm_tree);
@@ -1007,7 +1010,7 @@ err: if (locked)
* Discard this LSM tree structure. The first operation on the renamed
* tree will create a new one.
*/
- WT_WITH_HANDLE_LIST_LOCK(session,
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
tret = __lsm_tree_discard(session, lsm_tree, false));
WT_TRET(tret);
return (ret);
@@ -1032,9 +1035,7 @@ __wt_lsm_tree_truncate(
locked = false;
/* Get the LSM tree. */
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __wt_lsm_tree_get(session, name, true, &lsm_tree));
- WT_RET(ret);
+ WT_RET(__wt_lsm_tree_get(session, name, true, &lsm_tree));
/* Prevent any new opens. */
__wt_lsm_tree_writelock(session, lsm_tree);
@@ -1068,7 +1069,7 @@ err: if (locked)
* the last good version of the metadata will be used, resulting
* in a valid (not truncated) tree.
*/
- WT_WITH_HANDLE_LIST_LOCK(session,
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
tret = __lsm_tree_discard(session, lsm_tree, false));
WT_TRET(tret);
}
@@ -1157,9 +1158,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
/* Tell __wt_schema_worker not to look inside the LSM tree. */
*skipp = true;
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __wt_lsm_tree_get(session, name, false, &lsm_tree));
- WT_RET(ret);
+ WT_RET(__wt_lsm_tree_get(session, name, false, &lsm_tree));
if (!F_ISSET(S2C(session), WT_CONN_LSM_MERGE))
WT_ERR_MSG(session, EINVAL,
@@ -1356,9 +1355,7 @@ __wt_lsm_tree_worker(WT_SESSION_IMPL *session,
locked = false;
exclusive = FLD_ISSET(open_flags, WT_DHANDLE_EXCLUSIVE);
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __wt_lsm_tree_get(session, uri, exclusive, &lsm_tree));
- WT_RET(ret);
+ WT_RET(__wt_lsm_tree_get(session, uri, exclusive, &lsm_tree));
/*
* We mark that we're busy using the tree to coordinate
diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c
index d9c185a3f58..4349acf7b55 100644
--- a/src/lsm/lsm_work_unit.c
+++ b/src/lsm/lsm_work_unit.c
@@ -276,7 +276,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
!F_ISSET(chunk, WT_LSM_CHUNK_STABLE) &&
!chunk->evicted) {
- WT_WITH_HANDLE_LIST_LOCK(session,
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
ret = __lsm_discard_handle(session, chunk->uri, NULL));
if (ret == 0)
chunk->evicted = 1;
@@ -517,7 +517,7 @@ __lsm_drop_file(WT_SESSION_IMPL *session, const char *uri)
*
* This will fail with EBUSY if the file is still in use.
*/
- WT_WITH_HANDLE_LIST_LOCK(session,
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
ret = __lsm_discard_handle(session, uri, WT_CHECKPOINT));
WT_RET(ret);
diff --git a/src/schema/schema_drop.c b/src/schema/schema_drop.c
index c1a4f257648..49801e4e5f9 100644
--- a/src/schema/schema_drop.c
+++ b/src/schema/schema_drop.c
@@ -30,7 +30,7 @@ __drop_file(
WT_RET(__wt_schema_backup_check(session, filename));
/* Close all btree handles associated with this file. */
- WT_WITH_HANDLE_LIST_LOCK(session,
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
ret = __wt_conn_dhandle_close_all(session, uri, force));
WT_RET(ret);
diff --git a/src/schema/schema_rename.c b/src/schema/schema_rename.c
index f512482c162..a374f4c2831 100644
--- a/src/schema/schema_rename.c
+++ b/src/schema/schema_rename.c
@@ -33,7 +33,7 @@ __rename_file(
WT_RET(__wt_schema_backup_check(session, filename));
WT_RET(__wt_schema_backup_check(session, newfile));
/* Close any btree handles in the file. */
- WT_WITH_HANDLE_LIST_LOCK(session,
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
ret = __wt_conn_dhandle_close_all(session, uri, false));
WT_ERR(ret);
diff --git a/src/schema/schema_worker.c b/src/schema/schema_worker.c
index fb7f8cec074..e5f71b5d56f 100644
--- a/src/schema/schema_worker.c
+++ b/src/schema/schema_worker.c
@@ -49,7 +49,7 @@ __wt_schema_worker(WT_SESSION_IMPL *session,
* any open file handles, including checkpoints.
*/
if (FLD_ISSET(open_flags, WT_DHANDLE_EXCLUSIVE)) {
- WT_WITH_HANDLE_LIST_LOCK(session,
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
ret = __wt_conn_dhandle_close_all(
session, uri, false));
WT_ERR(ret);
diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c
index f1251794b89..ee9bddbfc19 100644
--- a/src/session/session_dhandle.c
+++ b/src/session/session_dhandle.c
@@ -44,8 +44,7 @@ __session_discard_dhandle(
TAILQ_REMOVE(&session->dhandles, dhandle_cache, q);
TAILQ_REMOVE(&session->dhhash[bucket], dhandle_cache, hashq);
- (void)__wt_atomic_sub32(&dhandle_cache->dhandle->session_ref, 1);
-
+ WT_DHANDLE_RELEASE(dhandle_cache->dhandle);
__wt_overwrite_and_free(session, dhandle_cache);
}
@@ -412,17 +411,27 @@ __session_dhandle_sweep(WT_SESSION_IMPL *session)
/*
* __session_find_shared_dhandle --
* Search for a data handle in the connection and add it to a session's
- * cache. Since the data handle isn't locked, this must be called holding
- * the handle list lock, and we must increment the handle's reference
- * count before releasing it.
+ * cache. We must increment the handle's reference count while holding
+ * the handle list lock.
*/
static int
__session_find_shared_dhandle(
WT_SESSION_IMPL *session, const char *uri, const char *checkpoint)
{
- WT_RET(__wt_conn_dhandle_find(session, uri, checkpoint));
- (void)__wt_atomic_add32(&session->dhandle->session_ref, 1);
- return (0);
+ WT_DECL_RET;
+
+ WT_WITH_HANDLE_LIST_READ_LOCK(session,
+ if ((ret = __wt_conn_dhandle_find(session, uri, checkpoint)) == 0)
+ WT_DHANDLE_ACQUIRE(session->dhandle));
+
+ if (ret != WT_NOTFOUND)
+ return (ret);
+
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
+ if ((ret = __wt_conn_dhandle_alloc(session, uri, checkpoint)) == 0)
+ WT_DHANDLE_ACQUIRE(session->dhandle));
+
+ return (ret);
}
/*
@@ -450,16 +459,16 @@ __session_get_dhandle(
* We didn't find a match in the session cache, search the shared
* handle list and cache the handle we find.
*/
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __session_find_shared_dhandle(session, uri, checkpoint));
- WT_RET(ret);
+ WT_RET(__session_find_shared_dhandle(session, uri, checkpoint));
/*
* Fixup the reference count on failure (we incremented the reference
* count while holding the handle-list lock).
*/
- if ((ret = __session_add_dhandle(session)) != 0)
- (void)__wt_atomic_sub32(&session->dhandle->session_ref, 1);
+ if ((ret = __session_add_dhandle(session)) != 0) {
+ WT_DHANDLE_RELEASE(session->dhandle);
+ session->dhandle = NULL;
+ }
return (ret);
}
@@ -505,17 +514,15 @@ __wt_session_get_btree(WT_SESSION_IMPL *session,
* reopen handles in the meantime. A combination of the schema
* and handle list locks are used to enforce this.
*/
- if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA) ||
- !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) {
+ if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) {
dhandle->excl_session = NULL;
dhandle->excl_ref = 0;
F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE);
__wt_writeunlock(session, &dhandle->rwlock);
WT_WITH_SCHEMA_LOCK(session,
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __wt_session_get_btree(
- session, uri, checkpoint, cfg, flags)));
+ ret = __wt_session_get_btree(
+ session, uri, checkpoint, cfg, flags));
return (ret);
}
diff --git a/src/support/stat.c b/src/support/stat.c
index 167d17137ce..fd38e1b79ee 100644
--- a/src/support/stat.c
+++ b/src/support/stat.c
@@ -759,9 +759,7 @@ static const char * const __stats_connection_desc[] = {
"lock: checkpoint lock acquisitions",
"lock: checkpoint lock application thread wait time (usecs)",
"lock: checkpoint lock internal thread wait time (usecs)",
- "lock: handle-list lock acquisitions",
- "lock: handle-list lock application thread wait time (usecs)",
- "lock: handle-list lock internal thread wait time (usecs)",
+ "lock: handle-list lock eviction thread wait time (usecs)",
"lock: metadata lock acquisitions",
"lock: metadata lock application thread wait time (usecs)",
"lock: metadata lock internal thread wait time (usecs)",
@@ -1044,9 +1042,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->lock_checkpoint_count = 0;
stats->lock_checkpoint_wait_application = 0;
stats->lock_checkpoint_wait_internal = 0;
- stats->lock_handle_list_count = 0;
- stats->lock_handle_list_wait_application = 0;
- stats->lock_handle_list_wait_internal = 0;
+ stats->lock_handle_list_wait_eviction = 0;
stats->lock_metadata_count = 0;
stats->lock_metadata_wait_application = 0;
stats->lock_metadata_wait_internal = 0;
@@ -1351,12 +1347,8 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, lock_checkpoint_wait_application);
to->lock_checkpoint_wait_internal +=
WT_STAT_READ(from, lock_checkpoint_wait_internal);
- to->lock_handle_list_count +=
- WT_STAT_READ(from, lock_handle_list_count);
- to->lock_handle_list_wait_application +=
- WT_STAT_READ(from, lock_handle_list_wait_application);
- to->lock_handle_list_wait_internal +=
- WT_STAT_READ(from, lock_handle_list_wait_internal);
+ to->lock_handle_list_wait_eviction +=
+ WT_STAT_READ(from, lock_handle_list_wait_eviction);
to->lock_metadata_count += WT_STAT_READ(from, lock_metadata_count);
to->lock_metadata_wait_application +=
WT_STAT_READ(from, lock_metadata_wait_application);
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index 3b19162fd3d..7b33b0c7788 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -640,9 +640,8 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ASSERT(session, session->ckpt_handle_next == 0);
WT_WITH_SCHEMA_LOCK(session,
WT_WITH_TABLE_LOCK(session,
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __checkpoint_apply_all(
- session, cfg, __wt_checkpoint_get_handles, NULL))));
+ ret = __checkpoint_apply_all(
+ session, cfg, __wt_checkpoint_get_handles, NULL)));
WT_ERR(ret);
/*