summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Cahill <michael.cahill@mongodb.com> 2015-04-27 17:17:31 +1000
committer: Michael Cahill <michael.cahill@mongodb.com> 2015-04-27 17:41:21 +1000
commit: 7ca9da48efb20d8e852f8393f2714d8051b139d3 (patch)
tree: 54624a9cebd2487cc184b7f91d71301d1380b272
parent: 26fd30aa13358ebec87ffd3d449fba11a2d282c1 (diff)
download: mongo-7ca9da48efb20d8e852f8393f2714d8051b139d3.tar.gz
Merge pull request #1894 from wiredtiger/tree-discard-background

Discard trees from cache in the background

Conflicts:
    src/conn/conn_dhandle.c
-rw-r--r--  src/conn/conn_dhandle.c        112
-rw-r--r--  src/conn/conn_sweep.c          228
-rw-r--r--  src/evict/evict_lru.c           13
-rw-r--r--  src/include/dhandle.h           13
-rw-r--r--  src/include/extern.h             6
-rw-r--r--  src/lsm/lsm_work_unit.c          2
-rw-r--r--  src/meta/meta_table.c            2
-rw-r--r--  src/session/session_dhandle.c   75
-rw-r--r--  src/txn/txn_ckpt.c               6
9 files changed, 294 insertions, 163 deletions
diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c
index 0eb3389de34..65b9af1aa01 100644
--- a/src/conn/conn_dhandle.c
+++ b/src/conn/conn_dhandle.c
@@ -70,28 +70,23 @@ __conn_dhandle_open_lock(
return (EBUSY);
/*
- * If the handle is open, try to get a read lock and recheck.
+ * If the handle is open, get a read lock and recheck.
*
- * Try to get a read lock if we want exclusive access and failed
+ * Wait for a read lock if we want exclusive access and failed
* to get it: the sweep server may be closing this handle, and
- * we need to wait for it to complete. If we want exclusive
- * access and find the handle open once we get the read lock,
- * give up: some other thread has it locked for real.
+ * we need to wait for it to release its lock. If we want
+ * exclusive access and find the handle open once we get the
+ * read lock, give up: some other thread has it locked for real.
*/
if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
(!want_exclusive || lock_busy)) {
- ret = __wt_try_readlock(session, dhandle->rwlock);
- if (ret == EBUSY) {
- if (want_exclusive &&
- F_ISSET(dhandle, WT_DHANDLE_OPEN))
- return (ret);
- __wt_yield();
- continue;
- }
- WT_RET(ret);
+ WT_RET(__wt_readlock(session, dhandle->rwlock));
is_open = F_ISSET(dhandle, WT_DHANDLE_OPEN) ? 1 : 0;
- if (is_open && !want_exclusive)
+ if (is_open && !want_exclusive) {
+ WT_ASSERT(session,
+ !F_ISSET(dhandle, WT_DHANDLE_DEAD));
return (0);
+ }
WT_RET(__wt_readunlock(session, dhandle->rwlock));
} else
is_open = 0;
@@ -117,6 +112,7 @@ __conn_dhandle_open_lock(
/* We have an exclusive lock, we're done. */
F_SET(dhandle, WT_DHANDLE_EXCLUSIVE);
+ WT_ASSERT(session, !F_ISSET(dhandle, WT_DHANDLE_DEAD));
return (0);
} else if (ret != EBUSY || (is_open && want_exclusive))
return (ret);
@@ -149,13 +145,26 @@ __wt_conn_dhandle_find(WT_SESSION_IMPL *session,
/* Increment the reference count if we already have the btree open. */
bucket = __wt_hash_city64(name, strlen(name)) % WT_HASH_ARRAY_SIZE;
- SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl)
- if (strcmp(name, dhandle->name) == 0 &&
- ((ckpt == NULL && dhandle->checkpoint == NULL) ||
- (ckpt != NULL && dhandle->checkpoint != NULL &&
- strcmp(ckpt, dhandle->checkpoint) == 0))) {
- session->dhandle = dhandle;
- return (0);
+ if (ckpt == NULL) {
+ SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl) {
+ if (F_ISSET(dhandle, WT_DHANDLE_DEAD))
+ continue;
+ if (dhandle->checkpoint == NULL &&
+ strcmp(name, dhandle->name) == 0) {
+ session->dhandle = dhandle;
+ return (0);
+ }
+ }
+ } else
+ SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl) {
+ if (F_ISSET(dhandle, WT_DHANDLE_DEAD))
+ continue;
+ if (dhandle->checkpoint != NULL &&
+ strcmp(name, dhandle->name) == 0 &&
+ strcmp(ckpt, dhandle->checkpoint) == 0) {
+ session->dhandle = dhandle;
+ return (0);
+ }
}
return (WT_NOTFOUND);
@@ -238,6 +247,30 @@ err: WT_TRET(__wt_rwlock_destroy(session, &dhandle->rwlock));
}
/*
+ * __conn_dhandle_mark_dead --
+ * Mark a data handle dead.
+ */
+static int
+__conn_dhandle_mark_dead(WT_SESSION_IMPL *session)
+{
+ int evict_reset;
+
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_HANDLE_LIST_LOCKED));
+
+ /*
+ * Handle forced discard (e.g., when dropping a file).
+ *
+ * We need exclusive access to the file -- disable ordinary
+ * eviction and drain any blocks already queued.
+ */
+ WT_RET(__wt_evict_file_exclusive_on(session, &evict_reset));
+ F_SET(session->dhandle, WT_DHANDLE_DEAD);
+ if (evict_reset)
+ __wt_evict_file_exclusive_off(session);
+ return (0);
+}
+
+/*
* __wt_conn_btree_sync_and_close --
* Sync and close the underlying btree handle.
*/
@@ -278,16 +311,25 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int final, int force)
/*
* The close can fail if an update cannot be written, return the EBUSY
* error to our caller for eventual retry.
+ *
+ * If we are forcing the close, just mark the handle dead and the tree
+ * will be discarded later. Don't do this for memory-mapped trees: we
+ * have to close the file handle to allow the file to be removed, but
+ * memory mapped trees contain pointers into memory that will become
+ * invalid if the mapping is closed.
*/
if (!F_ISSET(btree,
WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY))
- WT_ERR(__wt_checkpoint_close(session, final, force));
-
- if (dhandle->checkpoint == NULL)
- --S2C(session)->open_btree_count;
+ WT_ERR(force && (btree->bm == NULL || btree->bm->map == NULL) ?
+ __conn_dhandle_mark_dead(session) :
+ __wt_checkpoint_close(session, final));
WT_TRET(__wt_btree_close(session));
- F_CLR(dhandle, WT_DHANDLE_OPEN);
+ if (!force || final) {
+ F_CLR(dhandle, WT_DHANDLE_OPEN);
+ if (dhandle->checkpoint == NULL)
+ --S2C(session)->open_btree_count;
+ }
F_CLR(btree, WT_BTREE_SPECIAL_FLAGS);
err: __wt_spin_unlock(session, &dhandle->close_lock);
@@ -529,6 +571,7 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session,
__wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl)
if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
+ !F_ISSET(dhandle, WT_DHANDLE_DEAD) &&
strcmp(uri, dhandle->name) == 0 &&
(apply_checkpoints || dhandle->checkpoint == NULL))
WT_RET(__conn_btree_apply_internal(
@@ -536,6 +579,7 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session,
} else {
SLIST_FOREACH(dhandle, &conn->dhlh, l)
if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
+ !F_ISSET(dhandle, WT_DHANDLE_DEAD) &&
(apply_checkpoints ||
dhandle->checkpoint == NULL) &&
WT_PREFIX_MATCH(dhandle->name, "file:") &&
@@ -658,7 +702,8 @@ __wt_conn_dhandle_close_all(
bucket = __wt_hash_city64(name, strlen(name)) % WT_HASH_ARRAY_SIZE;
SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl) {
- if (strcmp(dhandle->name, name) != 0)
+ if (strcmp(dhandle->name, name) != 0 ||
+ F_ISSET(dhandle, WT_DHANDLE_DEAD))
continue;
session->dhandle = dhandle;
@@ -730,7 +775,7 @@ __conn_dhandle_remove(WT_SESSION_IMPL *session, int final)
* Close/discard a single data handle.
*/
int
-__wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final)
+__wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final, int force)
{
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
@@ -738,8 +783,9 @@ __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final)
dhandle = session->dhandle;
- if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
- tret = __wt_conn_btree_sync_and_close(session, final, 0);
+ if (F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
+ (final && F_ISSET(dhandle, WT_DHANDLE_DEAD))) {
+ tret = __wt_conn_btree_sync_and_close(session, final, force);
if (final && tret != 0) {
__wt_err(session, tret,
"Final close of %s failed", dhandle->name);
@@ -803,7 +849,7 @@ restart:
continue;
WT_WITH_DHANDLE(session, dhandle,
- WT_TRET(__wt_conn_dhandle_discard_single(session, 1)));
+ WT_TRET(__wt_conn_dhandle_discard_single(session, 1, 0)));
goto restart;
}
@@ -819,7 +865,7 @@ restart:
/* Close the metadata file handle. */
while ((dhandle = SLIST_FIRST(&conn->dhlh)) != NULL)
WT_WITH_DHANDLE(session, dhandle,
- WT_TRET(__wt_conn_dhandle_discard_single(session, 1)));
+ WT_TRET(__wt_conn_dhandle_discard_single(session, 1, 0)));
return (ret);
}
diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c
index 46f40a6fefd..fc29e0b2e15 100644
--- a/src/conn/conn_sweep.c
+++ b/src/conn/conn_sweep.c
@@ -9,104 +9,87 @@
#include "wt_internal.h"
/*
- * __sweep_remove_handles --
- * Remove closed dhandles from the connection list.
+ * __sweep_mark --
+ * Mark idle handles with a time of death, and note if we see dead
+ * handles.
*/
static int
-__sweep_remove_handles(WT_SESSION_IMPL *session)
+__sweep_mark(WT_SESSION_IMPL *session, int *dead_handlesp)
{
WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle, *dhandle_next;
- WT_DECL_RET;
+ WT_DATA_HANDLE *dhandle;
+ time_t now;
conn = S2C(session);
- dhandle = SLIST_FIRST(&conn->dhlh);
+ *dead_handlesp = 0;
- for (; dhandle != NULL; dhandle = dhandle_next) {
- dhandle_next = SLIST_NEXT(dhandle, l);
+ /* Don't discard handles that have been open recently. */
+ WT_RET(__wt_seconds(session, &now));
+
+ WT_STAT_FAST_CONN_INCR(session, dh_conn_sweeps);
+ SLIST_FOREACH(dhandle, &conn->dhlh, l) {
if (WT_IS_METADATA(dhandle))
continue;
- if (F_ISSET(dhandle, WT_DHANDLE_OPEN))
+ if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) {
+ ++*dead_handlesp;
continue;
-
- /* Make sure we get exclusive access. */
- if ((ret =
- __wt_try_writelock(session, dhandle->rwlock)) == EBUSY)
+ }
+ if (dhandle->session_inuse != 0 ||
+ now <= dhandle->timeofdeath + conn->sweep_idle_time)
continue;
- WT_RET(ret);
-
- /*
- * If there are no longer any references to the handle in any
- * sessions, attempt to discard it.
- */
- if (F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
- dhandle->session_inuse != 0 || dhandle->session_ref != 0) {
- WT_RET(__wt_writeunlock(session, dhandle->rwlock));
+ if (dhandle->timeofdeath == 0) {
+ dhandle->timeofdeath = now;
+ WT_STAT_FAST_CONN_INCR(session, dh_conn_tod);
continue;
}
- WT_WITH_DHANDLE(session, dhandle,
- ret = __wt_conn_dhandle_discard_single(session, 0));
-
- /* If the handle was not successfully discarded, unlock it. */
- if (ret != 0)
- WT_TRET(__wt_writeunlock(session, dhandle->rwlock));
- WT_RET_BUSY_OK(ret);
- WT_STAT_FAST_CONN_INCR(session, dh_conn_ref);
+ /* We now have a candidate to close. */
+ ++*dead_handlesp;
}
- return (ret == EBUSY ? 0 : ret);
+ return (0);
}
/*
- * __sweep --
- * Close unused dhandles on the connection dhandle list.
+ * __sweep_expire --
+ * Mark trees dead if they are clean and haven't been accessed recently,
+ * until we have reached the configured minimum number of handles.
*/
static int
-__sweep(WT_SESSION_IMPL *session)
+__sweep_expire(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
time_t now;
- int closed_handles;
conn = S2C(session);
- closed_handles = 0;
/* Don't discard handles that have been open recently. */
WT_RET(__wt_seconds(session, &now));
WT_STAT_FAST_CONN_INCR(session, dh_conn_sweeps);
SLIST_FOREACH(dhandle, &conn->dhlh, l) {
+ /*
+ * Ignore open files once the open file count reaches the
+ * minimum number of handles.
+ */
+ if (conn->open_file_count < conn->sweep_handles_min)
+ break;
+
if (WT_IS_METADATA(dhandle))
continue;
-
- if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
- dhandle->session_inuse == 0 && dhandle->session_ref == 0) {
- ++closed_handles;
+ if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
+ F_ISSET(dhandle, WT_DHANDLE_DEAD))
continue;
- }
if (dhandle->session_inuse != 0 ||
now <= dhandle->timeofdeath + conn->sweep_idle_time)
continue;
- if (dhandle->timeofdeath == 0) {
- dhandle->timeofdeath = now;
- WT_STAT_FAST_CONN_INCR(session, dh_conn_tod);
- continue;
- }
-
- /*
- * Ignore in-use files once the open file count reaches the
- * minimum number of handles.
- */
- if (conn->open_file_count < conn->sweep_handles_min)
- continue;
/*
* We have a candidate for closing; if it's open, acquire an
- * exclusive lock on the handle and close it.
+ * exclusive lock on the handle and mark it dead.
*
* The close would require I/O if an update cannot be written
* (updates in a no-longer-referenced file might not yet be
@@ -115,8 +98,10 @@ __sweep(WT_SESSION_IMPL *session)
* next time, after the transaction state has progressed.
*
* We don't set WT_DHANDLE_EXCLUSIVE deliberately, we want
- * opens to block on us rather than returning an EBUSY error to
- * the application.
+ * opens to block on us and then retry rather than returning an
+ * EBUSY error to the application. This is done holding the
+ * handle list lock so that connection-level handle searches
+ * never need to retry.
*/
if ((ret =
__wt_try_writelock(session, dhandle->rwlock)) == EBUSY)
@@ -129,31 +114,103 @@ __sweep(WT_SESSION_IMPL *session)
!__wt_txn_visible_all(session, btree->rec_max_txn))
goto unlock;
- /* If the handle is open, try to close it. */
- if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
- WT_WITH_DHANDLE(session, dhandle, ret =
- __wt_conn_btree_sync_and_close(session, 0, 0));
+ /*
+ * Mark the handle as dead and close the underlying file
+ * handle. Closing the handle decrements the open file count,
+ * meaning the close loop won't overrun the configured minimum.
+ */
+ WT_WITH_DHANDLE(session, dhandle, ret =
+ __wt_conn_btree_sync_and_close(session, 0, 1));
- /* We closed the btree handle, bump the statistic. */
- if (ret == 0)
- WT_STAT_FAST_CONN_INCR(
- session, dh_conn_handles);
- }
+unlock: WT_TRET(__wt_writeunlock(session, dhandle->rwlock));
+ WT_RET_BUSY_OK(ret);
+ }
- if (dhandle->session_inuse == 0 && dhandle->session_ref == 0)
- ++closed_handles;
+ return (0);
+}
+/*
+ * __sweep_flush --
+ * Flush pages from dead trees.
+ */
+static int
+__sweep_flush(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+
+ conn = S2C(session);
+
+ WT_STAT_FAST_CONN_INCR(session, dh_conn_sweeps);
+ SLIST_FOREACH(dhandle, &conn->dhlh, l) {
+ if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
+ !F_ISSET(dhandle, WT_DHANDLE_DEAD))
+ continue;
+
+ /* If the handle is marked "dead", flush it from cache. */
+ WT_WITH_DHANDLE(session, dhandle, ret =
+ __wt_conn_btree_sync_and_close(session, 0, 0));
+
+ /* We closed the btree handle, bump the statistic. */
+ if (ret == 0)
+ WT_STAT_FAST_CONN_INCR(session, dh_conn_handles);
-unlock: WT_TRET(__wt_writeunlock(session, dhandle->rwlock));
WT_RET_BUSY_OK(ret);
}
- if (closed_handles) {
- WT_WITH_DHANDLE_LOCK(session,
- ret = __sweep_remove_handles(session));
+ return (0);
+}
+
+/*
+ * __sweep_remove_handles --
+ * Remove closed dhandles from the connection list.
+ */
+static int
+__sweep_remove_handles(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle, *dhandle_next;
+ WT_DECL_RET;
+
+ conn = S2C(session);
+ dhandle = SLIST_FIRST(&conn->dhlh);
+
+ for (; dhandle != NULL; dhandle = dhandle_next) {
+ dhandle_next = SLIST_NEXT(dhandle, l);
+ if (WT_IS_METADATA(dhandle))
+ continue;
+ if (F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
+ dhandle->session_inuse != 0 ||
+ dhandle->session_ref != 0)
+ continue;
+
+ /* Make sure we get exclusive access. */
+ if ((ret =
+ __wt_try_writelock(session, dhandle->rwlock)) == EBUSY)
+ continue;
WT_RET(ret);
+
+ /*
+ * If there are no longer any references to the handle in any
+ * sessions, attempt to discard it.
+ */
+ if (F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
+ dhandle->session_inuse != 0 || dhandle->session_ref != 0) {
+ WT_RET(__wt_writeunlock(session, dhandle->rwlock));
+ continue;
+ }
+
+ WT_WITH_DHANDLE(session, dhandle,
+ ret = __wt_conn_dhandle_discard_single(session, 0, 1));
+
+ /* If the handle was not successfully discarded, unlock it. */
+ if (ret != 0)
+ WT_TRET(__wt_writeunlock(session, dhandle->rwlock));
+ WT_RET_BUSY_OK(ret);
+ WT_STAT_FAST_CONN_INCR(session, dh_conn_ref);
}
- return (0);
+ return (ret == EBUSY ? 0 : ret);
}
/*
@@ -166,12 +223,13 @@ __sweep_server(void *arg)
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ int dead_handles;
session = arg;
conn = S2C(session);
/*
- * Sweep for dead handles.
+ * Sweep for dead and excess handles.
*/
while (F_ISSET(conn, WT_CONN_SERVER_RUN) &&
F_ISSET(conn, WT_CONN_SERVER_SWEEP)) {
@@ -179,8 +237,28 @@ __sweep_server(void *arg)
WT_ERR(__wt_cond_wait(session, conn->sweep_cond,
(uint64_t)conn->sweep_interval * WT_MILLION));
- /* Sweep the handles. */
- WT_ERR(__sweep(session));
+ /*
+ * Mark handles with a time of death, and report whether any
+ * handles are marked dead.
+ */
+ WT_ERR(__sweep_mark(session, &dead_handles));
+
+ if (dead_handles == 0 &&
+ conn->open_file_count < conn->sweep_handles_min)
+ continue;
+
+ /* Close handles if we have reached the configured limit */
+ if (conn->open_file_count >= conn->sweep_handles_min) {
+ WT_WITH_DHANDLE_LOCK(session,
+ ret = __sweep_expire(session));
+ WT_ERR(ret);
+ }
+
+ WT_ERR(__sweep_flush(session));
+
+ WT_WITH_DHANDLE_LOCK(session,
+ ret = __sweep_remove_handles(session));
+ WT_ERR(ret);
}
if (0) {
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index 5057946adc9..2b721359e03 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -1426,6 +1426,19 @@ __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_server)
if (page->read_gen != WT_READGEN_OLDEST)
page->read_gen = __wt_cache_read_gen_set(session);
+ /*
+ * If we are evicting in a dead tree, don't write dirty pages.
+ *
+ * Force pages clean to keep statistics correct and to let the
+ * page-discard function assert that no dirty pages are ever
+ * discarded.
+ */
+ if (F_ISSET(btree->dhandle, WT_DHANDLE_DEAD) &&
+ __wt_page_is_modified(page)) {
+ page->modify->write_gen = 0;
+ __wt_cache_dirty_decr(session, page);
+ }
+
WT_WITH_BTREE(session, btree, ret = __wt_evict_page(session, ref));
(void)WT_ATOMIC_SUB4(btree->evict_busy, 1);
diff --git a/src/include/dhandle.h b/src/include/dhandle.h
index 300e8e735b9..034db30a0a2 100644
--- a/src/include/dhandle.h
+++ b/src/include/dhandle.h
@@ -65,11 +65,12 @@ struct __wt_data_handle {
WT_DSRC_STATS stats; /* Data-source statistics */
/* Flags values over 0xff are reserved for WT_BTREE_* */
-#define WT_DHANDLE_DISCARD 0x01 /* Discard on release */
-#define WT_DHANDLE_DISCARD_CLOSE 0x02 /* Close on release */
-#define WT_DHANDLE_EXCLUSIVE 0x04 /* Need exclusive access */
-#define WT_DHANDLE_HAVE_REF 0x08 /* Already have ref */
-#define WT_DHANDLE_LOCK_ONLY 0x10 /* Handle only used as a lock */
-#define WT_DHANDLE_OPEN 0x20 /* Handle is open */
+#define WT_DHANDLE_DEAD 0x01 /* Dead, awaiting discard */
+#define WT_DHANDLE_DISCARD 0x02 /* Discard on release */
+#define WT_DHANDLE_DISCARD_FORCE 0x04 /* Force discard on release */
+#define WT_DHANDLE_EXCLUSIVE 0x08 /* Need exclusive access */
+#define WT_DHANDLE_HAVE_REF 0x10 /* Already have ref */
+#define WT_DHANDLE_LOCK_ONLY 0x20 /* Handle only used as a lock */
+#define WT_DHANDLE_OPEN 0x40 /* Handle is open */
uint32_t flags;
};
diff --git a/src/include/extern.h b/src/include/extern.h
index 082ffb07a45..3c5e6f9412d 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -232,7 +232,7 @@ extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, int apply_checkpoints
extern int __wt_conn_btree_apply_single_ckpt(WT_SESSION_IMPL *session, const char *uri, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
extern int __wt_conn_btree_apply_single(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
extern int __wt_conn_dhandle_close_all( WT_SESSION_IMPL *session, const char *name, int force);
-extern int __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final);
+extern int __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final, int force);
extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session);
extern int __wt_connection_init(WT_CONNECTION_IMPL *conn);
extern int __wt_connection_destroy(WT_CONNECTION_IMPL *conn);
@@ -576,7 +576,7 @@ extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name
extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, WT_SESSION_IMPL **sessionp);
extern int __wt_compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, int *skip);
extern int __wt_session_compact( WT_SESSION *wt_session, const char *uri, const char *config);
-extern int __wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags);
+extern int __wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags, int *deadp);
extern int __wt_session_release_btree(WT_SESSION_IMPL *session);
extern int __wt_session_get_btree_ckpt(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], uint32_t flags);
extern void __wt_session_close_cache(WT_SESSION_IMPL *session);
@@ -674,7 +674,7 @@ extern int __wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]);
-extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, int final, int force);
+extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, int final);
extern uint64_t __wt_ext_transaction_id(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session);
extern int __wt_ext_transaction_isolation_level( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session);
extern int __wt_ext_transaction_notify( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_TXN_NOTIFY *notify);
diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c
index 15c3d34d5b0..15ba07664cc 100644
--- a/src/lsm/lsm_work_unit.c
+++ b/src/lsm/lsm_work_unit.c
@@ -457,7 +457,7 @@ __lsm_discard_handle(
WT_RET(__wt_session_get_btree(session, uri, checkpoint, NULL,
WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY));
- F_SET(session->dhandle, WT_DHANDLE_DISCARD);
+ F_SET(session->dhandle, WT_DHANDLE_DISCARD_FORCE);
return (__wt_session_release_btree(session));
}
diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c
index fb568361f74..eae0079effd 100644
--- a/src/meta/meta_table.c
+++ b/src/meta/meta_table.c
@@ -71,7 +71,7 @@ __wt_metadata_cursor(
* We use the metadata a lot, so we have a handle cached; lock it and
* increment the in-use counter once the cursor is open.
*/
- WT_ERR(__wt_session_lock_dhandle(session, 0));
+ WT_ERR(__wt_session_lock_dhandle(session, 0, NULL));
WT_ERR(__wt_curfile_create(session, NULL, cfg, 0, 0, cursorp));
__wt_cursor_dhandle_incr_use(session);
diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c
index a6c9fb867e7..a9baaf1f40d 100644
--- a/src/session/session_dhandle.c
+++ b/src/session/session_dhandle.c
@@ -47,7 +47,7 @@ __session_add_dhandle(
* the schema lock.
*/
int
-__wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags)
+__wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags, int *deadp)
{
enum { NOLOCK, READLOCK, WRITELOCK } locked;
WT_BTREE *btree;
@@ -57,6 +57,8 @@ __wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags)
btree = S2BT(session);
dhandle = session->dhandle;
locked = NOLOCK;
+ if (deadp != NULL)
+ *deadp = 0;
/*
* Special operation flags will cause the handle to be reopened.
@@ -95,7 +97,10 @@ __wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags)
* required, we're done. Otherwise, check that the handle is open and
* that no special flags are required.
*/
- if (LF_ISSET(WT_DHANDLE_LOCK_ONLY) ||
+ if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) {
+ WT_ASSERT(session, deadp != NULL);
+ *deadp = 1;
+ } else if (LF_ISSET(WT_DHANDLE_LOCK_ONLY) ||
(F_ISSET(dhandle, WT_DHANDLE_OPEN) && special_flags == 0))
return (0);
@@ -135,46 +140,25 @@ __wt_session_release_btree(WT_SESSION_IMPL *session)
dhandle = session->dhandle;
locked = F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) ? WRITELOCK : READLOCK;
- if (F_ISSET(dhandle, WT_DHANDLE_DISCARD_CLOSE)) {
- /*
- * If configured to discard on last close, trade any read lock
- * for an exclusive lock. If the exchange succeeds, setup for
- * discard. It is expected acquiring an exclusive lock will fail
- * sometimes since the handle may still be in use: in that case
- * we're done.
- */
- if (locked == READLOCK) {
- locked = NOLOCK;
- WT_ERR(__wt_readunlock(session, dhandle->rwlock));
- ret = __wt_try_writelock(session, dhandle->rwlock);
- if (ret != 0) {
- if (ret == EBUSY)
- ret = 0;
- goto err;
- }
- locked = WRITELOCK;
- F_CLR(dhandle, WT_DHANDLE_DISCARD_CLOSE);
- F_SET(dhandle,
- WT_DHANDLE_DISCARD | WT_DHANDLE_EXCLUSIVE);
- }
- }
-
/*
* If we had special flags set, close the handle so that future access
* can get a handle without special flags.
*/
- if (F_ISSET(dhandle, WT_DHANDLE_DISCARD) ||
+ if (F_ISSET(dhandle, WT_DHANDLE_DISCARD_FORCE)) {
+ WT_WITH_DHANDLE_LOCK(session,
+ ret = __wt_conn_btree_sync_and_close(session, 0, 1));
+ F_CLR(dhandle, WT_DHANDLE_DISCARD_FORCE);
+ } else if (F_ISSET(dhandle, WT_DHANDLE_DISCARD) ||
F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS)) {
WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE));
+ ret = __wt_conn_btree_sync_and_close(session, 0, 0);
F_CLR(dhandle, WT_DHANDLE_DISCARD);
-
- WT_TRET(__wt_conn_btree_sync_and_close(session, 0, 0));
}
if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE))
F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE);
-err: switch (locked) {
+ switch (locked) {
case NOLOCK:
break;
case READLOCK:
@@ -312,7 +296,8 @@ __session_dhandle_sweep(WT_SESSION_IMPL *session)
dhandle = dhandle_cache->dhandle;
if (dhandle != session->dhandle &&
dhandle->session_inuse == 0 &&
- now - dhandle->timeofdeath > conn->sweep_idle_time) {
+ (F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
+ now - dhandle->timeofdeath > conn->sweep_idle_time)) {
WT_STAT_FAST_CONN_INCR(session, dh_session_handles);
__session_discard_btree(session, dhandle_cache);
}
@@ -348,6 +333,7 @@ __wt_session_get_btree(WT_SESSION_IMPL *session,
WT_DATA_HANDLE_CACHE *dhandle_cache;
WT_DECL_RET;
uint64_t bucket;
+ int is_dead;
WT_ASSERT(session, !F_ISSET(session, WT_SESSION_NO_DATA_HANDLES));
WT_ASSERT(session, !LF_ISSET(WT_DHANDLE_HAVE_REF));
@@ -381,7 +367,8 @@ __wt_session_get_btree(WT_SESSION_IMPL *session,
if (dhandle != NULL) {
/* Try to lock the handle; if this succeeds, we're done. */
- if ((ret = __wt_session_lock_dhandle(session, flags)) == 0)
+ if ((ret =
+ __wt_session_lock_dhandle(session, flags, &is_dead)) == 0)
goto done;
/* Propagate errors we don't expect. */
@@ -389,17 +376,23 @@ __wt_session_get_btree(WT_SESSION_IMPL *session,
return (ret);
/*
- * Don't try harder to get the btree handle if our caller
- * hasn't allowed us to take the schema lock - they do so on
- * purpose and will handle error returns.
+ * Don't try harder to get the handle if we're only checking
+ * for locks or our caller hasn't allowed us to take the schema
+ * lock - they do so on purpose and will handle error returns.
*/
- if (!F_ISSET(session, WT_SESSION_SCHEMA_LOCKED) &&
+ if (LF_ISSET(WT_DHANDLE_LOCK_ONLY) ||
+ (!F_ISSET(session, WT_SESSION_SCHEMA_LOCKED) &&
F_ISSET(session,
- WT_SESSION_HANDLE_LIST_LOCKED | WT_SESSION_TABLE_LOCKED))
+ WT_SESSION_HANDLE_LIST_LOCKED | WT_SESSION_TABLE_LOCKED)))
return (ret);
- /* We found the data handle, don't try to get it again. */
- LF_SET(WT_DHANDLE_HAVE_REF);
+ /* If we found the handle and it isn't dead, reopen it. */
+ if (is_dead) {
+ __session_discard_btree(session, dhandle_cache);
+ dhandle_cache = NULL;
+ session->dhandle = dhandle = NULL;
+ } else
+ LF_SET(WT_DHANDLE_HAVE_REF);
}
/*
@@ -419,11 +412,11 @@ __wt_session_get_btree(WT_SESSION_IMPL *session,
WT_RET(__session_add_dhandle(session, NULL));
WT_ASSERT(session, LF_ISSET(WT_DHANDLE_LOCK_ONLY) ||
- F_ISSET(session->dhandle, WT_DHANDLE_OPEN));
+ (F_ISSET(session->dhandle, WT_DHANDLE_OPEN) &&
+ !F_ISSET(session->dhandle, WT_DHANDLE_DEAD)));
done: WT_ASSERT(session, LF_ISSET(WT_DHANDLE_EXCLUSIVE) ==
F_ISSET(session->dhandle, WT_DHANDLE_EXCLUSIVE));
- F_SET(session->dhandle, LF_ISSET(WT_DHANDLE_DISCARD_CLOSE));
return (0);
}
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index 7c1532390f9..e362541f8e6 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -1090,7 +1090,7 @@ __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[])
* Checkpoint a single file as part of closing the handle.
*/
int
-__wt_checkpoint_close(WT_SESSION_IMPL *session, int final, int force)
+__wt_checkpoint_close(WT_SESSION_IMPL *session, int final)
{
WT_BTREE *btree;
WT_DECL_RET;
@@ -1099,8 +1099,8 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, int final, int force)
btree = S2BT(session);
bulk = F_ISSET(btree, WT_BTREE_BULK) ? 1 : 0;
- /* Handle forced discard (when dropping a file). */
- if (force)
+ /* If the handle is already dead, force the discard. */
+ if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD))
return (__wt_cache_op(session, NULL, WT_SYNC_DISCARD_FORCE));
/*