summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKeith Bostic <keith.bostic@mongodb.com>2017-09-01 01:13:49 -0400
committerAlex Gorrod <alexander.gorrod@mongodb.com>2017-10-03 13:38:35 +1100
commit2d781c8cfeb2a1db8bd93e03ba35b302436e4ff3 (patch)
tree825380696aa85a116066eea23a46bb671fbf159b
parent0831e04799544167ee88c5b4dbad07d922a47d4d (diff)
downloadmongodb-3.4.10.tar.gz
WT-3533 eviction handle walk can race with handle re-open (#3620)mongodb-3.4.10
* Change btree open to no longer clear WT_BTREE eviction fields, allowing WT_BTREE.evict_disabled to persist across an open. * Change the connection-level open code to disable eviction when opening (or re-opening) an underlying WT_BTREE handle. * Eviction is turned off by btree handle open in some cases, and then turned on when an key/value is inserted. Now that we're not clearing the WT_BTREE eviction fields when re-opening a file, we have to clean up that reference if we close without ever inserting an item. * When flushing pages from the cache as part of discarding a WT_BTREE handle, release our exclusive eviction access to the handle once done. This requires eviction check for a WT_BTREE root page that's not set, because the tree is gone after we release exclusive eviction access. This reverts parts of WT-3193 (3747a26), but we can no longer increment the eviction disabled counter knowing it will be zero'd out when the handle is re-opened. The alternative is to keep another counter here, and I think checking the root page is less fragile/ugly than that would be. Add a new flag WT_BTREE.evict_disabled_open that's set if we disable eviction during open, we have to clean up the eviction disabled counters for special handles during close (for example verify). Don't use WT_BTREE.original as a flag to clean up eviction disabled counters during close, it gets set too early in the process, long before we disable eviction. Use the new WT_BTREE.evict_disabled_open flag in all cases. (cherry picked from commit fe0851d5c2a99c5084314e7b33df4c5d9735c674)
-rw-r--r--dist/s_string.ok1
-rw-r--r--src/btree/bt_cursor.c4
-rw-r--r--src/btree/bt_handle.c35
-rw-r--r--src/conn/conn_dhandle.c7
-rw-r--r--src/evict/evict_lru.c30
-rw-r--r--src/include/btree.h25
-rw-r--r--src/session/session_dhandle.c4
7 files changed, 75 insertions, 31 deletions
diff --git a/dist/s_string.ok b/dist/s_string.ok
index f3852d00ac8..99abc3e9ad1 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -1060,6 +1060,7 @@ rebalancing
recno
recnos
reconfig
+reconfigures
reconfiguring
recsize
rectype
diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c
index f0aa632551b..95d817850ef 100644
--- a/src/btree/bt_cursor.c
+++ b/src/btree/bt_cursor.c
@@ -156,8 +156,10 @@ __cursor_disable_bulk(WT_SESSION_IMPL *session, WT_BTREE *btree)
* into a tree. Eviction is disabled when an empty tree is opened, and
* it must only be enabled once.
*/
- if (__wt_atomic_cas8(&btree->original, 1, 0))
+ if (__wt_atomic_cas8(&btree->original, 1, 0)) {
+ btree->evict_disabled_open = false;
__wt_evict_file_exclusive_off(session);
+ }
}
/*
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index a0da7df0998..1e6405272f7 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -66,7 +66,6 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[])
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
size_t root_addr_size;
- uint32_t mask;
uint8_t root_addr[WT_BTREE_MAX_ADDR_COOKIE];
const char *filename;
bool creation, forced_salvage, readonly;
@@ -75,15 +74,14 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[])
dhandle = session->dhandle;
/*
- * This may be a re-open of an underlying object and we have to clean
- * up. We can't clear the operation flags, however, they're set by the
- * connection handle software that called us.
+ * This may be a re-open, clean up the btree structure.
+ * Clear the fields that don't persist across a re-open.
+ * Clear all flags other than the operation flags (which are set by the
+ * connection handle software that called us).
*/
WT_RET(__btree_clear(session));
-
- mask = F_MASK(btree, WT_BTREE_SPECIAL_FLAGS);
- memset(btree, 0, sizeof(*btree));
- btree->flags = mask;
+ memset(btree, 0, WT_BTREE_CLEAR_SIZE);
+ F_CLR(btree, ~WT_BTREE_SPECIAL_FLAGS);
/* Set the data handle first, our called functions reasonably use it. */
btree->dhandle = dhandle;
@@ -185,13 +183,19 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[])
*
* Files that can still be bulk-loaded cannot be evicted.
* Permanently cache-resident files can never be evicted.
- * Special operations don't enable eviction. (The underlying commands
- * may turn on eviction, but it's their decision.)
+ * Special operations don't enable eviction. The underlying commands may
+ * turn on eviction (for example, verify turns on eviction while working
+ * a file to keep from consuming the cache), but it's their decision. If
+ * an underlying command reconfigures eviction, it must either clear the
+ * evict-disabled-open flag or restore the eviction configuration when
+ * finished so that handle close behaves correctly.
*/
if (btree->original ||
F_ISSET(btree, WT_BTREE_IN_MEMORY | WT_BTREE_REBALANCE |
- WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY))
+ WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) {
WT_ERR(__wt_evict_file_exclusive_on(session));
+ btree->evict_disabled_open = true;
+ }
if (0) {
err: WT_TRET(__wt_btree_close(session));
@@ -228,6 +232,15 @@ __wt_btree_close(WT_SESSION_IMPL *session)
return (0);
F_SET(btree, WT_BTREE_CLOSED);
+ /*
+ * If we turned eviction off and never turned it back on, do that now,
+ * otherwise the counter will be off.
+ */
+ if (btree->evict_disabled_open) {
+ btree->evict_disabled_open = false;
+ __wt_evict_file_exclusive_off(session);
+ }
+
/* Discard any underlying block manager resources. */
if ((bm = btree->bm) != NULL) {
btree->bm = NULL;
diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c
index 243f4ac3047..2560ca47268 100644
--- a/src/conn/conn_dhandle.c
+++ b/src/conn/conn_dhandle.c
@@ -317,6 +317,9 @@ __wt_conn_btree_open(
WT_ASSERT(session,
!F_ISSET(S2C(session), WT_CONN_CLOSING_NO_MORE_OPENS));
+ /* Turn off eviction. */
+ WT_RET(__wt_evict_file_exclusive_on(session));
+
/*
* If the handle is already open, it has to be closed so it can be
* reopened with a new configuration.
@@ -330,7 +333,7 @@ __wt_conn_btree_open(
* in the tree that can block the close.
*/
if (F_ISSET(dhandle, WT_DHANDLE_OPEN))
- WT_RET(__wt_conn_btree_sync_and_close(session, false, false));
+ WT_ERR(__wt_conn_btree_sync_and_close(session, false, false));
/* Discard any previous configuration, set up the new configuration. */
__conn_btree_config_clear(session);
@@ -374,6 +377,8 @@ __wt_conn_btree_open(
err: F_CLR(btree, WT_BTREE_SPECIAL_FLAGS);
}
+ __wt_evict_file_exclusive_off(session);
+
return (ret);
}
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index fdf68841b85..b9b1a7783fe 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -1452,26 +1452,23 @@ retry: while (slot < max_entries) {
/*
* Re-check the "no eviction" flag, used to enforce exclusive
- * access when a handle is being closed. If not set, remember
- * the file to visit first, next loop.
+ * access when a handle is being closed.
*
* Only try to acquire the lock and simply continue if we fail;
* the lock is held while the thread turning off eviction clears
* the tree's current eviction point, and part of the process is
* waiting on this thread to acknowledge that action.
+ *
+ * If a handle is being discarded, it will still be marked open,
+ * but won't have a root page.
*/
if (btree->evict_disabled == 0 &&
!__wt_spin_trylock(session, &cache->evict_walk_lock)) {
- if (btree->evict_disabled == 0) {
+ if (btree->evict_disabled == 0 &&
+ btree->root.page != NULL) {
/*
- * Assert the handle has a root page: eviction
- * should have been locked out if the tree is
- * being discarded or the root page is changing.
- * As this has not always been the case, assert
- * to debug that change.
+ * Remember the file to visit first, next loop.
*/
- WT_ASSERT(session, btree->root.page != NULL);
-
cache->evict_file_next = dhandle;
WT_WITH_DHANDLE(session, dhandle,
ret = __evict_walk_file(
@@ -1860,6 +1857,10 @@ fast: /* If the page can't be evicted, give up. */
WT_STAT_CONN_INCRV(
session, cache_eviction_pages_queued, (u_int)(evict - start));
+ __wt_verbose(session, WT_VERB_EVICTSERVER,
+ "%s walk: seen %" PRIu64 ", queued %" PRIu64,
+ session->dhandle->name, pages_seen, pages_queued);
+
/*
* If we couldn't find the number of pages we were looking for, skip
* the tree next time.
@@ -2442,14 +2443,23 @@ __wt_verbose_dump_cache(WT_SESSION_IMPL *session)
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
+ u_int pct;
uint64_t total_bytes, total_dirty_bytes;
conn = S2C(session);
total_bytes = total_dirty_bytes = 0;
+ pct = 0; /* [-Werror=uninitialized] */
WT_RET(__wt_msg(session, "%s", WT_DIVIDER));
WT_RET(__wt_msg(session, "cache dump"));
+ WT_RET(__wt_msg(session,
+ "cache full: %s", __wt_cache_full(session) ? "yes" : "no"));
+ WT_RET(__wt_msg(session, "cache clean check: %s (%u%%)",
+ __wt_eviction_clean_needed(session, &pct) ? "yes" : "no", pct));
+ WT_RET(__wt_msg(session, "cache dirty check: %s (%u%%)",
+ __wt_eviction_dirty_needed(session, &pct) ? "yes" : "no", pct));
+
for (dhandle = NULL;;) {
WT_WITH_HANDLE_LIST_READ_LOCK(session,
WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q));
diff --git a/src/include/btree.h b/src/include/btree.h
index 28fe1b94b23..8712a404b13 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -142,12 +142,30 @@ struct __wt_btree {
uint64_t bytes_dirty_intl; /* Bytes in dirty internal pages. */
uint64_t bytes_dirty_leaf; /* Bytes in dirty leaf pages. */
+ /*
+ * We flush pages from the tree (in order to make checkpoint faster),
+ * without a high-level lock. To avoid multiple threads flushing at
+ * the same time, lock the tree.
+ */
+ WT_SPINLOCK flush_lock; /* Lock to flush the tree's pages */
+
+ /*
+ * All of the following fields live at the end of the structure so it's
+ * easier to clear everything but the fields that persist.
+ */
+#define WT_BTREE_CLEAR_SIZE (offsetof(WT_BTREE, evict_ref))
+
+ /*
+ * Eviction information is maintained in the btree handle, but owned by
+ * eviction, not the btree code.
+ */
WT_REF *evict_ref; /* Eviction thread's location */
uint64_t evict_priority; /* Relative priority of cached pages */
u_int evict_walk_period; /* Skip this many LRU walks */
u_int evict_walk_saved; /* Saved walk skips for checkpoints */
u_int evict_walk_skips; /* Number of walks skipped */
int evict_disabled; /* Eviction disabled count */
+ bool evict_disabled_open;/* Eviction disabled on open */
volatile uint32_t evict_busy; /* Count of threads in eviction */
int evict_start_type; /* Start position for eviction walk
(see WT_EVICT_WALK_START). */
@@ -155,13 +173,6 @@ struct __wt_btree {
WT_CKPT_OFF, WT_CKPT_PREPARE, WT_CKPT_RUNNING
} checkpointing; /* Checkpoint in progress */
- /*
- * We flush pages from the tree (in order to make checkpoint faster),
- * without a high-level lock. To avoid multiple threads flushing at
- * the same time, lock the tree.
- */
- WT_SPINLOCK flush_lock; /* Lock to flush the tree's pages */
-
/* Flags values up to 0xff are reserved for WT_DHANDLE_* */
#define WT_BTREE_ALLOW_SPLITS 0x000100 /* Allow splits, even with no evict */
#define WT_BTREE_BULK 0x000200 /* Bulk-load handle */
diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c
index 866dab32a78..707e07ac11f 100644
--- a/src/session/session_dhandle.c
+++ b/src/session/session_dhandle.c
@@ -598,7 +598,9 @@ __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint)
* the underlying file are visible to the in-memory pages.
*/
WT_ERR(__wt_evict_file_exclusive_on(session));
- WT_ERR(__wt_cache_op(session, WT_SYNC_DISCARD));
+ ret = __wt_cache_op(session, WT_SYNC_DISCARD);
+ __wt_evict_file_exclusive_off(session);
+ WT_ERR(ret);
/*
* We lock checkpoint handles that we are overwriting, so the handle