summary | refs | log | tree | commit | diff
path: root/src/third_party
diff options
context:
space:
mode:
author: Luke Chen <luke.chen@mongodb.com> 2018-07-03 12:07:37 +1000
committer: Luke Chen <luke.chen@mongodb.com> 2018-07-03 12:07:37 +1000
commit: b76457d9aa0188e1d2369851506b9d4d927024cb (patch)
tree: f93ea10b6ea968ed52d08d9b714c506bf6b2de28 /src/third_party
parent: 925a113194e00e193318486f576d14e6c3e27ea1 (diff)
download: mongo-b76457d9aa0188e1d2369851506b9d4d927024cb.tar.gz
Import wiredtiger: 27f8e047911ff31500fecf4ea760e688ec541b97 from branch mongodb-4.2
ref: a8a6314182..27f8e04791
for: 4.1.1
WT-3839 Document the undefined behavior when a range truncate overlaps with inserts
WT-3917 Enhance WT_CURSOR::reserve documentation around commit visibility
WT-4024 Fix a race between split and next/prev
WT-4067 Enhance LSM to not pin as much history in cache
WT-4111 Improve checkpoint scrubbing algorithm
WT-4125 Ensure that subsequent checkpoints with stable timestamp don't read too much
WT-4136 Add a new timing stress flag that yields during tree search
WT-4138 Add an option to timeout waiting for space in the cache
WT-4140 Cursor walk limits quick eviction page selection unnecessarily.
WT-4141 Enhance checkpoint with timestamps to unblock eviction sooner
WT-4145 Only include the checkpoint timestamp during checkpoints
WT-4146 Coverity 1393639, unused variable
WT-4152 Save return value for later comparison in transaction code
Diffstat (limited to 'src/third_party')
-rw-r--r-- src/third_party/wiredtiger/dist/api_data.py 10
-rw-r--r-- src/third_party/wiredtiger/dist/stat_data.py 2
-rw-r--r-- src/third_party/wiredtiger/import.data 4
-rw-r--r-- src/third_party/wiredtiger/src/bloom/bloom.c 11
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curnext.c 3
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_debug.c 13
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_random.c 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_read.c 12
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_split.c 22
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_walk.c 315
-rw-r--r-- src/third_party/wiredtiger/src/btree/col_srch.c 5
-rw-r--r-- src/third_party/wiredtiger/src/btree/row_srch.c 7
-rw-r--r-- src/third_party/wiredtiger/src/cache/cache_las.c 46
-rw-r--r-- src/third_party/wiredtiger/src/config/config_def.c 137
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_api.c 2
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_cache.c 4
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_lru.c 41
-rw-r--r-- src/third_party/wiredtiger/src/include/api.h 3
-rw-r--r-- src/third_party/wiredtiger/src/include/btmem.h 18
-rw-r--r-- src/third_party/wiredtiger/src/include/btree.i 115
-rw-r--r-- src/third_party/wiredtiger/src/include/cache.h 6
-rw-r--r-- src/third_party/wiredtiger/src/include/cache.i 23
-rw-r--r-- src/third_party/wiredtiger/src/include/connection.h 2
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h 1
-rw-r--r-- src/third_party/wiredtiger/src/include/lsm.h 19
-rw-r--r-- src/third_party/wiredtiger/src/include/misc.h 9
-rw-r--r-- src/third_party/wiredtiger/src/include/session.h 2
-rw-r--r-- src/third_party/wiredtiger/src/include/stat.h 2
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.h 2
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.i 72
-rw-r--r-- src/third_party/wiredtiger/src/include/wiredtiger.in 554
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_cursor.c 22
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_manager.c 13
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_work_unit.c 94
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_worker.c 12
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c 183
-rw-r--r-- src/third_party/wiredtiger/src/support/stat.c 7
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn.c 34
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_ckpt.c 134
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c 3
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_timestamp.c 68
-rw-r--r-- src/third_party/wiredtiger/test/format/config.h 26
-rw-r--r-- src/third_party/wiredtiger/test/format/format.h 2
-rw-r--r-- src/third_party/wiredtiger/test/format/lrt.c 17
-rw-r--r-- src/third_party/wiredtiger/test/format/ops.c 11
-rw-r--r-- src/third_party/wiredtiger/test/format/wts.c 4
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_las03.py 105
47 files changed, 1261 insertions, 938 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 7d8a58c83bb..d29e9655fb3 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -417,6 +417,11 @@ connection_runtime_config = [
maximum heap memory to allocate for the cache. A database should
configure either \c cache_size or \c shared_cache but not both''',
min='1MB', max='10TB'),
+ Config('cache_max_wait_ms', '0', r'''
+ the maximum number of milliseconds an application thread will wait
+ for space to be available in cache before giving up. Default will
+ wait forever''',
+ min=0),
Config('cache_overhead', '8', r'''
assume the heap allocator overhead is the specified percentage, and
adjust the cache usage by that amount (for example, if there is 10GB
@@ -460,7 +465,7 @@ connection_runtime_config = [
vary depending on the current eviction load''',
min=1, max=20),
]),
- Config('eviction_checkpoint_target', '5', r'''
+ Config('eviction_checkpoint_target', '1', r'''
perform eviction at the beginning of checkpoints to bring the dirty
content in cache to this level. It is a percentage of the cache size if
the value is within the range of 0 to 100 or an absolute size when
@@ -585,7 +590,8 @@ connection_runtime_config = [
type='list', undoc=True,
choices=[
'checkpoint_slow', 'lookaside_sweep_race', 'split_1', 'split_2',
- 'split_3', 'split_4', 'split_5', 'split_6', 'split_7']),
+ 'split_3', 'split_4', 'split_5', 'split_6', 'split_7', 'split_8',
+ 'split_9']),
Config('verbose', '', r'''
enable messages for various events. Options are given as a
list, such as <code>"verbose=[evictserver,read]"</code>''',
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index 135c930b306..f4f8f61ee1e 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -279,6 +279,7 @@ connection_stats = [
CacheStat('cache_read_lookaside_delay_checkpoint', 'pages read into cache with skipped lookaside entries needed later by checkpoint'),
CacheStat('cache_read_lookaside_skipped', 'pages read into cache skipping older lookaside entries'),
CacheStat('cache_read_overflow', 'overflow pages read into cache'),
+ CacheStat('cache_timed_out_ops', 'operations timed out waiting for space in cache'),
CacheStat('cache_write', 'pages written from cache'),
CacheStat('cache_write_app_count', 'application threads page write from cache to disk count'),
CacheStat('cache_write_app_time', 'application threads page write from cache to disk time (usecs)'),
@@ -515,6 +516,7 @@ connection_stats = [
TxnStat('txn_pinned_range', 'transaction range of IDs currently pinned', 'no_clear,no_scale'),
TxnStat('txn_pinned_snapshot_range', 'transaction range of IDs currently pinned by named snapshots', 'no_clear,no_scale'),
TxnStat('txn_pinned_timestamp', 'transaction range of timestamps currently pinned', 'no_clear,no_scale'),
+ TxnStat('txn_pinned_timestamp_checkpoint', 'transaction range of timestamps pinned by a checkpoint', 'no_clear,no_scale'),
TxnStat('txn_pinned_timestamp_oldest', 'transaction range of timestamps pinned by the oldest timestamp', 'no_clear,no_scale'),
TxnStat('txn_prepare', 'prepared transactions'),
TxnStat('txn_prepare_active', 'prepared transactions currently active'),
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 770e5ed12ec..1e72dbc56ae 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,6 +1,6 @@
{
- "commit": "a8a6314182ccf7dc6625b9c24891a355b07faa8e",
+ "commit": "27f8e047911ff31500fecf4ea760e688ec541b97",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
- "branch": "mongodb-4.0"
+ "branch": "mongodb-4.2"
}
diff --git a/src/third_party/wiredtiger/src/bloom/bloom.c b/src/third_party/wiredtiger/src/bloom/bloom.c
index cf4743009ee..d506af89ab7 100644
--- a/src/third_party/wiredtiger/src/bloom/bloom.c
+++ b/src/third_party/wiredtiger/src/bloom/bloom.c
@@ -302,7 +302,16 @@ __wt_bloom_hash_get(WT_BLOOM *bloom, WT_BLOOM_HASH *bhash)
err: if (c != NULL)
WT_TRET(c->reset(c));
- /* Don't return WT_NOTFOUND from a failed cursor open or search. */
+ /*
+ * Error handling from this function is complex. A search in the
+ * backing bit field should never return WT_NOTFOUND - so translate
+ * that into a different error code and report an error. If we got a
+ * WT_ROLLBACK it may be because there is a lot of cache pressure and
+ * the transaction is being killed - don't report an error message in
+ * that case.
+ */
+ if (ret == WT_ROLLBACK || ret == WT_CACHE_FULL)
+ return (ret);
WT_RET_MSG(bloom->session,
ret == WT_NOTFOUND ? WT_ERROR : ret,
"Failed lookup in bloom filter");
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index 02cceab3123..3a031b49db5 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -429,7 +429,8 @@ __cursor_key_order_check_row(
WT_ERR(__wt_scr_alloc(session, 512, &b));
WT_PANIC_ERR(session, EINVAL,
- "WT_CURSOR.%s out-of-order returns: returned key %s then key %s",
+ "WT_CURSOR.%s out-of-order returns: returned key %.1024s then "
+ "key %.1024s",
next ? "next" : "prev",
__wt_buf_set_printable_format(session,
cbt->lastkey->data, cbt->lastkey->size, btree->key_format, a),
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index 566157abd61..16e25c1fe25 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -805,11 +805,13 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref)
WT_PAGE_INDEX *pindex;
WT_PAGE_MODIFY *mod;
WT_SESSION_IMPL *session;
+ uint64_t split_gen;
uint32_t entries;
session = ds->session;
page = ref->page;
mod = page->modify;
+ split_gen = 0;
WT_RET(ds->f(ds, "%p", (void *)ref));
@@ -818,6 +820,7 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref)
WT_RET(ds->f(ds, " recno %" PRIu64, ref->ref_recno));
WT_INTL_INDEX_GET(session, page, pindex);
entries = pindex->entries;
+ split_gen = page->pg_intl_split_gen;
break;
case WT_PAGE_COL_FIX:
WT_RET(ds->f(ds, " recno %" PRIu64, ref->ref_recno));
@@ -830,6 +833,7 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref)
case WT_PAGE_ROW_INT:
WT_INTL_INDEX_GET(session, page, pindex);
entries = pindex->entries;
+ split_gen = page->pg_intl_split_gen;
break;
case WT_PAGE_ROW_LEAF:
entries = page->entries;
@@ -845,8 +849,6 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref)
WT_RET(ds->f(ds, ", entries %" PRIu32, entries));
WT_RET(ds->f(ds,
", %s", __wt_page_is_modified(page) ? "dirty" : "clean"));
- WT_RET(ds->f(ds,
- ", memory_size %" WT_SIZET_FMT, page->memory_footprint));
if (F_ISSET_ATOMIC(page, WT_PAGE_BUILD_KEYS))
WT_RET(ds->f(ds, ", keys-built"));
@@ -878,9 +880,12 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref)
break;
WT_ILLEGAL_VALUE(session);
}
+ if (split_gen != 0)
+ WT_RET(ds->f(ds, ", split-gen=%" PRIu64, split_gen));
if (mod != NULL)
- WT_RET(
- ds->f(ds, ", write generation=%" PRIu32, mod->write_gen));
+ WT_RET(ds->f(ds, ", write-gen=%" PRIu32, mod->write_gen));
+ WT_RET(ds->f(ds,
+ ", memory-size %" WT_SIZET_FMT, page->memory_footprint));
WT_RET(ds->f(ds, "\n"));
return (0);
diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c
index 17497561248..4f310b27237 100644
--- a/src/third_party/wiredtiger/src/btree/bt_random.c
+++ b/src/third_party/wiredtiger/src/btree/bt_random.c
@@ -262,7 +262,7 @@ restart: /*
* holding nothing on failure.
*/
descend: if ((ret = __wt_page_swap(
- session, current, descent, false, flags)) == 0) {
+ session, current, descent, flags)) == 0) {
current = descent;
continue;
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index 9e530be4f0e..c8368624d3c 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -276,13 +276,15 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
*/
page->modify->first_dirty_txn = WT_TXN_FIRST;
- if (ref->page_las->las_skew_newest &&
+ FLD_SET(page->modify->restore_state, WT_PAGE_RS_LOOKASIDE);
+
+ if (ref->page_las->skew_newest &&
!S2C(session)->txn_global.has_stable_timestamp &&
- __wt_txn_visible_all(session, ref->page_las->las_max_txn,
- WT_TIMESTAMP_NULL(&ref->page_las->onpage_timestamp))) {
- page->modify->rec_max_txn = ref->page_las->las_max_txn;
+ __wt_txn_visible_all(session, ref->page_las->unstable_txn,
+ WT_TIMESTAMP_NULL(&ref->page_las->unstable_timestamp))) {
+ page->modify->rec_max_txn = ref->page_las->max_txn;
__wt_timestamp_set(&page->modify->rec_max_timestamp,
- &ref->page_las->onpage_timestamp);
+ &ref->page_las->max_timestamp);
__wt_page_modify_clear(session, page);
}
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 62212607f18..a98de6c6c9f 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -1414,6 +1414,7 @@ __split_multi_inmem(
WT_DECL_ITEM(key);
WT_DECL_RET;
WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
WT_SAVE_UPD *supd;
WT_UPDATE *upd;
uint64_t recno;
@@ -1520,17 +1521,26 @@ __split_multi_inmem(
* might be older than that. Set the first dirty transaction to an
* impossibly old value so this page is never skipped in a checkpoint.
*/
- page->modify->first_dirty_txn = WT_TXN_FIRST;
+ mod = page->modify;
+ mod->first_dirty_txn = WT_TXN_FIRST;
/*
* If the new page is modified, save the eviction generation to avoid
* repeatedly attempting eviction on the same page.
*/
- page->modify->last_evict_pass_gen = orig->modify->last_evict_pass_gen;
- page->modify->last_eviction_id = orig->modify->last_eviction_id;
- __wt_timestamp_set(&page->modify->last_eviction_timestamp,
+ mod->last_evict_pass_gen = orig->modify->last_evict_pass_gen;
+ mod->last_eviction_id = orig->modify->last_eviction_id;
+ __wt_timestamp_set(&mod->last_eviction_timestamp,
&orig->modify->last_eviction_timestamp);
- page->modify->update_restored = 1;
+
+ /* Add the update/restore flag to any previous state. */
+ __wt_timestamp_set(&mod->last_stable_timestamp,
+ &orig->modify->last_stable_timestamp);
+ mod->rec_max_txn = orig->modify->rec_max_txn;
+ __wt_timestamp_set(&mod->rec_max_timestamp,
+ &orig->modify->rec_max_timestamp);
+ mod->restore_state = orig->modify->restore_state;
+ FLD_SET(mod->restore_state, WT_PAGE_RS_RESTORED);
err: /* Free any resources that may have been cached in the cursor. */
WT_TRET(__wt_btcur_close(&cbt, true));
@@ -1684,7 +1694,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
WT_RET(__wt_calloc_one(session, &ref->page_las));
*ref->page_las = multi->page_las;
- WT_ASSERT(session, ref->page_las->las_max_txn != WT_TXN_NONE);
+ WT_ASSERT(session, ref->page_las->max_txn != WT_TXN_NONE);
ref->state = WT_REF_LOOKASIDE;
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c
index d445184b7dd..a2386d907c7 100644
--- a/src/third_party/wiredtiger/src/btree/bt_walk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_walk.c
@@ -176,44 +176,96 @@ __ref_ascend(WT_SESSION_IMPL *session,
}
/*
- * __ref_initial_descent_prev --
- * Descend the tree one level, when setting up the initial cursor position
- * for a previous-cursor walk.
+ * __split_prev_race --
+ * Check for races when descending the tree during a previous-cursor walk.
*/
static inline bool
-__ref_initial_descent_prev(
+__split_prev_race(
WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX **pindexp)
{
WT_PAGE_INDEX *pindex;
/*
- * When splitting an internal page into its parent, we move the WT_REF
- * structures and update the parent's page index before updating the
- * split page's page index, and it's not an atomic update. A thread can
- * read the parent page's replacement page index, then read the split
- * page's original index, or the parent page's original and the split
- * page's replacement.
+ * Handle a cursor moving backwards through the tree or setting up at
+ * the end of the tree. We're passed the child page into which we're
+ * descending, and the parent page's page-index we used to find that
+ * child page.
*
- * This isn't a problem for a cursor setting up at the start of the tree
- * because we do right-hand splits on internal pages and the initial
- * part of the split page's namespace won't change as part of a split.
- * A thread reading the parent page's and split page's indexes will move
- * to the same slot no matter what order of indexes are read.
- *
- * Handle a cursor setting up at the end of the tree.
+ * When splitting an internal page into its parent, we move the split
+ * page's WT_REF structures, then update the parent's page index, then
+ * update the split page's page index, and nothing is atomic. A thread
+ * can read the parent page's replacement page index and then the split
+ * page's original index, or vice-versa, and either change can cause a
+ * cursor moving backwards through the tree to skip pages.
*
- * We're passed a child page into which we're descending, and on which
- * we have a hazard pointer.
+ * This isn't a problem for a cursor setting up at the start of the tree
+ * or moving forward through the tree because we do right-hand splits on
+ * internal pages and the initial part of the split page's namespace
+ * won't change as part of a split (in other words, a thread reading the
+ * parent page's and split page's indexes will move to the same slot no
+ * matter what order of indexes are read).
*
- * Acquire a page index for the child page and then confirm we haven't
- * raced with a parent split.
+ * Acquire the child's page index, then confirm the parent's page index
+ * hasn't changed, to check for reading an old version of the parent's
+ * page index and then reading a new version of the child's page index.
*/
WT_INTL_INDEX_GET(session, ref->page, pindex);
if (__wt_split_descent_race(session, ref, *pindexp))
- return (false);
+ return (true);
+
+ /*
+ * That doesn't check if we read a new version of the parent's page index
+ * and then an old version of the child's page index. For example, if
+ * a thread were in a newly created split page subtree, the split
+ * completes into the parent before the thread reads it and descends
+ * into the child (where the split hasn't yet completed).
+ *
+ * Imagine an internal page with 3 child pages, with the namespaces a-f,
+ * g-h and i-j; the first child page splits. The parent starts out with
+ * the following page-index:
+ *
+ * | ... | a | g | i | ... |
+ *
+ * The split page starts out with the following page-index:
+ *
+ * | a | b | c | d | e | f |
+ *
+ * The first step is to move the c-f ranges into a new subtree, so, for
+ * example we might have two new internal pages 'c' and 'e', where the
+ * new 'c' page references the c-d namespace and the new 'e' page
+ * references the e-f namespace. The top of the subtree references the
+ * parent page, but until the parent's page index is updated, threads in
+ * the subtree won't be able to ascend out of the subtree. However, once
+ * the parent page's page index is updated to this:
+ *
+ * | ... | a | c | e | g | i | ... |
+ *
+ * threads in the subtree can ascend into the parent. Imagine a cursor
+ * in the c-d part of the namespace that ascends to the parent's 'c'
+ * slot. It would then decrement to the slot before the 'c' slot, the
+ * 'a' slot.
+ *
+ * The previous-cursor movement selects the last slot in the 'a' page;
+ * if the split page's page-index hasn't been updated yet, it selects
+ * the 'f' slot, which is incorrect. Once the split page's page index is
+ * updated to this:
+ *
+ * | a | b |
+ *
+ * the previous-cursor movement will select the 'b' slot, which is
+ * correct.
+ *
+ * If the last slot on the page no longer points to the current page as
+ * its "home", the page is being split and part of its namespace moved,
+ * restart. (We probably don't have to restart, I think we could spin
+ * until the page-index is updated, but I'm not willing to debug that
+ * one if I'm wrong.)
+ */
+ if (pindex->index[pindex->entries - 1]->home != ref->page)
+ return (true);
*pindexp = pindex;
- return (true);
+ return (false);
}
/*
@@ -229,22 +281,21 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
WT_BTREE *btree;
WT_DECL_RET;
WT_PAGE_INDEX *pindex;
- WT_REF *couple, *couple_orig, *ref;
+ WT_REF *couple, *ref, *ref_orig;
uint64_t sleep_usecs, yield_count;
uint32_t current_state, slot;
- bool empty_internal, initial_descent, prev, skip;
+ bool empty_internal, prev, skip;
btree = S2BT(session);
pindex = NULL;
sleep_usecs = yield_count = 0;
- empty_internal = initial_descent = false;
+ empty_internal = false;
/*
- * Tree walks are special: they look inside page structures that splits
- * may want to free. Publish that the tree is active during this
- * window.
+ * We're not supposed to walk trees without root pages. As this has not
+ * always been the case, assert to debug that change.
*/
- WT_ENTER_PAGE_INDEX(session);
+ WT_ASSERT(session, btree->root.page != NULL);
/* Check whether deleted pages can be skipped. */
if (!LF_ISSET(WT_READ_DELETED_SKIP))
@@ -284,36 +335,41 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
* new leaf, couple to the next page to which we're descending, it
* saves a hazard-pointer swap for each cursor page movement.
*
- * !!!
- * NOTE: we depend on the fact it's OK to release a page we don't hold,
- * that is, it's OK to release couple when couple is set to NULL.
- *
- * Take a copy of any held page and clear the return value. Remember
- * the hazard pointer we're currently holding.
- *
- * Clear the returned value, it makes future error handling easier.
+ * The hazard pointer on the original location is held until the end of
+ * the movement, in case we have to restart the movement. Take a copy
+ * of any held page and clear the return value (it makes future error
+ * handling easier).
*/
- couple = couple_orig = ref = *refp;
+ couple = NULL;
+ ref_orig = *refp;
*refp = NULL;
+ /*
+ * Tree walks are special: they look inside page structures that splits
+ * may want to free. Publish that the tree is active during this window.
+ */
+ WT_ENTER_PAGE_INDEX(session);
+
/* If no page is active, begin a walk from the start/end of the tree. */
- if (ref == NULL) {
-restart: /*
- * We can be here with a NULL or root WT_REF; the page release
- * function handles them internally, don't complicate this code
- * by calling them out.
- */
- WT_ERR(__wt_page_release(session, couple, flags));
+ if ((ref = ref_orig) == NULL) {
+ if (0) {
+restart: /*
+ * Yield before retrying, and if we've yielded enough
+ * times, start sleeping so we don't burn CPU to no
+ * purpose.
+ */
+ __wt_spin_backoff(&yield_count, &sleep_usecs);
- /*
- * We're not supposed to walk trees without root pages. As this
- * has not always been the case, assert to debug that change.
- */
- WT_ASSERT(session, btree->root.page != NULL);
+ WT_ERR(__wt_page_release(session, couple, flags));
+ couple = NULL;
+ }
- couple = couple_orig = ref = &btree->root;
- initial_descent = true;
- goto descend;
+ if ((ref = ref_orig) == NULL) {
+ ref = &btree->root;
+ WT_INTL_INDEX_GET(session, ref->page, pindex);
+ slot = prev ? pindex->entries - 1 : 0;
+ goto descend;
+ }
}
/*
@@ -340,12 +396,9 @@ restart: /*
/*
* If at the root and returning internal pages, return
- * the root page, otherwise we're done. Regardless, no
- * hazard pointer is required, release the one we hold.
+ * the root page, otherwise we're done.
*/
if (__wt_ref_is_root(ref)) {
- WT_ERR(__wt_page_release(
- session, couple, flags));
if (!LF_ISSET(WT_READ_SKIP_INTL))
*refp = ref;
goto done;
@@ -356,7 +409,7 @@ restart: /*
* all of the child pages were deleted, mark it for
* eviction.
*/
- if (empty_internal && pindex->entries > 1) {
+ if (empty_internal) {
__wt_page_evict_soon(session, ref);
empty_internal = false;
}
@@ -367,17 +420,18 @@ restart: /*
* handle restart or not-found returns, it would require
* additional complexity and is not a possible return:
* we're moving to the parent of the current child page,
- * the parent can't have been evicted. (This is why we
- * don't pass "prev" to the page-swap function, we can't
- * handle the restart error returned if the parent page
- * is currently splitting.)
+ * the parent can't have been evicted.
*/
if (!LF_ISSET(WT_READ_SKIP_INTL)) {
WT_ERR(__wt_page_swap(
- session, couple, ref, false, flags));
+ session, couple, ref, flags));
+ couple = NULL;
*refp = ref;
goto done;
}
+
+ /* Encourage races. */
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_8);
}
if (prev)
@@ -389,9 +443,9 @@ restart: /*
++*walkcntp;
for (;;) {
- /*
- * Move to the next slot, and set the reference hint if
- * it's wrong (used when we continue the walk). We don't
+descend: /*
+ * Get a reference, setting the reference hint if it's
+ * wrong (used when we continue the walk). We don't
* always update the hints when splitting, it's expected
* for them to be incorrect in some workloads.
*/
@@ -452,12 +506,41 @@ restart: /*
break;
}
- ret = __wt_page_swap(session, couple, ref, prev,
+ ret = __wt_page_swap(session, couple, ref,
WT_READ_NOTFOUND_OK | WT_READ_RESTART_OK | flags);
+ if (ret == 0) {
+ /* Success, so "couple" has been released. */
+ couple = NULL;
+
+ /* Return leaf pages to our caller. */
+ if (!WT_PAGE_IS_INTERNAL(ref->page)) {
+ *refp = ref;
+ goto done;
+ }
+
+ /* Set the new "couple" value. */
+ couple = ref;
+
+ /* Configure traversal of any internal page. */
+ empty_internal = true;
+ if (prev) {
+ if (__split_prev_race(
+ session, ref, &pindex))
+ goto restart;
+ slot = pindex->entries - 1;
+ } else {
+ WT_INTL_INDEX_GET(
+ session, ref->page, pindex);
+ slot = 0;
+ }
+ continue;
+ }
/*
- * Not-found is an expected return when only walking
+ * Not-found is an expected return when walking only
* in-cache pages, or if we see a deleted page.
+ *
+ * An expected error, so "couple" is unchanged.
*/
if (ret == WT_NOTFOUND) {
WT_NOT_READ(ret, 0);
@@ -466,94 +549,24 @@ restart: /*
/*
* The page we're moving to might have split, in which
- * case move to the last position we held.
- */
- if (ret == WT_RESTART) {
- ret = 0;
-
- /*
- * Yield before retrying, and if we've yielded
- * enough times, start sleeping so we don't burn
- * CPU to no purpose.
- */
- __wt_spin_backoff(
- &yield_count, &sleep_usecs);
-
- /*
- * If a cursor is setting up at the end of the
- * tree, we can't use our parent page's index,
- * because it may have already split; restart
- * the walk.
- */
- if (prev && initial_descent)
- goto restart;
-
- /*
- * If a new walk that never coupled from the
- * root to a new saved position in the tree,
- * restart the walk.
- */
- if (couple == &btree->root)
- goto restart;
-
- /*
- * If restarting from some original position,
- * repeat the increment or decrement we made at
- * that time. Otherwise, couple is an internal
- * page we've acquired after moving from that
- * starting position and we can treat it as a
- * new page. This works because we never acquire
- * a hazard pointer on a leaf page we're not
- * going to return to our caller, this will quit
- * working if that ever changes.
- */
- WT_ASSERT(session,
- couple == couple_orig ||
- WT_PAGE_IS_INTERNAL(couple->page));
- ref = couple;
- __ref_index_slot(session, ref, &pindex, &slot);
- if (couple == couple_orig)
- break;
- }
- WT_ERR(ret);
- couple = ref;
-
- /*
- * A new page: configure for traversal of any internal
- * page's children, else return the leaf page.
+ * case restart the movement.
+ *
+ * An expected error, so "couple" is unchanged.
*/
- if (WT_PAGE_IS_INTERNAL(ref->page)) {
-descend: empty_internal = true;
-
- /*
- * There's a split race when a cursor is setting
- * up at the end of the tree.
- */
- if (prev && initial_descent) {
- if (!__ref_initial_descent_prev(
- session, ref, &pindex))
- goto restart;
- } else
- WT_INTL_INDEX_GET(
- session, ref->page, pindex);
- slot = prev ? pindex->entries - 1 : 0;
- continue;
- }
+ if (ret == WT_RESTART)
+ goto restart;
- /*
- * The tree-walk restart code knows we return any leaf
- * page we acquire (never hazard-pointer coupling on
- * after acquiring a leaf page), and asserts no restart
- * happens while holding a leaf page. This page must be
- * returned to our caller.
- */
- *refp = ref;
- goto done;
+ /* Unexpected error, so "couple" was released. */
+ couple = NULL;
+ goto err;
}
}
done:
-err: WT_LEAVE_PAGE_INDEX(session);
+err:
+ WT_TRET(__wt_page_release(session, couple, flags));
+ WT_TRET(__wt_page_release(session, ref_orig, flags));
+ WT_LEAVE_PAGE_INDEX(session);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c
index 8cc6630599b..123b640cdf4 100644
--- a/src/third_party/wiredtiger/src/btree/col_srch.c
+++ b/src/third_party/wiredtiger/src/btree/col_srch.c
@@ -179,6 +179,9 @@ descend: /*
descent = pindex->index[base - 1];
}
+ /* Encourage races. */
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_9);
+
/*
* Swap the current page for the child page. If the page splits
* while we're retrieving it, restart the search at the root.
@@ -192,7 +195,7 @@ descend: /*
* holding nothing on failure.
*/
if ((ret = __wt_page_swap(session,
- current, descent, false, WT_READ_RESTART_OK)) == 0) {
+ current, descent, WT_READ_RESTART_OK)) == 0) {
current = descent;
continue;
}
diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c
index 20acda8a1ab..a3f05a2700f 100644
--- a/src/third_party/wiredtiger/src/btree/row_srch.c
+++ b/src/third_party/wiredtiger/src/btree/row_srch.c
@@ -431,7 +431,10 @@ append: if (__wt_split_descent_race(
goto restart;
}
-descend: /*
+descend: /* Encourage races. */
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_9);
+
+ /*
* Swap the current page for the child page. If the page splits
* while we're retrieving it, restart the search at the root.
* We cannot restart in the "current" page; for example, if a
@@ -444,7 +447,7 @@ descend: /*
* holding nothing on failure.
*/
if ((ret = __wt_page_swap(session,
- current, descent, false, WT_READ_RESTART_OK)) == 0) {
+ current, descent, WT_READ_RESTART_OK)) == 0) {
current = descent;
continue;
}
diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c
index 54bf8c78171..cd11a3793c5 100644
--- a/src/third_party/wiredtiger/src/cache/cache_las.c
+++ b/src/third_party/wiredtiger/src/cache/cache_las.c
@@ -404,43 +404,34 @@ __wt_las_page_skip_locked(WT_SESSION_IMPL *session, WT_REF *ref)
return (false);
/*
- * If page image has the newest version of data and includes data newer
- * than the reader's snapshot then we should read the history.
+ * If some of the page's history overlaps with the reader's snapshot
+ * then we have to read it. This is only relevant if we chose versions
+ * that were unstable when the page was written.
*/
- if (ref->page_las->las_skew_newest &&
- WT_TXNID_LE(txn->snap_min, ref->page_las->las_max_txn))
+ if (ref->page_las->skew_newest &&
+ WT_TXNID_LE(txn->snap_min, ref->page_las->unstable_txn))
return (false);
- /*
- * If page image has the oldest version of data and some of the history
- * overlaps with the reader's snapshot then we should read the history.
- */
- if (!ref->page_las->las_skew_newest &&
- WT_TXNID_LE(ref->page_las->las_min_txn, txn->snap_max))
- return (false);
-
- if (!F_ISSET(txn, WT_TXN_HAS_TS_READ) && ref->page_las->las_skew_newest)
- return (true);
+ if (!F_ISSET(txn, WT_TXN_HAS_TS_READ))
+ return (ref->page_las->skew_newest);
#ifdef HAVE_TIMESTAMPS
/*
* Skip lookaside pages if reading as of a timestamp, we evicted new
* versions of data and all the updates are in the past.
*/
- if (F_ISSET(&session->txn, WT_TXN_HAS_TS_READ) &&
- ref->page_las->las_skew_newest &&
+ if (ref->page_las->skew_newest &&
__wt_timestamp_cmp(
- &ref->page_las->onpage_timestamp, &session->txn.read_timestamp) < 0)
+ &txn->read_timestamp, &ref->page_las->unstable_timestamp) > 0)
return (true);
/*
* Skip lookaside pages if reading as of a timestamp, we evicted old
- * versions of data and all the updates are in the future.
+ * versions of data and all the unstable updates are in the future.
*/
- if (F_ISSET(&session->txn, WT_TXN_HAS_TS_READ) &&
- !ref->page_las->las_skew_newest &&
+ if (!ref->page_las->skew_newest &&
__wt_timestamp_cmp(
- &ref->page_las->min_timestamp, &session->txn.read_timestamp) > 0)
+ &txn->read_timestamp, &ref->page_las->unstable_timestamp) < 0)
return (true);
#endif
@@ -563,8 +554,8 @@ __las_insert_block_verbose(WT_SESSION_IMPL *session, WT_MULTI *multi)
(void)__wt_eviction_dirty_needed(session, &pct_dirty);
#ifdef HAVE_TIMESTAMPS
- WT_RET(__wt_timestamp_to_hex_string(
- session, hex_timestamp, &multi->page_las.min_timestamp));
+ WT_RET(__wt_timestamp_to_hex_string(session, hex_timestamp,
+ &multi->page_las.unstable_timestamp));
ts = hex_timestamp;
#else
ts = "disabled";
@@ -573,14 +564,14 @@ __las_insert_block_verbose(WT_SESSION_IMPL *session, WT_MULTI *multi)
WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY,
"Page reconciliation triggered lookaside write "
"file ID %" PRIu32 ", page ID %" PRIu64 ". "
- "Max txn ID %" PRIu64 ", min timestamp %s, skewed %s. "
+ "Max txn ID %" PRIu64 ", unstable timestamp %s, %s. "
"Entries now in lookaside file: %" PRId64 ", "
"cache dirty: %2.3f%% , "
"cache use: %2.3f%%",
btree_id, multi->page_las.las_pageid,
- multi->page_las.las_max_txn,
+ multi->page_las.max_txn,
ts,
- multi->page_las.las_skew_newest ? "newest" : "oldest",
+ multi->page_las.skew_newest ? "newest" : "not newest",
WT_STAT_READ(conn->stats, cache_lookaside_entries),
pct_dirty, pct_full);
}
@@ -724,8 +715,7 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
* table. (We check the length because row-store doesn't
* write zero-length data items.)
*/
- if (multi->page_las.las_skew_newest &&
- upd == list->onpage_upd &&
+ if (upd == list->onpage_upd &&
upd->size > 0 &&
(upd->type == WT_UPDATE_STANDARD ||
upd->type == WT_UPDATE_MODIFY)) {
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index 7dee7a5e756..0945d768ce2 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -138,6 +138,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
{ "async", "category",
NULL, NULL,
confchk_wiredtiger_open_async_subconfigs, 3 },
+ { "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
{ "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
@@ -189,7 +190,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
{ "timing_stress_for_test", "list",
NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\","
"\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
- "\"split_6\",\"split_7\"]",
+ "\"split_6\",\"split_7\",\"split_8\",\"split_9\"]",
NULL, 0 },
{ "verbose", "list",
NULL, "choices=[\"api\",\"block\",\"checkpoint\","
@@ -806,6 +807,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
{ "buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0 },
{ "builtin_extension_config", "string", NULL, NULL, NULL, 0 },
{ "cache_cursors", "boolean", NULL, NULL, NULL, 0 },
+ { "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
{ "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
@@ -879,7 +881,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
{ "timing_stress_for_test", "list",
NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\","
"\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
- "\"split_6\",\"split_7\"]",
+ "\"split_6\",\"split_7\",\"split_8\",\"split_9\"]",
NULL, 0 },
{ "transaction_sync", "category",
NULL, NULL,
@@ -909,6 +911,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
{ "buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0 },
{ "builtin_extension_config", "string", NULL, NULL, NULL, 0 },
{ "cache_cursors", "boolean", NULL, NULL, NULL, 0 },
+ { "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
{ "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
@@ -982,7 +985,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
{ "timing_stress_for_test", "list",
NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\","
"\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
- "\"split_6\",\"split_7\"]",
+ "\"split_6\",\"split_7\",\"split_8\",\"split_9\"]",
NULL, 0 },
{ "transaction_sync", "category",
NULL, NULL,
@@ -1013,6 +1016,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
{ "buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0 },
{ "builtin_extension_config", "string", NULL, NULL, NULL, 0 },
{ "cache_cursors", "boolean", NULL, NULL, NULL, 0 },
+ { "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
{ "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
@@ -1082,7 +1086,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
{ "timing_stress_for_test", "list",
NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\","
"\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
- "\"split_6\",\"split_7\"]",
+ "\"split_6\",\"split_7\",\"split_8\",\"split_9\"]",
NULL, 0 },
{ "transaction_sync", "category",
NULL, NULL,
@@ -1111,6 +1115,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
{ "buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0 },
{ "builtin_extension_config", "string", NULL, NULL, NULL, 0 },
{ "cache_cursors", "boolean", NULL, NULL, NULL, 0 },
+ { "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
{ "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
@@ -1180,7 +1185,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
{ "timing_stress_for_test", "list",
NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\","
"\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
- "\"split_6\",\"split_7\"]",
+ "\"split_6\",\"split_7\",\"split_8\",\"split_9\"]",
NULL, 0 },
{ "transaction_sync", "category",
NULL, NULL,
@@ -1250,13 +1255,13 @@ static const WT_CONFIG_ENTRY config_entries[] = {
confchk_WT_CONNECTION_query_timestamp, 1
},
{ "WT_CONNECTION.reconfigure",
- "async=(enabled=false,ops_max=1024,threads=2),cache_overhead=8,"
- "cache_size=100MB,checkpoint=(log_size=0,wait=0),"
- "compatibility=(release=),error_prefix=,eviction=(threads_max=8,"
- "threads_min=1),eviction_checkpoint_target=5,"
- "eviction_dirty_target=5,eviction_dirty_trigger=20,"
- "eviction_target=80,eviction_trigger=95,"
- "file_manager=(close_handle_minimum=250,close_idle_time=30,"
+ "async=(enabled=false,ops_max=1024,threads=2),cache_max_wait_ms=0"
+ ",cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,"
+ "wait=0),compatibility=(release=),error_prefix=,"
+ "eviction=(threads_max=8,threads_min=1),"
+ "eviction_checkpoint_target=1,eviction_dirty_target=5,"
+ "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
+ ",file_manager=(close_handle_minimum=250,close_idle_time=30,"
"close_scan_interval=10),log=(archive=true,prealloc=true,"
"zero_fill=false),lsm_manager=(merge=true,worker_thread_max=4),"
"lsm_merge=true,operation_tracking=(enabled=false,path=\".\"),"
@@ -1264,7 +1269,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"statistics=none,statistics_log=(json=false,on_close=false,"
"sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"timing_stress_for_test=,verbose=",
- confchk_WT_CONNECTION_reconfigure, 22
+ confchk_WT_CONNECTION_reconfigure, 23
},
{ "WT_CONNECTION.rollback_to_stable",
"",
@@ -1489,66 +1494,67 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "wiredtiger_open",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
- ",builtin_extension_config=,cache_cursors=true,cache_overhead=8,"
- "cache_size=100MB,checkpoint=(log_size=0,wait=0),"
- "checkpoint_sync=true,compatibility=(release=,require_max=,"
- "require_min=),config_base=true,create=false,direct_io=,"
- "encryption=(keyid=,name=,secretkey=),error_prefix=,"
- "eviction=(threads_max=8,threads_min=1),"
- "eviction_checkpoint_target=5,eviction_dirty_target=5,"
- "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
- ",exclusive=false,extensions=,file_extend=,"
- "file_manager=(close_handle_minimum=250,close_idle_time=30,"
- "close_scan_interval=10),hazard_max=1000,in_memory=false,"
- "log=(archive=true,compressor=,enabled=false,file_max=100MB,"
- "path=\".\",prealloc=true,recover=on,zero_fill=false),"
- "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
- "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
- "path=\".\"),readonly=false,session_max=100,"
- "session_scratch_max=2MB,session_table_cache=true,"
- "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
+ ",builtin_extension_config=,cache_cursors=true,"
+ "cache_max_wait_ms=0,cache_overhead=8,cache_size=100MB,"
+ "checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
+ "compatibility=(release=,require_max=,require_min=),"
+ "config_base=true,create=false,direct_io=,encryption=(keyid=,"
+ "name=,secretkey=),error_prefix=,eviction=(threads_max=8,"
+ "threads_min=1),eviction_checkpoint_target=1,"
+ "eviction_dirty_target=5,eviction_dirty_trigger=20,"
+ "eviction_target=80,eviction_trigger=95,exclusive=false,"
+ "extensions=,file_extend=,file_manager=(close_handle_minimum=250,"
+ "close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
+ "in_memory=false,log=(archive=true,compressor=,enabled=false,"
+ "file_max=100MB,path=\".\",prealloc=true,recover=on,"
+ "zero_fill=false),lsm_manager=(merge=true,worker_thread_max=4),"
+ "lsm_merge=true,mmap=true,multiprocess=false,"
+ "operation_tracking=(enabled=false,path=\".\"),readonly=false,"
+ "session_max=100,session_scratch_max=2MB,session_table_cache=true"
+ ",shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
"statistics=none,statistics_log=(json=false,on_close=false,"
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),use_environment=true,use_environment_priv=false,"
"verbose=,write_through=",
- confchk_wiredtiger_open, 45
+ confchk_wiredtiger_open, 46
},
{ "wiredtiger_open_all",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
- ",builtin_extension_config=,cache_cursors=true,cache_overhead=8,"
- "cache_size=100MB,checkpoint=(log_size=0,wait=0),"
- "checkpoint_sync=true,compatibility=(release=,require_max=,"
- "require_min=),config_base=true,create=false,direct_io=,"
- "encryption=(keyid=,name=,secretkey=),error_prefix=,"
- "eviction=(threads_max=8,threads_min=1),"
- "eviction_checkpoint_target=5,eviction_dirty_target=5,"
- "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
- ",exclusive=false,extensions=,file_extend=,"
- "file_manager=(close_handle_minimum=250,close_idle_time=30,"
- "close_scan_interval=10),hazard_max=1000,in_memory=false,"
- "log=(archive=true,compressor=,enabled=false,file_max=100MB,"
- "path=\".\",prealloc=true,recover=on,zero_fill=false),"
- "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
- "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
- "path=\".\"),readonly=false,session_max=100,"
- "session_scratch_max=2MB,session_table_cache=true,"
- "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
+ ",builtin_extension_config=,cache_cursors=true,"
+ "cache_max_wait_ms=0,cache_overhead=8,cache_size=100MB,"
+ "checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
+ "compatibility=(release=,require_max=,require_min=),"
+ "config_base=true,create=false,direct_io=,encryption=(keyid=,"
+ "name=,secretkey=),error_prefix=,eviction=(threads_max=8,"
+ "threads_min=1),eviction_checkpoint_target=1,"
+ "eviction_dirty_target=5,eviction_dirty_trigger=20,"
+ "eviction_target=80,eviction_trigger=95,exclusive=false,"
+ "extensions=,file_extend=,file_manager=(close_handle_minimum=250,"
+ "close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
+ "in_memory=false,log=(archive=true,compressor=,enabled=false,"
+ "file_max=100MB,path=\".\",prealloc=true,recover=on,"
+ "zero_fill=false),lsm_manager=(merge=true,worker_thread_max=4),"
+ "lsm_merge=true,mmap=true,multiprocess=false,"
+ "operation_tracking=(enabled=false,path=\".\"),readonly=false,"
+ "session_max=100,session_scratch_max=2MB,session_table_cache=true"
+ ",shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
"statistics=none,statistics_log=(json=false,on_close=false,"
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),use_environment=true,use_environment_priv=false,"
"verbose=,version=(major=0,minor=0),write_through=",
- confchk_wiredtiger_open_all, 46
+ confchk_wiredtiger_open_all, 47
},
{ "wiredtiger_open_basecfg",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
- ",builtin_extension_config=,cache_cursors=true,cache_overhead=8,"
- "cache_size=100MB,checkpoint=(log_size=0,wait=0),"
- "checkpoint_sync=true,compatibility=(release=,require_max=,"
- "require_min=),direct_io=,encryption=(keyid=,name=,secretkey=),"
- "error_prefix=,eviction=(threads_max=8,threads_min=1),"
- "eviction_checkpoint_target=5,eviction_dirty_target=5,"
+ ",builtin_extension_config=,cache_cursors=true,"
+ "cache_max_wait_ms=0,cache_overhead=8,cache_size=100MB,"
+ "checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
+ "compatibility=(release=,require_max=,require_min=),direct_io=,"
+ "encryption=(keyid=,name=,secretkey=),error_prefix=,"
+ "eviction=(threads_max=8,threads_min=1),"
+ "eviction_checkpoint_target=1,eviction_dirty_target=5,"
"eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
",extensions=,file_extend=,file_manager=(close_handle_minimum=250"
",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
@@ -1563,16 +1569,17 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),verbose=,version=(major=0,minor=0),write_through=",
- confchk_wiredtiger_open_basecfg, 40
+ confchk_wiredtiger_open_basecfg, 41
},
{ "wiredtiger_open_usercfg",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
- ",builtin_extension_config=,cache_cursors=true,cache_overhead=8,"
- "cache_size=100MB,checkpoint=(log_size=0,wait=0),"
- "checkpoint_sync=true,compatibility=(release=,require_max=,"
- "require_min=),direct_io=,encryption=(keyid=,name=,secretkey=),"
- "error_prefix=,eviction=(threads_max=8,threads_min=1),"
- "eviction_checkpoint_target=5,eviction_dirty_target=5,"
+ ",builtin_extension_config=,cache_cursors=true,"
+ "cache_max_wait_ms=0,cache_overhead=8,cache_size=100MB,"
+ "checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
+ "compatibility=(release=,require_max=,require_min=),direct_io=,"
+ "encryption=(keyid=,name=,secretkey=),error_prefix=,"
+ "eviction=(threads_max=8,threads_min=1),"
+ "eviction_checkpoint_target=1,eviction_dirty_target=5,"
"eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
",extensions=,file_extend=,file_manager=(close_handle_minimum=250"
",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
@@ -1587,7 +1594,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),verbose=,write_through=",
- confchk_wiredtiger_open_usercfg, 39
+ confchk_wiredtiger_open_usercfg, 40
},
{ NULL, NULL, NULL, 0 }
};
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index d322caac04a..c1e6e1eb6cf 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -2024,6 +2024,8 @@ __wt_timing_stress_config(WT_SESSION_IMPL *session, const char *cfg[])
{ "split_5", WT_TIMING_STRESS_SPLIT_5 },
{ "split_6", WT_TIMING_STRESS_SPLIT_6 },
{ "split_7", WT_TIMING_STRESS_SPLIT_7 },
+ { "split_8", WT_TIMING_STRESS_SPLIT_8 },
+ { "split_9", WT_TIMING_STRESS_SPLIT_9 },
{ NULL, 0 }
};
WT_CONFIG_ITEM cval, sval;
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c
index 00de16e6c21..dbb602921a8 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache.c
@@ -143,6 +143,10 @@ __cache_config_local(WT_SESSION_IMPL *session, bool shared, const char *cfg[])
conn->evict_threads_max = evict_threads_max;
conn->evict_threads_min = evict_threads_min;
+ /* Get the wait time, converting milliseconds to microseconds. */
+ WT_RET(__wt_config_gets(session, cfg, "cache_max_wait_ms", &cval));
+ cache->cache_max_wait_us = (uint64_t)(cval.val * WT_THOUSAND);
+
return (0);
}
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 8396612b7ca..05397843fc7 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -563,7 +563,7 @@ __evict_update_work(WT_SESSION_IMPL *session)
conn = S2C(session);
cache = conn->cache;
- dirty_target = cache->eviction_dirty_target;
+ dirty_target = __wt_eviction_dirty_target(cache);
dirty_trigger = cache->eviction_dirty_trigger;
target = cache->eviction_target;
trigger = cache->eviction_trigger;
@@ -2345,7 +2345,8 @@ __wt_cache_eviction_worker(
WT_TRACK_OP_DECL;
WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *txn_state;
- uint64_t initial_progress, max_progress, time_start, time_stop;
+ uint64_t elapsed, time_start, time_stop;
+ uint64_t initial_progress, max_progress;
bool timer;
WT_TRACK_OP_INIT(session);
@@ -2367,8 +2368,7 @@ __wt_cache_eviction_worker(
__wt_evict_server_wake(session);
/* Track how long application threads spend doing eviction. */
- timer =
- WT_STAT_ENABLED(session) && !F_ISSET(session, WT_SESSION_INTERNAL);
+ timer = !F_ISSET(session, WT_SESSION_INTERNAL);
if (timer)
time_start = __wt_clock(session);
@@ -2405,22 +2405,10 @@ __wt_cache_eviction_worker(
/* See if eviction is still needed. */
if (!__wt_eviction_needed(session, busy, readonly, &pct_full) ||
- ((pct_full < 100.0 || cache->eviction_scrub_limit > 0.0) &&
- (cache->eviction_progress >
+ (pct_full < 100.0 && (cache->eviction_progress >
initial_progress + max_progress)))
break;
- /*
- * Don't make application threads participate in scrubbing for
- * checkpoints. Just throttle updates instead.
- */
- if (WT_EVICT_HAS_WORKERS(session) &&
- cache->eviction_scrub_limit > 0.0 &&
- !F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD)) {
- __wt_yield();
- continue;
- }
-
/* Evict a page. */
switch (ret = __evict_page(session, false)) {
case 0:
@@ -2438,13 +2426,26 @@ __wt_cache_eviction_worker(
default:
goto err;
}
+ /* Stop if we've exceeded the timeout. */
+ if (timer && cache->cache_max_wait_us != 0) {
+ time_stop = __wt_clock(session);
+ if (session->cache_wait_us +
+ WT_CLOCKDIFF_US(time_stop, time_start) >
+ cache->cache_max_wait_us)
+ goto err;
+ }
}
err: if (timer) {
time_stop = __wt_clock(session);
- WT_STAT_CONN_INCRV(session,
- application_cache_time,
- WT_CLOCKDIFF_US(time_stop, time_start));
+ elapsed = WT_CLOCKDIFF_US(time_stop, time_start);
+ WT_STAT_CONN_INCRV(session, application_cache_time, elapsed);
+ session->cache_wait_us += elapsed;
+ if (cache->cache_max_wait_us != 0 &&
+ session->cache_wait_us > cache->cache_max_wait_us) {
+ WT_TRET(WT_CACHE_FULL);
+ WT_STAT_CONN_INCR(session, cache_timed_out_ops);
+ }
}
done: WT_TRACK_OP_END(session);
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h
index ca2176fcf0e..aabb19c86aa 100644
--- a/src/third_party/wiredtiger/src/include/api.h
+++ b/src/third_party/wiredtiger/src/include/api.h
@@ -48,6 +48,9 @@
WT_TRACK_OP_INIT(s); \
WT_SINGLE_THREAD_CHECK_START(s); \
WT_ERR(WT_SESSION_CHECK_PANIC(s)); \
+ /* Reset wait time if this isn't an API reentry. */ \
+ if (__oldname == NULL) \
+ (s)->cache_wait_us = 0; \
__wt_verbose((s), WT_VERB_API, "%s", "CALL: " #h ":" #n)
#define API_CALL_NOCONF(s, h, n, dh) do { \
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 33e382feba2..64e84e59d36 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -234,14 +234,13 @@ struct __wt_ovfl_reuse {
*/
struct __wt_page_lookaside {
uint64_t las_pageid; /* Page ID in lookaside */
- uint64_t las_max_txn; /* Max transaction ID in lookaside */
- uint64_t las_min_txn; /* Min transaction ID in lookaside */
- WT_DECL_TIMESTAMP(min_timestamp)/* Min timestamp in lookaside */
- /* Max timestamp on page */
- WT_DECL_TIMESTAMP(onpage_timestamp)
+ uint64_t max_txn; /* Maximum transaction ID */
+ uint64_t unstable_txn; /* First transaction ID not on page */
+ WT_DECL_TIMESTAMP(max_timestamp)/* Maximum timestamp */
+ WT_DECL_TIMESTAMP(unstable_timestamp)/* First timestamp not on page */
bool eviction_to_lookaside; /* Revert to lookaside on eviction */
- bool las_skew_newest; /* On-page skewed to newest */
bool invalid; /* History is required for correct reads */
+ bool skew_newest; /* Page image has newest versions */
};
/*
@@ -270,6 +269,9 @@ struct __wt_page_modify {
uint64_t rec_max_txn;
WT_DECL_TIMESTAMP(rec_max_timestamp)
+ /* Stable timestamp at last reconciliation. */
+ WT_DECL_TIMESTAMP(last_stable_timestamp)
+
/* The largest update transaction ID (approximate). */
uint64_t update_txn;
@@ -481,7 +483,9 @@ struct __wt_page_modify {
#define WT_PM_REC_REPLACE 3 /* Reconciliation: single block */
uint8_t rec_result; /* Reconciliation state */
- uint8_t update_restored; /* Page created by restoring updates */
+#define WT_PAGE_RS_LOOKASIDE 0x1
+#define WT_PAGE_RS_RESTORED 0x2
+ uint8_t restore_state; /* Created by restoring updates */
};
/*
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 81c166eb0e4..d7edcd00d5a 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -1190,10 +1190,10 @@ __wt_page_las_active(WT_SESSION_IMPL *session, WT_REF *ref)
if ((page_las = ref->page_las) == NULL)
return (false);
- if (page_las->invalid || !ref->page_las->las_skew_newest)
+ if (page_las->invalid || !ref->page_las->skew_newest)
return (true);
- if (__wt_txn_visible_all(session, page_las->las_max_txn,
- WT_TIMESTAMP_NULL(&page_las->onpage_timestamp)))
+ if (__wt_txn_visible_all(session, page_las->max_txn,
+ WT_TIMESTAMP_NULL(&page_las->max_timestamp)))
return (false);
return (true);
@@ -1329,6 +1329,7 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
static inline bool
__wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page)
{
+ WT_DECL_TIMESTAMP(pinned_ts)
WT_PAGE_MODIFY *mod;
WT_TXN_GLOBAL *txn_global;
@@ -1338,7 +1339,8 @@ __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page)
* If the page hasn't been through one round of update/restore, give it
* a try.
*/
- if ((mod = page->modify) == NULL || !mod->update_restored)
+ if ((mod = page->modify) == NULL ||
+ !FLD_ISSET(mod->restore_state, WT_PAGE_RS_RESTORED))
return (true);
/*
@@ -1356,17 +1358,12 @@ __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page)
return (true);
#ifdef HAVE_TIMESTAMPS
- {
- bool same_timestamp;
-
- same_timestamp = false;
- if (!__wt_timestamp_iszero(&mod->last_eviction_timestamp))
- WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
- same_timestamp = __wt_timestamp_cmp(
+ if (!__wt_timestamp_iszero(&mod->last_eviction_timestamp)) {
+ __wt_txn_pinned_timestamp(session, &pinned_ts);
+ if (__wt_timestamp_cmp(
&mod->last_eviction_timestamp,
- &txn_global->pinned_timestamp) == 0);
- if (!same_timestamp)
- return (true);
+ &txn_global->pinned_timestamp) != 0)
+ return (true);
}
#endif
@@ -1605,6 +1602,8 @@ __wt_split_descent_race(
* update. A thread can read the parent page's original page index and
* then read the split page's replacement index.
*
+ * For example, imagine a search descending the tree.
+ *
* Because internal page splits work by truncating the original page to
* the initial part of the original page, the result of this race is we
* will have a search key that points past the end of the current page.
@@ -1649,73 +1648,17 @@ __wt_split_descent_race(
* work by truncating the split page, so the split page search is for
* content the split page retains after the split, and we ignore this
* race.
- */
- WT_INTL_INDEX_GET(session, ref->home, pindex);
- return (pindex != saved_pindex);
-}
-
-/*
- * __wt_split_prev_race --
- * Return if we raced with an internal page split when moving backwards
- * through the tree.
- */
-static inline bool
-__wt_split_prev_race(WT_SESSION_IMPL *session, WT_REF *ref)
-{
- WT_PAGE_INDEX *pindex;
-
- /*
- * There's a split race when a cursor moving backwards through the tree
- * descends the tree. If we're splitting an internal page into its
- * parent, we move the WT_REF structures and update the parent's page
- * index before updating the split page's page index, and it's not an
- * atomic update. A thread can read the parent and split page's original
- * indexes during a split, or read the parent page's replacement page
- * index and then read the split page's original index, either of which
- * can lead to skipping pages.
*
- * For example, imagine an internal page with 3 child pages, with the
- * namespaces a-f, g-h and i-j; the first child page splits. The parent
- * starts out with the following page-index:
+ * This code is a general purpose check for a descent race and we call
+ * it in other cases, for example, a cursor traversing backwards through
+ * the tree.
*
- * | ... | a | g | i | ... |
- *
- * The split page starts out with the following page-index:
- *
- * | a | b | c | d | e | f |
- *
- * The first step is to move the c-f ranges into a new subtree, so, for
- * example we might have two new internal pages 'c' and 'e', where the
- * new 'c' page references the c-d namespace and the new 'e' page
- * references the e-f namespace. The top of the subtree references the
- * parent page, but until the parent's page index is updated, threads in
- * the subtree won't be able to ascend out of the subtree. However, once
- * the parent page's page index is updated to this:
- *
- * | ... | a | c | e | g | i | ... |
- *
- * threads in the subtree can ascend into the parent. Imagine a cursor
- * in the c-d part of the namespace that ascends to the parent's 'c'
- * slot. It would then decrement to the slot before the 'c' slot, the
- * 'a' slot.
- *
- * The previous-cursor movement selects the last slot in the 'a' page;
- * if the split page's page-index hasn't been updated yet, it selects
- * the 'f' slot, which is incorrect. Once the split page's page index is
- * updated to this:
- *
- * | a | b |
- *
- * the previous-cursor movement will select the 'b' slot, which is
- * correct.
- *
- * This function takes an argument which is the internal page into which
- * we're coupling. If the last slot on the page no longer points to
- * the current page as its "home", the page is being split and part of
- * its namespace moved, we have to restart.
+ * Presumably we acquired a page index on the child page before calling
+ * this code; don't re-order that acquisition with this check.
*/
- WT_INTL_INDEX_GET(session, ref->page, pindex);
- return (pindex->index[pindex->entries - 1]->home != ref->page);
+ WT_BARRIER();
+ WT_INTL_INDEX_GET(session, ref->home, pindex);
+ return (pindex != saved_pindex);
}
/*
@@ -1724,8 +1667,8 @@ __wt_split_prev_race(WT_SESSION_IMPL *session, WT_REF *ref)
* coupling up/down the tree.
*/
static inline int
-__wt_page_swap_func(WT_SESSION_IMPL *session,
- WT_REF *held, WT_REF *want, bool prev_race, uint32_t flags
+__wt_page_swap_func(
+ WT_SESSION_IMPL *session, WT_REF *held, WT_REF *want, uint32_t flags
#ifdef HAVE_DIAGNOSTIC
, const char *file, int line
#endif
@@ -1755,18 +1698,6 @@ __wt_page_swap_func(WT_SESSION_IMPL *session,
);
/*
- * We can race when descending into an internal page as part of moving
- * backwards through the tree, and we have to detect that race before
- * releasing the page from which we are coupling, else we can't restart
- * the movement.
- */
- if (ret == 0 && prev_race && WT_PAGE_IS_INTERNAL(want->page) &&
- __wt_split_prev_race(session, want)) {
- ret = WT_RESTART;
- WT_TRET(__wt_page_release(session, want, flags));
- }
-
- /*
* Expected failures: page not found or restart. Our callers list the
* errors they're expecting to handle.
*/
diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h
index 1299d3e90e3..7d07e6dfd98 100644
--- a/src/third_party/wiredtiger/src/include/cache.h
+++ b/src/third_party/wiredtiger/src/include/cache.h
@@ -120,11 +120,11 @@ struct __wt_cache {
double eviction_checkpoint_target;/* Percent to reduce dirty
to during checkpoint scrubs */
- double eviction_scrub_limit; /* Percent of cache to trigger
- dirty eviction during checkpoint
- scrubs */
+ double eviction_scrub_target; /* Current scrub target */
u_int overhead_pct; /* Cache percent adjustment */
+ uint64_t cache_max_wait_us; /* Maximum time an operation waits for
+ * space in cache */
/*
* Eviction thread tuning information.
diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i
index fc127942d02..7f12949e162 100644
--- a/src/third_party/wiredtiger/src/include/cache.i
+++ b/src/third_party/wiredtiger/src/include/cache.i
@@ -276,6 +276,22 @@ __wt_eviction_clean_needed(WT_SESSION_IMPL *session, double *pct_fullp)
}
/*
+ * __wt_eviction_dirty_target --
+ * Return the effective dirty target (including checkpoint scrubbing).
+ */
+static inline double
+__wt_eviction_dirty_target(WT_CACHE *cache)
+{
+ double dirty_target, scrub_target;
+
+ dirty_target = cache->eviction_dirty_target;
+ scrub_target = cache->eviction_scrub_target;
+
+ return (scrub_target > 0 && scrub_target < dirty_target ?
+ scrub_target : dirty_target);
+}
+
+/*
* __wt_eviction_dirty_needed --
* Return if an application thread should do eviction due to the total
* volume of dirty data in cache.
@@ -284,7 +300,6 @@ static inline bool
__wt_eviction_dirty_needed(WT_SESSION_IMPL *session, double *pct_fullp)
{
WT_CACHE *cache;
- double dirty_trigger;
uint64_t dirty_inuse, bytes_max;
cache = S2C(session)->cache;
@@ -299,10 +314,8 @@ __wt_eviction_dirty_needed(WT_SESSION_IMPL *session, double *pct_fullp)
if (pct_fullp != NULL)
*pct_fullp = ((100.0 * dirty_inuse) / bytes_max);
- if ((dirty_trigger = cache->eviction_scrub_limit) < 1.0)
- dirty_trigger = cache->eviction_dirty_trigger;
-
- return (dirty_inuse > (uint64_t)(dirty_trigger * bytes_max) / 100);
+ return (dirty_inuse > (uint64_t)(
+ cache->eviction_dirty_trigger * bytes_max) / 100);
}
/*
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index d0bebe8da5d..22459b0072c 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -458,6 +458,8 @@ struct __wt_connection_impl {
#define WT_TIMING_STRESS_SPLIT_5 0x040u
#define WT_TIMING_STRESS_SPLIT_6 0x080u
#define WT_TIMING_STRESS_SPLIT_7 0x100u
+#define WT_TIMING_STRESS_SPLIT_8 0x200u
+#define WT_TIMING_STRESS_SPLIT_9 0x400u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint64_t timing_stress_flags;
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 7e2d4a4786d..c78c460f445 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -498,6 +498,7 @@ extern int __wt_lsm_work_switch(WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **ent
extern int __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_lsm_chunk_visible_all(WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_work_enable_evict(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_worker_stop(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/third_party/wiredtiger/src/include/lsm.h b/src/third_party/wiredtiger/src/include/lsm.h
index f515e03519a..67ef28757ef 100644
--- a/src/third_party/wiredtiger/src/include/lsm.h
+++ b/src/third_party/wiredtiger/src/include/lsm.h
@@ -107,7 +107,8 @@ struct __wt_lsm_chunk {
uint32_t id; /* ID used to generate URIs */
uint32_t generation; /* Merge generation */
uint32_t refcnt; /* Number of worker thread references */
- uint32_t bloom_busy; /* Number of worker thread references */
+ uint32_t bloom_busy; /* Currently creating bloom filter */
+ uint32_t evict_enabled; /* Eviction allowed on the chunk */
int8_t empty; /* 1/0: checkpoint missing */
int8_t evicted; /* 1/0: in-memory chunk was evicted */
@@ -129,13 +130,19 @@ struct __wt_lsm_chunk {
* is required.
*/
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_LSM_WORK_BLOOM 0x01u /* Create a bloom filter */
-#define WT_LSM_WORK_DROP 0x02u /* Drop unused chunks */
-#define WT_LSM_WORK_FLUSH 0x04u /* Flush a chunk to disk */
-#define WT_LSM_WORK_MERGE 0x08u /* Look for a tree merge */
-#define WT_LSM_WORK_SWITCH 0x10u /* Switch to new in-memory chunk */
+#define WT_LSM_WORK_BLOOM 0x01u /* Create a bloom filter */
+#define WT_LSM_WORK_DROP 0x02u /* Drop unused chunks */
+#define WT_LSM_WORK_ENABLE_EVICT 0x04u /* Enable eviction on a chunk */
+#define WT_LSM_WORK_FLUSH 0x08u /* Flush a chunk to disk */
+#define WT_LSM_WORK_MERGE 0x10u /* Look for a tree merge */
+#define WT_LSM_WORK_SWITCH 0x20u /* Switch the in-memory chunk */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
+/* Work units that are serviced by general worker threads. */
+#define WT_LSM_WORK_GENERAL_OPS \
+ (WT_LSM_WORK_BLOOM | WT_LSM_WORK_DROP | WT_LSM_WORK_ENABLE_EVICT |\
+ WT_LSM_WORK_FLUSH | WT_LSM_WORK_SWITCH)
+
/*
* WT_LSM_WORK_UNIT --
* A definition of maintenance that an LSM tree needs done.
diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h
index 1507e2d07cc..d76560d26e6 100644
--- a/src/third_party/wiredtiger/src/include/misc.h
+++ b/src/third_party/wiredtiger/src/include/misc.h
@@ -294,16 +294,15 @@ typedef void wt_timestamp_t;
__wt_scr_alloc_func(session, size, scratchp, __func__, __LINE__)
#define __wt_page_in(session, ref, flags) \
__wt_page_in_func(session, ref, flags, __func__, __LINE__)
-#define __wt_page_swap(session, held, want, prev_race, flags) \
- __wt_page_swap_func( \
- session, held, want, prev_race, flags, __func__, __LINE__)
+#define __wt_page_swap(session, held, want, flags) \
+ __wt_page_swap_func(session, held, want, flags, __func__, __LINE__)
#else
#define __wt_scr_alloc(session, size, scratchp) \
__wt_scr_alloc_func(session, size, scratchp)
#define __wt_page_in(session, ref, flags) \
__wt_page_in_func(session, ref, flags)
-#define __wt_page_swap(session, held, want, prev_race, flags) \
- __wt_page_swap_func(session, held, want, prev_race, flags)
+#define __wt_page_swap(session, held, want, flags) \
+ __wt_page_swap_func(session, held, want, flags)
#endif
/* Called on unexpected code path: locate the failure. */
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index e102d7f5057..cbf572f9a23 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -141,6 +141,8 @@ struct __wt_session_impl {
u_int ckpt_handle_next; /* Next empty slot */
size_t ckpt_handle_allocated; /* Bytes allocated */
+ uint64_t cache_wait_us; /* Wait time for cache for current operation */
+
/*
* Operations acting on handles.
*
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 92f28d88e62..1693b9baa82 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -406,6 +406,7 @@ struct __wt_connection_stats {
int64_t cache_eviction_maximum_page_size;
int64_t cache_eviction_dirty;
int64_t cache_eviction_app_dirty;
+ int64_t cache_timed_out_ops;
int64_t cache_read_overflow;
int64_t cache_eviction_deepen;
int64_t cache_write_lookaside;
@@ -663,6 +664,7 @@ struct __wt_connection_stats {
int64_t txn_pinned_checkpoint_range;
int64_t txn_pinned_snapshot_range;
int64_t txn_pinned_timestamp;
+ int64_t txn_pinned_timestamp_checkpoint;
int64_t txn_pinned_timestamp_oldest;
int64_t txn_sync;
int64_t txn_commit;
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 32234dca23e..480d31b188e 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -147,7 +147,7 @@ struct __wt_txn_global {
volatile bool checkpoint_running; /* Checkpoint running */
volatile uint32_t checkpoint_id; /* Checkpoint's session ID */
WT_TXN_STATE checkpoint_state; /* Checkpoint's txn state */
- WT_TXN *checkpoint_txn; /* Checkpoint's txn structure */
+ WT_DECL_TIMESTAMP(checkpoint_timestamp) /* Checkpoint's timestamp */
volatile uint64_t metadata_pinned; /* Oldest ID for metadata */
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 9276ca62903..0efc32811e6 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -396,6 +396,60 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session)
return (checkpoint_pinned);
}
+#ifdef HAVE_TIMESTAMPS
+/*
+ * __wt_txn_pinned_timestamp --
+ * Get the first timestamp that has to be kept for the current tree.
+ */
+static inline void
+__wt_txn_pinned_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *pinned_tsp)
+{
+ WT_BTREE *btree;
+ WT_TXN_GLOBAL *txn_global;
+ wt_timestamp_t checkpoint_ts, pinned_ts;
+ bool include_checkpoint_txn;
+
+ btree = S2BT_SAFE(session);
+ txn_global = &S2C(session)->txn_global;
+
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ __wt_timestamp_set(&pinned_ts, &txn_global->pinned_timestamp));
+ __wt_timestamp_set(pinned_tsp, &pinned_ts);
+
+ /*
+ * Checkpoint transactions often fall behind ordinary application
+ * threads. Take special effort to not keep changes pinned in cache if
+ * they are only required for the checkpoint and it has already seen
+ * them.
+ *
+ * If there is no active checkpoint or this handle is up to date with
+ * the active checkpoint then it's safe to ignore the checkpoint
+ * timestamp when computing the pinned timestamp.
+ */
+ include_checkpoint_txn = btree == NULL ||
+ (!F_ISSET(btree, WT_BTREE_LOOKASIDE) &&
+ btree->checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT));
+ if (!include_checkpoint_txn)
+ return;
+
+ /*
+ * The read of the timestamp pinned by a checkpoint needs to be
+ * carefully ordered: if a checkpoint is starting and we have to use
+ * the checkpoint timestamp, we take the minimum of it with the pinned
+ * timestamp read above, which is what we want.
+ */
+ WT_READ_BARRIER();
+
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ __wt_timestamp_set(&checkpoint_ts,
+ &txn_global->checkpoint_timestamp));
+
+ if (!__wt_timestamp_iszero(&checkpoint_ts) &&
+ __wt_timestamp_cmp(&checkpoint_ts, &pinned_ts) < 0)
+ __wt_timestamp_set(pinned_tsp, &checkpoint_ts);
+}
+#endif
+
/*
* __txn_visible_all_id --
* Check if a given transaction ID is "globally visible". This is, if
@@ -427,8 +481,7 @@ __wt_txn_visible_all(
#ifdef HAVE_TIMESTAMPS
{
- WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
- int cmp;
+ wt_timestamp_t pinned_ts;
/* Timestamp check. */
if (timestamp == NULL || __wt_timestamp_iszero(timestamp))
@@ -438,20 +491,11 @@ __wt_txn_visible_all(
* If no oldest timestamp has been supplied, updates have to stay in
* cache until we are shutting down.
*/
- if (!txn_global->has_pinned_timestamp)
+ if (!S2C(session)->txn_global.has_pinned_timestamp)
return (F_ISSET(S2C(session), WT_CONN_CLOSING));
- WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
- cmp = __wt_timestamp_cmp(timestamp, &txn_global->pinned_timestamp));
-
- /*
- * We can discard updates with timestamps less than or equal to the
- * pinned timestamp. This is different to the situation for
- * transaction IDs, because we know that updates with timestamps are
- * definitely committed (and in this case, that the transaction ID is
- * globally visible).
- */
- return (cmp <= 0);
+ __wt_txn_pinned_timestamp(session, &pinned_ts);
+ return (__wt_timestamp_cmp(timestamp, &pinned_ts) <= 0);
}
#else
WT_UNUSED(timestamp);
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 159e9e2cf72..a4ba834d5ef 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -602,6 +602,14 @@ struct __wt_cursor {
*
* The key must first be set and the record must already exist.
*
+ * Note that reserve works by doing a special update operation that is
+ * not logged and does not change the value of the record. This update
+ * is aborted when the enclosing transaction ends regardless of whether
+ * it commits or rolls back. Given that, reserve can only be used to
+ * detect conflicts between transactions that execute concurrently. It
+ * cannot detect all logical conflicts between transactions. For that,
+ * some update to the record must be committed.
+ *
* @snippet ex_all.c Reserve a record
*
* On success, the cursor ends positioned at the specified record; to
@@ -1639,6 +1647,12 @@ struct __wt_session {
* the WT_CURSOR::next (WT_CURSOR::prev) method will iterate from the
* beginning (end) of the table.
*
+ * When a range truncate is in progress, and another transaction inserts
+ * a key into that range, the behavior is not well defined - a conflict
+ * may be detected or both transactions may be permitted to commit. If
+ * they do commit, and if there is a crash and recovery runs, the result
+ * may be different than what was in cache before the crash.
+ *
* @param session the session handle
* @param name the URI of the table or file to truncate
* @param start optional cursor marking the first record discarded;
@@ -2128,6 +2142,10 @@ struct __wt_connection {
* thread uses a session from the configured session_max., an integer
* between 1 and 20; default \c 2.}
* @config{ ),,}
+ * @config{cache_max_wait_ms, the maximum number of milliseconds an
+ * application thread will wait for space to be available in cache
+ * before giving up. Default will wait forever., an integer greater
+ * than or equal to 0; default \c 0.}
* @config{cache_overhead, assume the heap allocator overhead is the
* specified percentage\, and adjust the cache usage by that amount (for
* example\, if there is 10GB of data in cache\, a percentage of 10
@@ -2179,7 +2197,7 @@ struct __wt_connection {
* is a percentage of the cache size if the value is within the range of
* 0 to 100 or an absolute size when greater than 100. The value is not
* allowed to exceed the \c cache_size. Ignored if set to zero or \c
- * in_memory is \c true., an integer between 0 and 10TB; default \c 5.}
+ * in_memory is \c true., an integer between 0 and 10TB; default \c 1.}
* @config{eviction_dirty_target, perform eviction in worker threads
* when the cache contains at least this much dirty content. It is a
* percentage of the cache size if the value is within the range of 1 to
@@ -2708,6 +2726,10 @@ struct __wt_connection {
* default value for any sessions created\, and can be overridden in configuring
* \c cache_cursors in WT_CONNECTION.open_session., a boolean flag; default \c
* true.}
+ * @config{cache_max_wait_ms, the maximum number of milliseconds an application
+ * thread will wait for space to be available in cache before giving up.
+ * Default will wait forever., an integer greater than or equal to 0; default \c
+ * 0.}
* @config{cache_overhead, assume the heap allocator overhead is the specified
* percentage\, and adjust the cache usage by that amount (for example\, if
* there is 10GB of data in cache\, a percentage of 10 means WiredTiger treats
@@ -2808,7 +2830,7 @@ struct __wt_connection {
* percentage of the cache size if the value is within the range of 0 to 100 or
* an absolute size when greater than 100. The value is not allowed to exceed
* the \c cache_size. Ignored if set to zero or \c in_memory is \c true., an
- * integer between 0 and 10TB; default \c 5.}
+ * integer between 0 and 10TB; default \c 1.}
* @config{eviction_dirty_target, perform eviction in worker threads when the
* cache contains at least this much dirty content. It is a percentage of the
* cache size if the value is within the range of 1 to 100 or an absolute size
@@ -5103,596 +5125,600 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1091
/*! cache: modified pages evicted by application threads */
#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1092
+/*! cache: operations timed out waiting for space in cache */
+#define WT_STAT_CONN_CACHE_TIMED_OUT_OPS 1093
/*! cache: overflow pages read into cache */
-#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1093
+#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1094
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1094
+#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1095
/*! cache: page written requiring lookaside records */
-#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1095
+#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1096
/*! cache: pages currently held in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_INUSE 1096
+#define WT_STAT_CONN_CACHE_PAGES_INUSE 1097
/*! cache: pages evicted because they exceeded the in-memory maximum count */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1097
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1098
/*!
* cache: pages evicted because they exceeded the in-memory maximum time
* (usecs)
*/
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_TIME 1098
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_TIME 1099
/*! cache: pages evicted because they had chains of deleted items count */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1099
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1100
/*!
* cache: pages evicted because they had chains of deleted items time
* (usecs)
*/
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE_TIME 1100
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE_TIME 1101
/*! cache: pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP 1101
+#define WT_STAT_CONN_CACHE_EVICTION_APP 1102
/*! cache: pages queued for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1102
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1103
/*! cache: pages queued for urgent eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1103
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1104
/*! cache: pages queued for urgent eviction during walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1104
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1105
/*! cache: pages read into cache */
-#define WT_STAT_CONN_CACHE_READ 1105
+#define WT_STAT_CONN_CACHE_READ 1106
/*! cache: pages read into cache after truncate */
-#define WT_STAT_CONN_CACHE_READ_DELETED 1106
+#define WT_STAT_CONN_CACHE_READ_DELETED 1107
/*! cache: pages read into cache after truncate in prepare state */
-#define WT_STAT_CONN_CACHE_READ_DELETED_PREPARED 1107
+#define WT_STAT_CONN_CACHE_READ_DELETED_PREPARED 1108
/*! cache: pages read into cache requiring lookaside entries */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1108
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1109
/*! cache: pages read into cache requiring lookaside for checkpoint */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_CHECKPOINT 1109
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_CHECKPOINT 1110
/*! cache: pages read into cache skipping older lookaside entries */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_SKIPPED 1110
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_SKIPPED 1111
/*!
* cache: pages read into cache with skipped lookaside entries needed
* later
*/
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY 1111
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY 1112
/*!
* cache: pages read into cache with skipped lookaside entries needed
* later by checkpoint
*/
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY_CHECKPOINT 1112
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY_CHECKPOINT 1113
/*! cache: pages requested from the cache */
-#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1113
+#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1114
/*! cache: pages seen by eviction walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1114
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1115
/*! cache: pages selected for eviction unable to be evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1115
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1116
/*! cache: pages walked for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK 1116
+#define WT_STAT_CONN_CACHE_EVICTION_WALK 1117
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1117
+#define WT_STAT_CONN_CACHE_WRITE 1118
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1118
+#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1119
/*! cache: percentage overhead */
-#define WT_STAT_CONN_CACHE_OVERHEAD 1119
+#define WT_STAT_CONN_CACHE_OVERHEAD 1120
/*! cache: tracked bytes belonging to internal pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1120
+#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1121
/*! cache: tracked bytes belonging to leaf pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_LEAF 1121
+#define WT_STAT_CONN_CACHE_BYTES_LEAF 1122
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1122
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1123
/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1123
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1124
/*! cache: unmodified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1124
+#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1125
/*! connection: auto adjusting condition resets */
-#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1125
+#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1126
/*! connection: auto adjusting condition wait calls */
-#define WT_STAT_CONN_COND_AUTO_WAIT 1126
+#define WT_STAT_CONN_COND_AUTO_WAIT 1127
/*! connection: detected system time went backwards */
-#define WT_STAT_CONN_TIME_TRAVEL 1127
+#define WT_STAT_CONN_TIME_TRAVEL 1128
/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1128
+#define WT_STAT_CONN_FILE_OPEN 1129
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1129
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1130
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1130
+#define WT_STAT_CONN_MEMORY_FREE 1131
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1131
+#define WT_STAT_CONN_MEMORY_GROW 1132
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1132
+#define WT_STAT_CONN_COND_WAIT 1133
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1133
+#define WT_STAT_CONN_RWLOCK_READ 1134
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1134
+#define WT_STAT_CONN_RWLOCK_WRITE 1135
/*! connection: total fsync I/Os */
-#define WT_STAT_CONN_FSYNC_IO 1135
+#define WT_STAT_CONN_FSYNC_IO 1136
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1136
+#define WT_STAT_CONN_READ_IO 1137
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1137
+#define WT_STAT_CONN_WRITE_IO 1138
/*! cursor: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1138
+#define WT_STAT_CONN_CURSOR_CREATE 1139
/*! cursor: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1139
+#define WT_STAT_CONN_CURSOR_INSERT 1140
/*! cursor: cursor modify calls */
-#define WT_STAT_CONN_CURSOR_MODIFY 1140
+#define WT_STAT_CONN_CURSOR_MODIFY 1141
/*! cursor: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1141
+#define WT_STAT_CONN_CURSOR_NEXT 1142
/*! cursor: cursor operation restarted */
-#define WT_STAT_CONN_CURSOR_RESTART 1142
+#define WT_STAT_CONN_CURSOR_RESTART 1143
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1143
+#define WT_STAT_CONN_CURSOR_PREV 1144
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1144
+#define WT_STAT_CONN_CURSOR_REMOVE 1145
/*! cursor: cursor reserve calls */
-#define WT_STAT_CONN_CURSOR_RESERVE 1145
+#define WT_STAT_CONN_CURSOR_RESERVE 1146
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1146
+#define WT_STAT_CONN_CURSOR_RESET 1147
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1147
+#define WT_STAT_CONN_CURSOR_SEARCH 1148
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1148
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1149
/*! cursor: cursor sweep buckets */
-#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1149
+#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1150
/*! cursor: cursor sweep cursors closed */
-#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1150
+#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1151
/*! cursor: cursor sweep cursors examined */
-#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1151
+#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1152
/*! cursor: cursor sweeps */
-#define WT_STAT_CONN_CURSOR_SWEEP 1152
+#define WT_STAT_CONN_CURSOR_SWEEP 1153
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1153
+#define WT_STAT_CONN_CURSOR_UPDATE 1154
/*! cursor: cursors cached on close */
-#define WT_STAT_CONN_CURSOR_CACHE 1154
+#define WT_STAT_CONN_CURSOR_CACHE 1155
/*! cursor: cursors reused from cache */
-#define WT_STAT_CONN_CURSOR_REOPEN 1155
+#define WT_STAT_CONN_CURSOR_REOPEN 1156
/*! cursor: truncate calls */
-#define WT_STAT_CONN_CURSOR_TRUNCATE 1156
+#define WT_STAT_CONN_CURSOR_TRUNCATE 1157
/*! data-handle: connection data handles currently active */
-#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1157
+#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1158
/*! data-handle: connection sweep candidate became referenced */
-#define WT_STAT_CONN_DH_SWEEP_REF 1158
+#define WT_STAT_CONN_DH_SWEEP_REF 1159
/*! data-handle: connection sweep dhandles closed */
-#define WT_STAT_CONN_DH_SWEEP_CLOSE 1159
+#define WT_STAT_CONN_DH_SWEEP_CLOSE 1160
/*! data-handle: connection sweep dhandles removed from hash list */
-#define WT_STAT_CONN_DH_SWEEP_REMOVE 1160
+#define WT_STAT_CONN_DH_SWEEP_REMOVE 1161
/*! data-handle: connection sweep time-of-death sets */
-#define WT_STAT_CONN_DH_SWEEP_TOD 1161
+#define WT_STAT_CONN_DH_SWEEP_TOD 1162
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_SWEEPS 1162
+#define WT_STAT_CONN_DH_SWEEPS 1163
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1163
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1164
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1164
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1165
/*! lock: checkpoint lock acquisitions */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1165
+#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1166
/*! lock: checkpoint lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1166
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1167
/*! lock: checkpoint lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1167
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1168
/*!
* lock: commit timestamp queue lock application thread time waiting for
* the dhandle lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_APPLICATION 1168
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_APPLICATION 1169
/*!
* lock: commit timestamp queue lock internal thread time waiting for the
* dhandle lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_INTERNAL 1169
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_INTERNAL 1170
/*! lock: commit timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_READ_COUNT 1170
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_READ_COUNT 1171
/*! lock: commit timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WRITE_COUNT 1171
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WRITE_COUNT 1172
/*!
* lock: dhandle lock application thread time waiting for the dhandle
* lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1172
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1173
/*!
* lock: dhandle lock internal thread time waiting for the dhandle lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1173
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1174
/*! lock: dhandle read lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1174
+#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1175
/*! lock: dhandle write lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1175
+#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1176
/*! lock: metadata lock acquisitions */
-#define WT_STAT_CONN_LOCK_METADATA_COUNT 1176
+#define WT_STAT_CONN_LOCK_METADATA_COUNT 1177
/*! lock: metadata lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1177
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1178
/*! lock: metadata lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1178
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1179
/*!
* lock: read timestamp queue lock application thread time waiting for
* the dhandle lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1179
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1180
/*!
* lock: read timestamp queue lock internal thread time waiting for the
* dhandle lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1180
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1181
/*! lock: read timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1181
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1182
/*! lock: read timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1182
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1183
/*! lock: schema lock acquisitions */
-#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1183
+#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1184
/*! lock: schema lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1184
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1185
/*! lock: schema lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1185
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1186
/*!
* lock: table lock application thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1186
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1187
/*!
* lock: table lock internal thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1187
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1188
/*! lock: table read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1188
+#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1189
/*! lock: table write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1189
+#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1190
/*!
* lock: txn global lock application thread time waiting for the dhandle
* lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1190
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1191
/*!
* lock: txn global lock internal thread time waiting for the dhandle
* lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1191
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1192
/*! lock: txn global read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1192
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1193
/*! lock: txn global write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1193
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1194
/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1194
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1195
/*! log: force archive time sleeping (usecs) */
-#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1195
+#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1196
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1196
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1197
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1197
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1198
/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1198
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1199
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1199
+#define WT_STAT_CONN_LOG_FLUSH 1200
/*! log: log force write operations */
-#define WT_STAT_CONN_LOG_FORCE_WRITE 1200
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1201
/*! log: log force write operations skipped */
-#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1201
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1202
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1202
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1203
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1203
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1204
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1204
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1205
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1205
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1206
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1206
+#define WT_STAT_CONN_LOG_SCANS 1207
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1207
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1208
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1208
+#define WT_STAT_CONN_LOG_WRITE_LSN 1209
/*! log: log server thread write LSN walk skipped */
-#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1209
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1210
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1210
+#define WT_STAT_CONN_LOG_SYNC 1211
/*! log: log sync time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DURATION 1211
+#define WT_STAT_CONN_LOG_SYNC_DURATION 1212
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1212
+#define WT_STAT_CONN_LOG_SYNC_DIR 1213
/*! log: log sync_dir time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1213
+#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1214
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1214
+#define WT_STAT_CONN_LOG_WRITES 1215
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1215
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1216
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1216
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1217
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1217
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1218
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1218
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1219
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1219
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1220
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1220
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1221
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1221
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1222
/*! log: slot close lost race */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1222
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1223
/*! log: slot close unbuffered waits */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1223
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1224
/*! log: slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1224
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1225
/*! log: slot join atomic update races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1225
+#define WT_STAT_CONN_LOG_SLOT_RACES 1226
/*! log: slot join calls atomic updates raced */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1226
+#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1227
/*! log: slot join calls did not yield */
-#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1227
+#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1228
/*! log: slot join calls found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1228
+#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1229
/*! log: slot join calls slept */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1229
+#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1230
/*! log: slot join calls yielded */
-#define WT_STAT_CONN_LOG_SLOT_YIELD 1230
+#define WT_STAT_CONN_LOG_SLOT_YIELD 1231
/*! log: slot join found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1231
+#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1232
/*! log: slot joins yield time (usecs) */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1232
+#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1233
/*! log: slot transitions unable to find free slot */
-#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1233
+#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1234
/*! log: slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1234
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1235
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1235
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1236
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1236
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1237
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1237
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1238
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1238
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1239
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1239
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1240
/*! perf: file system read latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1240
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1241
/*! perf: file system read latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1241
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1242
/*! perf: file system read latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1242
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1243
/*! perf: file system read latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1243
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1244
/*! perf: file system read latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1244
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1245
/*! perf: file system read latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1245
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1246
/*! perf: file system write latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1246
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1247
/*! perf: file system write latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1247
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1248
/*! perf: file system write latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1248
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1249
/*! perf: file system write latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1249
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1250
/*! perf: file system write latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1250
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1251
/*! perf: file system write latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1251
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1252
/*! perf: operation read latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1252
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1253
/*! perf: operation read latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1253
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1254
/*! perf: operation read latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1254
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1255
/*! perf: operation read latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1255
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1256
/*! perf: operation read latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1256
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1257
/*! perf: operation write latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1257
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1258
/*! perf: operation write latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1258
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1259
/*! perf: operation write latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1259
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1260
/*! perf: operation write latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1260
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1261
/*! perf: operation write latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1261
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1262
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1262
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1263
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1263
+#define WT_STAT_CONN_REC_PAGES 1264
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1264
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1265
/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1265
+#define WT_STAT_CONN_REC_PAGE_DELETE 1266
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1266
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1267
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1267
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1268
/*! session: open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1268
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1269
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1269
+#define WT_STAT_CONN_SESSION_OPEN 1270
/*! session: table alter failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1270
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1271
/*! session: table alter successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1271
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1272
/*! session: table alter unchanged and skipped */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1272
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1273
/*! session: table compact failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1273
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1274
/*! session: table compact successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1274
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1275
/*! session: table create failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1275
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1276
/*! session: table create successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1276
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1277
/*! session: table drop failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1277
+#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1278
/*! session: table drop successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1278
+#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1279
/*! session: table rebalance failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1279
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1280
/*! session: table rebalance successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1280
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1281
/*! session: table rename failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1281
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1282
/*! session: table rename successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1282
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1283
/*! session: table salvage failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1283
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1284
/*! session: table salvage successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1284
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1285
/*! session: table truncate failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1285
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1286
/*! session: table truncate successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1286
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1287
/*! session: table verify failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1287
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1288
/*! session: table verify successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1288
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1289
/*! thread-state: active filesystem fsync calls */
-#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1289
+#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1290
/*! thread-state: active filesystem read calls */
-#define WT_STAT_CONN_THREAD_READ_ACTIVE 1290
+#define WT_STAT_CONN_THREAD_READ_ACTIVE 1291
/*! thread-state: active filesystem write calls */
-#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1291
+#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1292
/*! thread-yield: application thread time evicting (usecs) */
-#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1292
+#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1293
/*! thread-yield: application thread time waiting for cache (usecs) */
-#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1293
+#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1294
/*!
* thread-yield: connection close blocked waiting for transaction state
* stabilization
*/
-#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1294
+#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1295
/*! thread-yield: connection close yielded for lsm manager shutdown */
-#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1295
+#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1296
/*! thread-yield: data handle lock yielded */
-#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1296
+#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1297
/*!
* thread-yield: get reference for page index and slot time sleeping
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1297
+#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1298
/*! thread-yield: log server sync yielded for log write */
-#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1298
+#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1299
/*! thread-yield: page access yielded due to prepare state change */
-#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1299
+#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1300
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1300
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1301
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1301
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1302
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1302
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1303
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1303
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1304
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1304
+#define WT_STAT_CONN_PAGE_SLEEP 1305
/*!
* thread-yield: page delete rollback time sleeping for state change
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1305
+#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1306
/*! thread-yield: page reconciliation yielded due to child modification */
-#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1306
+#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1307
/*! transaction: commit timestamp queue insert to empty */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_EMPTY 1307
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_EMPTY 1308
/*! transaction: commit timestamp queue inserts to tail */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_TAIL 1308
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_TAIL 1309
/*! transaction: commit timestamp queue inserts total */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1309
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1310
/*! transaction: commit timestamp queue length */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1310
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1311
/*! transaction: number of named snapshots created */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1311
+#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1312
/*! transaction: number of named snapshots dropped */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1312
+#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1313
/*! transaction: prepared transactions */
-#define WT_STAT_CONN_TXN_PREPARE 1313
+#define WT_STAT_CONN_TXN_PREPARE 1314
/*! transaction: prepared transactions committed */
-#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1314
+#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1315
/*! transaction: prepared transactions currently active */
-#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1315
+#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1316
/*! transaction: prepared transactions rolled back */
-#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1316
+#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1317
/*! transaction: query timestamp calls */
-#define WT_STAT_CONN_TXN_QUERY_TS 1317
+#define WT_STAT_CONN_TXN_QUERY_TS 1318
/*! transaction: read timestamp queue insert to empty */
-#define WT_STAT_CONN_TXN_READ_QUEUE_EMPTY 1318
+#define WT_STAT_CONN_TXN_READ_QUEUE_EMPTY 1319
/*! transaction: read timestamp queue inserts to head */
-#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1319
+#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1320
/*! transaction: read timestamp queue inserts total */
-#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1320
+#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1321
/*! transaction: read timestamp queue length */
-#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1321
+#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1322
/*! transaction: rollback to stable calls */
-#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE 1322
+#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE 1323
/*! transaction: rollback to stable updates aborted */
-#define WT_STAT_CONN_TXN_ROLLBACK_UPD_ABORTED 1323
+#define WT_STAT_CONN_TXN_ROLLBACK_UPD_ABORTED 1324
/*! transaction: rollback to stable updates removed from lookaside */
-#define WT_STAT_CONN_TXN_ROLLBACK_LAS_REMOVED 1324
+#define WT_STAT_CONN_TXN_ROLLBACK_LAS_REMOVED 1325
/*! transaction: set timestamp calls */
-#define WT_STAT_CONN_TXN_SET_TS 1325
+#define WT_STAT_CONN_TXN_SET_TS 1326
/*! transaction: set timestamp commit calls */
-#define WT_STAT_CONN_TXN_SET_TS_COMMIT 1326
+#define WT_STAT_CONN_TXN_SET_TS_COMMIT 1327
/*! transaction: set timestamp commit updates */
-#define WT_STAT_CONN_TXN_SET_TS_COMMIT_UPD 1327
+#define WT_STAT_CONN_TXN_SET_TS_COMMIT_UPD 1328
/*! transaction: set timestamp oldest calls */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1328
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1329
/*! transaction: set timestamp oldest updates */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1329
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1330
/*! transaction: set timestamp stable calls */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE 1330
+#define WT_STAT_CONN_TXN_SET_TS_STABLE 1331
/*! transaction: set timestamp stable updates */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1331
+#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1332
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1332
+#define WT_STAT_CONN_TXN_BEGIN 1333
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1333
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1334
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1334
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1335
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1335
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1336
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1336
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1337
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1337
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1338
/*! transaction: transaction checkpoint scrub dirty target */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1338
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1339
/*! transaction: transaction checkpoint scrub time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1339
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1340
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1340
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1341
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1341
+#define WT_STAT_CONN_TXN_CHECKPOINT 1342
/*!
* transaction: transaction checkpoints skipped because database was
* clean
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1342
+#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1343
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1343
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1344
/*!
* transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1344
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1345
/*!
* transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1345
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1346
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1346
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1347
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1347
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1348
/*!
* transaction: transaction range of IDs currently pinned by named
* snapshots
*/
-#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1348
+#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1349
/*! transaction: transaction range of timestamps currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1349
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1350
+/*! transaction: transaction range of timestamps pinned by a checkpoint */
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1351
/*!
* transaction: transaction range of timestamps pinned by the oldest
* timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1350
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1352
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1351
+#define WT_STAT_CONN_TXN_SYNC 1353
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1352
+#define WT_STAT_CONN_TXN_COMMIT 1354
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1353
+#define WT_STAT_CONN_TXN_ROLLBACK 1355
/*! transaction: update conflicts */
-#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1354
+#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1356
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
index 4d9f6f92832..13d7d857a04 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
@@ -265,6 +265,12 @@ open: WT_WITH_SCHEMA_LOCK(session,
}
if (!F_ISSET(clsm, WT_CLSM_ACTIVE)) {
+ /*
+ * Opening this LSM cursor has opened a number of btree
+ * cursors; ensure other code doesn't think this is the first
+ * cursor in a session.
+ */
+ ++session->ncursors;
WT_RET(__cursor_enter(session));
F_SET(clsm, WT_CLSM_ACTIVE);
}
@@ -284,6 +290,7 @@ __clsm_leave(WT_CURSOR_LSM *clsm)
session = (WT_SESSION_IMPL *)clsm->iface.session;
if (F_ISSET(clsm, WT_CLSM_ACTIVE)) {
+ --session->ncursors;
__cursor_leave(session);
F_CLR(clsm, WT_CLSM_ACTIVE);
}
@@ -365,12 +372,17 @@ __clsm_deleted_decode(WT_CURSOR_LSM *clsm, WT_ITEM *value)
* Close any btree cursors that are not needed.
*/
static int
-__clsm_close_cursors(WT_CURSOR_LSM *clsm, u_int start, u_int end)
+__clsm_close_cursors(
+ WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm, u_int start, u_int end)
{
WT_BLOOM *bloom;
WT_CURSOR *c;
u_int i;
+ __wt_verbose(session, WT_VERB_LSM,
+ "LSM closing cursor session(%p):clsm(%p), start: %u, end: %u",
+ (void *)session, (void *)clsm, start, end);
+
if (clsm->chunks == NULL || clsm->nchunks == 0)
return (0);
@@ -609,7 +621,7 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
saved_gen = lsm_tree->dsk_gen;
locked = false;
__wt_lsm_tree_readunlock(session, lsm_tree);
- WT_ERR(__clsm_close_cursors(
+ WT_ERR(__clsm_close_cursors(session,
clsm, close_range_start, close_range_end));
__wt_lsm_tree_readlock(session, lsm_tree);
locked = true;
@@ -626,6 +638,10 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
clsm->nchunks = nchunks;
/* Open the cursors for chunks that have changed. */
+ __wt_verbose(session, WT_VERB_LSM,
+ "LSM opening cursor session(%p):clsm(%p)%s, chunks: %u, good: %u",
+ (void *)session, (void *)clsm,
+ update ? ", update" : "", nchunks, ngood);
for (i = ngood; i != nchunks; i++) {
chunk = lsm_tree->chunk[i + start_chunk];
/* Copy the maximum transaction ID. */
@@ -1736,7 +1752,7 @@ __wt_clsm_close(WT_CURSOR *cursor)
*/
clsm = (WT_CURSOR_LSM *)cursor;
CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
- WT_TRET(__clsm_close_cursors(clsm, 0, clsm->nchunks));
+ WT_TRET(__clsm_close_cursors(session, clsm, 0, clsm->nchunks));
__clsm_free_chunks(session, clsm);
/* In case we were somehow left positioned, clear that. */
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
index 40ff5fc0b26..1a5c60344bc 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
@@ -72,11 +72,7 @@ __lsm_general_worker_start(WT_SESSION_IMPL *session)
worker_args->type =
WT_LSM_WORK_DROP | WT_LSM_WORK_SWITCH;
else {
- worker_args->type =
- WT_LSM_WORK_BLOOM |
- WT_LSM_WORK_DROP |
- WT_LSM_WORK_FLUSH |
- WT_LSM_WORK_SWITCH;
+ worker_args->type = WT_LSM_WORK_GENERAL_OPS;
/*
* Only allow half of the threads to run merges to
* avoid all workers getting stuck in long-running
@@ -422,9 +418,10 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
fillms = 10000;
/*
* If the tree appears to not be triggering enough
- * LSM maintenance, help it out. Additional work units
- * don't hurt, and can be necessary if some work
- * units aren't completed for some reason.
+ * LSM maintenance, help it out. Some types of
+ * additional work units don't hurt, and can be
+ * necessary if some work units aren't completed for
+ * some reason.
* If the tree hasn't been modified, and there are
* more than 1 chunk - try to get the tree smaller
* so queries run faster.
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
index 6f18f4fb152..a283670eba6 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
@@ -313,6 +313,37 @@ __wt_lsm_chunk_visible_all(
}
/*
+ * __lsm_set_chunk_evictable --
+ * Enable eviction in an LSM chunk.
+ */
+static int
+__lsm_set_chunk_evictable(
+ WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk, bool need_handle)
+{
+ WT_BTREE *btree;
+ WT_DECL_RET;
+
+ if (chunk->evict_enabled != 0)
+ return (0);
+
+ /* See if we win the race to enable eviction. */
+ if (__wt_atomic_cas32(&chunk->evict_enabled, 0, 1)) {
+ if (need_handle)
+ WT_RET(__wt_session_get_dhandle(
+ session, chunk->uri, NULL, NULL, 0));
+ btree = session->dhandle->handle;
+ if (btree->evict_disabled_open) {
+ btree->evict_disabled_open = false;
+ __wt_evict_file_exclusive_off(session);
+ }
+
+ if (need_handle)
+ WT_TRET(__wt_session_release_dhandle(session));
+ }
+ return (ret);
+}
+
+/*
* __lsm_checkpoint_chunk --
* Checkpoint an LSM chunk, separated out to make locking easier.
*/
@@ -340,7 +371,6 @@ int
__wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk)
{
- WT_BTREE *btree;
WT_DECL_RET;
WT_TXN_ISOLATION saved_isolation;
bool flush_set, release_dhandle;
@@ -375,6 +405,14 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
WT_RET(__wt_txn_update_oldest(
session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
if (!__wt_lsm_chunk_visible_all(session, chunk)) {
+ /*
+ * If there is cache pressure consider making a chunk evictable
+ * to avoid the cache getting stuck when history is required.
+ */
+ if (__wt_eviction_needed(session, false, false, NULL))
+ WT_ERR(__wt_lsm_manager_push_entry(
+ session, WT_LSM_WORK_ENABLE_EVICT, 0, lsm_tree));
+
__wt_verbose(session, WT_VERB_LSM,
"LSM worker %s: running transaction, return",
chunk->uri);
@@ -446,11 +484,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
* Enable eviction on the live chunk so it doesn't block the cache.
* Future reads should direct to the on-disk chunk anyway.
*/
- btree = session->dhandle->handle;
- if (btree->evict_disabled_open) {
- btree->evict_disabled_open = false;
- __wt_evict_file_exclusive_off(session);
- }
+ WT_ERR(__lsm_set_chunk_evictable(session, chunk, false));
release_dhandle = false;
WT_ERR(__wt_session_release_dhandle(session));
@@ -481,6 +515,54 @@ err: if (flush_set)
}
/*
+ * __wt_lsm_work_enable_evict --
+ * LSM usually pins live chunks in memory - preferring to force them
+ * out via a checkpoint when they are no longer required. For applications
+ * that keep data pinned for a long time this can lead to the cache
+ * being pinned full. This work unit detects that case, and enables
+ * regular eviction in chunks that can be correctly evicted.
+ */
+int
+__wt_lsm_work_enable_evict(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+{
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk;
+ WT_LSM_WORKER_COOKIE cookie;
+ u_int i;
+
+ WT_CLEAR(cookie);
+
+ /* Only do this if there is cache pressure. */
+ if (!__wt_eviction_needed(session, false, false, NULL))
+ return (0);
+
+ WT_RET(__lsm_copy_chunks(session, lsm_tree, &cookie, false));
+
+ /*
+ * Turn on eviction in chunks that have had some chance to
+ * checkpoint if there is cache pressure.
+ */
+ for (i = 0; cookie.nchunks > 2 && i < cookie.nchunks - 2; i++) {
+ chunk = cookie.chunk_array[i];
+
+ /*
+ * Skip if the chunk isn't on disk yet, is already evictable, or
+ * is in cache for a reason other than transaction visibility.
+ */
+ if (!F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) ||
+ chunk->evict_enabled != 0 ||
+ __wt_lsm_chunk_visible_all(session, chunk))
+ continue;
+
+ WT_ERR(__lsm_set_chunk_evictable(session, chunk, true));
+ }
+
+err: __lsm_unpin_chunks(session, &cookie);
+ __wt_free(session, cookie.chunk_array);
+ return (ret);
+}
+
+/*
* __lsm_bloom_create --
* Create a bloom filter for a chunk of the LSM tree that has been
* checkpointed but not yet been merged.
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_worker.c b/src/third_party/wiredtiger/src/lsm/lsm_worker.c
index 82f72bdf355..8588737f6c3 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_worker.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_worker.c
@@ -42,7 +42,9 @@ __wt_lsm_worker_stop(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args)
/*
* __lsm_worker_general_op --
- * Execute a single bloom, drop or flush work unit.
+ * Execute a single medium-importance maintenance operation that should
+ * not be long-running. That includes bloom creation, drop, flush and
+ * enable-eviction work unit types.
*/
static int
__lsm_worker_general_op(
@@ -55,11 +57,7 @@ __lsm_worker_general_op(
*completed = false;
- /*
- * Return if this thread cannot process a bloom, drop or flush.
- */
- if (!FLD_ISSET(cookie->type,
- WT_LSM_WORK_BLOOM | WT_LSM_WORK_DROP | WT_LSM_WORK_FLUSH))
+ if (!FLD_ISSET(cookie->type, WT_LSM_WORK_GENERAL_OPS))
return (WT_NOTFOUND);
if ((ret = __wt_lsm_manager_pop_entry(session,
@@ -88,6 +86,8 @@ __lsm_worker_general_op(
WT_ERR(__wt_lsm_free_chunks(session, entry->lsm_tree));
else if (entry->type == WT_LSM_WORK_BLOOM)
WT_ERR(__wt_lsm_work_bloom(session, entry->lsm_tree));
+ else if (entry->type == WT_LSM_WORK_ENABLE_EVICT)
+ WT_ERR(__wt_lsm_work_enable_evict(session, entry->lsm_tree));
*completed = true;
err: __wt_lsm_manager_free_work_unit(session, entry);
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index eb3b0038525..c0ce4c2d235 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -47,10 +47,11 @@ typedef struct {
/* Track the page's min/maximum transactions. */
uint64_t max_txn;
- uint64_t min_txn_unstable;
WT_DECL_TIMESTAMP(max_timestamp)
- WT_DECL_TIMESTAMP(max_onpage_timestamp)
- WT_DECL_TIMESTAMP(min_saved_timestamp)
+
+ /* Lookaside boundary tracking. */
+ uint64_t unstable_txn;
+ WT_DECL_TIMESTAMP(unstable_timestamp)
u_int updates_seen; /* Count of updates seen. */
u_int updates_unstable; /* Count of updates not visible_all. */
@@ -422,14 +423,27 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
return (EBUSY);
}
+ /* Initialize the reconciliation structure for each new run. */
+ if ((ret = __rec_init(
+ session, ref, flags, salvage, &session->reconcile)) != 0) {
+ WT_PAGE_UNLOCK(session, page);
+ return (ret);
+ }
+ r = session->reconcile;
+
oldest_id = __wt_txn_oldest_id(session);
+
+ /*
+ * During eviction, save the transaction state that causes history to
+ * be pinned, regardless of whether reconciliation succeeds or fails.
+ * There is usually no point retrying eviction until this state
+ * changes.
+ */
if (LF_ISSET(WT_REC_EVICT)) {
mod->last_eviction_id = oldest_id;
#ifdef HAVE_TIMESTAMPS
- WT_WITH_TIMESTAMP_READLOCK(session,
- &S2C(session)->txn_global.rwlock,
- __wt_timestamp_set(&mod->last_eviction_timestamp,
- &S2C(session)->txn_global.pinned_timestamp));
+ __wt_txn_pinned_timestamp(
+ session, &mod->last_eviction_timestamp);
#endif
mod->last_evict_pass_gen = S2C(session)->cache->evict_pass_gen;
}
@@ -444,14 +458,6 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
mod->last_oldest_id = oldest_id;
#endif
- /* Initialize the reconciliation structure for each new run. */
- if ((ret = __rec_init(
- session, ref, flags, salvage, &session->reconcile)) != 0) {
- WT_PAGE_UNLOCK(session, page);
- return (ret);
- }
- r = session->reconcile;
-
/* Reconcile the page. */
switch (page->type) {
case WT_PAGE_COL_FIX:
@@ -495,6 +501,17 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
else
WT_TRET(__rec_write_wrapup_err(session, r, page));
+#ifdef HAVE_TIMESTAMPS
+ /*
+ * If reconciliation completes successfully, save the stable timestamp.
+ */
+ if (ret == 0 && S2C(session)->txn_global.has_stable_timestamp)
+ WT_WITH_TIMESTAMP_READLOCK(session,
+ &S2C(session)->txn_global.rwlock,
+ __wt_timestamp_set(&mod->last_stable_timestamp,
+ &S2C(session)->txn_global.stable_timestamp));
+#endif
+
/* Release the reconciliation lock. */
WT_PAGE_UNLOCK(session, page);
@@ -681,7 +698,7 @@ __rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r)
} else {
/*
* Track the page's maximum transaction ID (used to decide if
- * we're likely to be able to evict this page in the future).
+ * we can evict a clean page and discard its history).
*/
mod->rec_max_txn = r->max_txn;
__wt_timestamp_set(&mod->rec_max_timestamp, &r->max_timestamp);
@@ -873,7 +890,6 @@ __rec_init(WT_SESSION_IMPL *session,
WT_PAGE *page;
WT_RECONCILE *r;
WT_TXN_GLOBAL *txn_global;
- bool las_skew_oldest;
btree = S2BT(session);
page = ref->page;
@@ -928,27 +944,24 @@ __rec_init(WT_SESSION_IMPL *session,
* We usually prefer to skew to newer versions, the logic being that by
* the time the next checkpoint runs, it is likely that all the updates
* we choose will be stable. However, if checkpointing with a
- * timestamp (indicated by a stable_timestamp being set), and the
- * timestamp hasn't changed since the last time this page was
- * reconciled, skew oldest instead. If a checkpoint is already running,
- * the oldest version is more likely to be what it needs.
+ * timestamp (indicated by a stable_timestamp being set), and there is
+ * a checkpoint already running, or this page was read with lookaside
+ * history, or the stable timestamp hasn't changed since last time this
+ * page was successfully reconciled, skew oldest instead.
*/
- if (__wt_btree_immediately_durable(session))
- las_skew_oldest = false;
- else {
- WT_ORDERED_READ(las_skew_oldest,
- txn_global->has_stable_timestamp);
- if (las_skew_oldest) {
- las_skew_oldest = (ref->page_las != NULL &&
- !__wt_txn_visible_all(session, WT_TXN_NONE,
- WT_TIMESTAMP_NULL(
- &ref->page_las->min_timestamp))) ||
- btree->checkpoint_gen !=
- __wt_gen(session, WT_GEN_CHECKPOINT);
- }
- }
- r->las_skew_newest = LF_ISSET(WT_REC_LOOKASIDE) &&
- LF_ISSET(WT_REC_VISIBLE_ALL) && !las_skew_oldest;
+ r->las_skew_newest =
+ LF_ISSET(WT_REC_LOOKASIDE) && LF_ISSET(WT_REC_VISIBLE_ALL);
+#ifdef HAVE_TIMESTAMPS
+ if (r->las_skew_newest &&
+ !__wt_btree_immediately_durable(session) &&
+ txn_global->has_stable_timestamp &&
+ ((btree->checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT) &&
+ txn_global->stable_is_pinned) ||
+ FLD_ISSET(page->modify->restore_state, WT_PAGE_RS_LOOKASIDE) ||
+ __wt_timestamp_cmp(&page->modify->last_stable_timestamp,
+ &txn_global->stable_timestamp) == 0))
+ r->las_skew_newest = false;
+#endif
/*
* When operating on the lookaside table, we should never try
@@ -979,10 +992,21 @@ __rec_init(WT_SESSION_IMPL *session,
/* Track the page's min/maximum transaction */
r->max_txn = WT_TXN_NONE;
- r->min_txn_unstable = WT_TXN_ABORTED;
__wt_timestamp_set_zero(&r->max_timestamp);
- __wt_timestamp_set_zero(&r->max_onpage_timestamp);
- __wt_timestamp_set_inf(&r->min_saved_timestamp);
+
+ /*
+ * Track the first unstable transaction (when skewing newest this is
+ * the newest update, otherwise the newest update not on the page).
+ * This is the boundary between the on-page information and the history
+ * stored in the lookaside table.
+ */
+ if (r->las_skew_newest) {
+ r->unstable_txn = WT_TXN_NONE;
+ __wt_timestamp_set_zero(&r->unstable_timestamp);
+ } else {
+ r->unstable_txn = WT_TXN_ABORTED;
+ __wt_timestamp_set_inf(&r->unstable_timestamp);
+ }
/* Track if updates were used and/or uncommitted. */
r->updates_seen = r->updates_unstable = 0;
@@ -1264,7 +1288,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
WT_UPDATE *first_txn_upd, *first_upd, *upd;
wt_timestamp_t *timestampp;
size_t upd_memsize;
- uint64_t max_txn, min_txn_unstable, txnid;
+ uint64_t max_txn, txnid;
bool all_visible, skipped_birthmark, uncommitted;
#ifdef HAVE_TIMESTAMPS
@@ -1280,7 +1304,6 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
first_txn_upd = NULL;
upd_memsize = 0;
max_txn = WT_TXN_NONE;
- min_txn_unstable = WT_TXN_ABORTED;
skipped_birthmark = uncommitted = false;
/*
@@ -1380,13 +1403,6 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
if (upd->type == WT_UPDATE_BIRTHMARK)
skipped_birthmark = true;
- /*
- * Track minimum transaction ID for unstable updates.
- */
- if (txnid != WT_TXN_NONE &&
- WT_TXNID_LT(txnid, min_txn_unstable))
- min_txn_unstable = txnid;
-
continue;
}
@@ -1434,23 +1450,11 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
if (WT_TXNID_LT(r->max_txn, max_txn))
r->max_txn = max_txn;
- /*
- * Track the oldest unstable transaction in the page. It is used to
- * decide whether to or not to read the history during a page read.
- */
- if (WT_TXNID_LT(min_txn_unstable, r->min_txn_unstable))
- r->min_txn_unstable = min_txn_unstable;
-
#ifdef HAVE_TIMESTAMPS
/* Update the maximum timestamp. */
if (first_ts_upd != NULL &&
__wt_timestamp_cmp(&r->max_timestamp, &first_ts_upd->timestamp) < 0)
__wt_timestamp_set(&r->max_timestamp, &first_ts_upd->timestamp);
-
- /* Update the maximum on-page timestamp. */
- if (upd != NULL &&
- __wt_timestamp_cmp(&upd->timestamp, &r->max_onpage_timestamp) > 0)
- __wt_timestamp_set(&r->max_onpage_timestamp, &upd->timestamp);
#endif
/*
@@ -1527,24 +1531,38 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
if (upd_savedp != NULL)
*upd_savedp = true;
+ /*
+ * Track the first off-page update when saving history in the lookaside
+ * table. When skewing newest, we want the first (non-aborted) update
+ * after the one stored on the page. Otherwise, we want the update
+ * before the on-page update.
+ */
+ if (F_ISSET(r, WT_REC_LOOKASIDE) && r->las_skew_newest) {
+ if (WT_TXNID_LT(r->unstable_txn, first_upd->txnid))
+ r->unstable_txn = first_upd->txnid;
#ifdef HAVE_TIMESTAMPS
- /* Track the oldest saved timestamp for lookaside. */
- if (F_ISSET(r, WT_REC_LOOKASIDE)) {
- /* If no updates had timestamps, we're done. */
- if (first_ts_upd == NULL)
- __wt_timestamp_set_zero(&r->min_saved_timestamp);
+ if (first_ts_upd != NULL &&
+ __wt_timestamp_cmp(&r->unstable_timestamp,
+ &first_ts_upd->timestamp) < 0)
+ __wt_timestamp_set(&r->unstable_timestamp,
+ &first_ts_upd->timestamp);
+#endif
+ } else if (F_ISSET(r, WT_REC_LOOKASIDE)) {
for (upd = first_upd; upd != *updp; upd = upd->next) {
- if (upd->txnid != WT_TXN_ABORTED &&
- __wt_timestamp_cmp(&upd->timestamp,
- &r->min_saved_timestamp) < 0)
- __wt_timestamp_set(&r->min_saved_timestamp,
- &upd->timestamp);
+ if (upd->txnid == WT_TXN_ABORTED)
+ continue;
- WT_ASSERT(session, upd->txnid == WT_TXN_ABORTED ||
- WT_TXNID_LE(upd->txnid, r->max_txn));
+ if (upd->txnid != WT_TXN_NONE &&
+ WT_TXNID_LT(upd->txnid, r->unstable_txn))
+ r->unstable_txn = upd->txnid;
+#ifdef HAVE_TIMESTAMPS
+ if (__wt_timestamp_cmp(&upd->timestamp,
+ &r->unstable_timestamp) < 0)
+ __wt_timestamp_set(&r->unstable_timestamp,
+ &upd->timestamp);
+#endif
}
}
-#endif
check_original_value:
/*
@@ -3429,16 +3447,15 @@ __rec_split_write_supd(WT_SESSION_IMPL *session,
done: if (F_ISSET(r, WT_REC_LOOKASIDE)) {
/* Track the oldest lookaside timestamp seen so far. */
- multi->page_las.las_skew_newest = r->las_skew_newest;
- multi->page_las.las_max_txn = r->max_txn;
- multi->page_las.las_min_txn = r->min_txn_unstable;
- WT_ASSERT(session, r->max_txn != WT_TXN_NONE);
- WT_ASSERT(session, r->min_txn_unstable != WT_TXN_NONE);
+ multi->page_las.skew_newest = r->las_skew_newest;
+ multi->page_las.max_txn = r->max_txn;
+ multi->page_las.unstable_txn = r->unstable_txn;
+ WT_ASSERT(session, r->unstable_txn != WT_TXN_NONE);
#ifdef HAVE_TIMESTAMPS
- __wt_timestamp_set(&multi->page_las.min_timestamp,
- &r->min_saved_timestamp);
- __wt_timestamp_set(&multi->page_las.onpage_timestamp,
- &r->max_onpage_timestamp);
+ __wt_timestamp_set(&multi->page_las.max_timestamp,
+ &r->max_timestamp);
+ __wt_timestamp_set(&multi->page_las.unstable_timestamp,
+ &r->unstable_timestamp);
#endif
}
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 909019b3b24..0d39a5b682e 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -840,6 +840,7 @@ static const char * const __stats_connection_desc[] = {
"cache: maximum page size at eviction",
"cache: modified pages evicted",
"cache: modified pages evicted by application threads",
+ "cache: operations timed out waiting for space in cache",
"cache: overflow pages read into cache",
"cache: page split during eviction deepened the tree",
"cache: page written requiring lookaside records",
@@ -1097,6 +1098,7 @@ static const char * const __stats_connection_desc[] = {
"transaction: transaction range of IDs currently pinned by a checkpoint",
"transaction: transaction range of IDs currently pinned by named snapshots",
"transaction: transaction range of timestamps currently pinned",
+ "transaction: transaction range of timestamps pinned by a checkpoint",
"transaction: transaction range of timestamps pinned by the oldest timestamp",
"transaction: transaction sync calls",
"transaction: transactions committed",
@@ -1237,6 +1239,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing cache_eviction_maximum_page_size */
stats->cache_eviction_dirty = 0;
stats->cache_eviction_app_dirty = 0;
+ stats->cache_timed_out_ops = 0;
stats->cache_read_overflow = 0;
stats->cache_eviction_deepen = 0;
stats->cache_write_lookaside = 0;
@@ -1494,6 +1497,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing txn_pinned_checkpoint_range */
/* not clearing txn_pinned_snapshot_range */
/* not clearing txn_pinned_timestamp */
+ /* not clearing txn_pinned_timestamp_checkpoint */
/* not clearing txn_pinned_timestamp_oldest */
stats->txn_sync = 0;
stats->txn_commit = 0;
@@ -1662,6 +1666,7 @@ __wt_stat_connection_aggregate(
to->cache_eviction_dirty += WT_STAT_READ(from, cache_eviction_dirty);
to->cache_eviction_app_dirty +=
WT_STAT_READ(from, cache_eviction_app_dirty);
+ to->cache_timed_out_ops += WT_STAT_READ(from, cache_timed_out_ops);
to->cache_read_overflow += WT_STAT_READ(from, cache_read_overflow);
to->cache_eviction_deepen +=
WT_STAT_READ(from, cache_eviction_deepen);
@@ -2047,6 +2052,8 @@ __wt_stat_connection_aggregate(
to->txn_pinned_snapshot_range +=
WT_STAT_READ(from, txn_pinned_snapshot_range);
to->txn_pinned_timestamp += WT_STAT_READ(from, txn_pinned_timestamp);
+ to->txn_pinned_timestamp_checkpoint +=
+ WT_STAT_READ(from, txn_pinned_timestamp_checkpoint);
to->txn_pinned_timestamp_oldest +=
WT_STAT_READ(from, txn_pinned_timestamp_oldest);
to->txn_sync += WT_STAT_READ(from, txn_sync);
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index cf233ab9a5d..4cb780c0042 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -119,9 +119,11 @@ void
__wt_txn_release_snapshot(WT_SESSION_IMPL *session)
{
WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *txn_state;
txn = &session->txn;
+ txn_global = &S2C(session)->txn_global;
txn_state = WT_SESSION_TXN_STATE(session);
WT_ASSERT(session,
@@ -131,6 +133,14 @@ __wt_txn_release_snapshot(WT_SESSION_IMPL *session)
txn_state->metadata_pinned = txn_state->pinned_id = WT_TXN_NONE;
F_CLR(txn, WT_TXN_HAS_SNAPSHOT);
+
+ /* Clear a checkpoint's pinned ID. */
+ if (WT_SESSION_IS_CHECKPOINT(session)) {
+ txn_global->checkpoint_state.pinned_id = WT_TXN_NONE;
+ __wt_timestamp_set_zero(&txn_global->checkpoint_timestamp);
+ }
+
+ __wt_txn_clear_read_timestamp(session);
}
/*
@@ -528,8 +538,7 @@ __wt_txn_release(WT_SESSION_IMPL *session)
if (WT_SESSION_IS_CHECKPOINT(session)) {
WT_ASSERT(session,
WT_SESSION_TXN_STATE(session)->id == WT_TXN_NONE);
- txn->id = txn_global->checkpoint_state.id =
- txn_global->checkpoint_state.pinned_id = WT_TXN_NONE;
+ txn->id = txn_global->checkpoint_state.id = WT_TXN_NONE;
/*
* Be extra careful to cleanup everything for checkpoints: once
@@ -548,7 +557,6 @@ __wt_txn_release(WT_SESSION_IMPL *session)
}
__wt_txn_clear_commit_timestamp(session);
- __wt_txn_clear_read_timestamp(session);
/* Free the scratch buffer allocated for logging. */
__wt_logrec_free(session, &txn->logrec);
@@ -1283,12 +1291,24 @@ __wt_txn_stats_update(WT_SESSION_IMPL *session)
txn_global->current - txn_global->oldest_id);
#if WT_TIMESTAMP_SIZE == 8
+ {
+ WT_DECL_TIMESTAMP(checkpoint_timestamp)
+ WT_DECL_TIMESTAMP(commit_timestamp)
+ WT_DECL_TIMESTAMP(pinned_timestamp)
+
+ checkpoint_timestamp = txn_global->checkpoint_timestamp;
+ commit_timestamp = txn_global->commit_timestamp;
+ pinned_timestamp = txn_global->pinned_timestamp;
+ if (checkpoint_timestamp.val != 0 &&
+ checkpoint_timestamp.val < pinned_timestamp.val)
+ pinned_timestamp = checkpoint_timestamp;
WT_STAT_SET(session, stats, txn_pinned_timestamp,
- txn_global->commit_timestamp.val -
- txn_global->pinned_timestamp.val);
+ commit_timestamp.val - pinned_timestamp.val);
+ WT_STAT_SET(session, stats, txn_pinned_timestamp_checkpoint,
+ commit_timestamp.val - checkpoint_timestamp.val);
WT_STAT_SET(session, stats, txn_pinned_timestamp_oldest,
- txn_global->commit_timestamp.val -
- txn_global->oldest_timestamp.val);
+ commit_timestamp.val - txn_global->oldest_timestamp.val);
+ }
#endif
WT_STAT_SET(session, stats, txn_pinned_snapshot_range,
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index b0ba6735056..ad8351923a0 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -372,12 +372,11 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
{
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
- double current_dirty, delta, scrub_min;
- uint64_t bytes_written_last, bytes_written_start, bytes_written_total;
+ double current_dirty, prev_dirty;
+ uint64_t bytes_written_start, bytes_written_total;
uint64_t cache_size, max_write;
- uint64_t current_us, stepdown_us, total_ms, work_us;
- uint64_t time_last, time_start, time_stop;
- bool progress;
+ uint64_t time_start, time_stop;
+ uint64_t total_ms;
conn = S2C(session);
cache = conn->cache;
@@ -388,61 +387,41 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
* scrubbing cannot help).
*/
if (F_ISSET(conn, WT_CONN_CLOSING_TIMESTAMP) ||
- cache->eviction_checkpoint_target < DBL_EPSILON ||
- cache->eviction_checkpoint_target >= cache->eviction_dirty_trigger)
+ cache->eviction_checkpoint_target < DBL_EPSILON)
return;
- time_last = time_start = __wt_clock(session);
- bytes_written_last = 0;
+ time_start = __wt_clock(session);
bytes_written_start = cache->bytes_written;
- cache_size = conn->cache_size;
+
/*
* If the cache size is zero or very small, we're done. The cache
* size can briefly become zero if we're transitioning to a shared
* cache via reconfigure. This avoids potential divide by zero.
*/
- if (cache_size < 10 * WT_MEGABYTE)
+ if ((cache_size = conn->cache_size) < 10 * WT_MEGABYTE)
return;
- /*
- * Skip scrubbing if it won't perform at-least some minimum amount of
- * work. Scrubbing is supposed to bring down the dirty data to eviction
- * checkpoint target before the actual checkpoint starts. Do not perform
- * scrubbing if the dirty data to scrub is less than a pre-configured
- * size. This size is to an extent based on the configured cache size
- * without being too large or too small for large cache sizes. For the
- * values chosen, for instance, 100 GB cache will require at-least
- * 200 MB of dirty data above eviction checkpoint target, which should
- * equate to a scrub phase a few seconds long. That said, the value of
- * 0.2% and 500 MB are still somewhat arbitrary.
- */
- scrub_min = WT_MIN((0.2 * conn->cache_size) / 100, 500 * WT_MEGABYTE);
- if (__wt_cache_dirty_leaf_inuse(cache) <
- ((cache->eviction_checkpoint_target * conn->cache_size) / 100) +
- scrub_min)
+ current_dirty =
+ (100.0 * __wt_cache_dirty_leaf_inuse(cache)) / cache_size;
+ if (current_dirty <= cache->eviction_checkpoint_target)
return;
- stepdown_us = 10000;
- work_us = 0;
- progress = false;
-
- /* Step down the scrub target (as a percentage) in units of 10MB. */
- delta = WT_MIN(1.0, (100 * 10.0 * WT_MEGABYTE) / cache_size);
-
- /*
- * Start with the scrub target equal to the expected maximum percentage
- * of dirty data in cache.
- */
- cache->eviction_scrub_limit = cache->eviction_dirty_trigger;
-
/* Stop if we write as much dirty data as is currently in cache. */
max_write = __wt_cache_dirty_leaf_inuse(cache);
- /* Step down the dirty target to the eviction trigger */
+ /* Set the dirty trigger to the target value. */
+ cache->eviction_scrub_target = cache->eviction_checkpoint_target;
+ WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target, 0);
+
+ /* Wait while the dirty level is going down. */
for (;;) {
+ __wt_sleep(0, 100 * WT_THOUSAND);
+
+ prev_dirty = current_dirty;
current_dirty =
(100.0 * __wt_cache_dirty_leaf_inuse(cache)) / cache_size;
- if (current_dirty <= cache->eviction_checkpoint_target)
+ if (current_dirty <= cache->eviction_checkpoint_target ||
+ current_dirty >= prev_dirty)
break;
/*
@@ -452,63 +431,17 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
if (F_ISSET(cache, WT_CACHE_EVICT_LOOKASIDE))
break;
- __wt_sleep(0, stepdown_us / 10);
- time_stop = __wt_clock(session);
- current_us = WT_CLOCKDIFF_US(time_stop, time_last);
- bytes_written_total =
- cache->bytes_written - bytes_written_start;
-
- if (current_dirty > cache->eviction_scrub_limit) {
- /*
- * We haven't reached the current target.
- *
- * Don't wait indefinitely: there might be dirty pages
- * that can't be evicted. If we can't meet the target,
- * give up and start the checkpoint for real.
- */
- if (current_us > WT_MAX(WT_MILLION, 10 * stepdown_us) ||
- bytes_written_total > max_write)
- break;
- continue;
- }
-
/*
- * Estimate how long the next step down of dirty data should
- * take.
- *
- * The calculation here assumes that the system is writing from
- * cache as fast as it can, and determines the write throughput
- * based on the change in the bytes written from cache since
- * the start of the call. We use that to estimate how long it
- * will take to step the dirty target down by delta.
+ * We haven't reached the current target.
*
- * Take care to avoid dividing by zero.
- */
- if (bytes_written_total - bytes_written_last > WT_MEGABYTE &&
- work_us > 0) {
- stepdown_us = (uint64_t)((delta * cache_size / 100) /
- ((double)bytes_written_total / work_us));
- stepdown_us = WT_MAX(1, stepdown_us);
- if (!progress)
- stepdown_us = WT_MIN(stepdown_us, 200000);
- progress = true;
-
- bytes_written_last = bytes_written_total;
- }
-
- work_us += current_us;
-
- /*
- * Smooth out step down: try to limit the impact on
- * performance to 10% by waiting once we reach the last
- * level.
+ * Don't wait indefinitely: there might be dirty pages
+ * that can't be evicted. If we can't meet the target,
+ * give up and start the checkpoint for real.
*/
- __wt_sleep(0, 10 * stepdown_us);
- cache->eviction_scrub_limit =
- WT_MAX(cache->eviction_dirty_target, current_dirty - delta);
- WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target,
- cache->eviction_scrub_limit);
- time_last = __wt_clock(session);
+ bytes_written_total =
+ cache->bytes_written - bytes_written_start;
+ if (bytes_written_total > max_write)
+ break;
}
time_stop = __wt_clock(session);
@@ -681,8 +614,7 @@ __checkpoint_prepare(
*/
__wt_writelock(session, &txn_global->rwlock);
txn_global->checkpoint_state = *txn_state;
- txn_global->checkpoint_txn = txn;
- txn_global->checkpoint_state.pinned_id = WT_MIN(txn->id, txn->snap_min);
+ txn_global->checkpoint_state.pinned_id = txn->snap_min;
/*
* Sanity check that the oldest ID hasn't moved on before we have
@@ -724,6 +656,8 @@ __checkpoint_prepare(
if (txn_global->has_stable_timestamp) {
__wt_timestamp_set(&txn->read_timestamp,
&txn_global->stable_timestamp);
+ __wt_timestamp_set(&txn_global->checkpoint_timestamp,
+ &txn->read_timestamp);
F_SET(txn, WT_TXN_HAS_TS_READ);
if (!F_ISSET(conn, WT_CONN_RECOVERING))
__wt_timestamp_set(
@@ -975,7 +909,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* Unblock updates -- we can figure out that any updates to clean pages
* after this point are too new to be written in the checkpoint.
*/
- cache->eviction_scrub_limit = 0.0;
+ cache->eviction_scrub_target = 0.0;
WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target, 0);
/* Tell logging that we have started a database checkpoint. */
@@ -1125,7 +1059,7 @@ err: /*
if (tracking)
WT_TRET(__wt_meta_track_off(session, false, failed));
- cache->eviction_scrub_limit = 0.0;
+ cache->eviction_scrub_target = 0.0;
WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target, 0);
if (F_ISSET(txn, WT_TXN_RUNNING)) {
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 35a89eeb072..e01db53fda9 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -275,8 +275,9 @@ __txn_rollback_to_stable_btree_walk(
WT_READ_CACHE | WT_READ_LOOKASIDE | WT_READ_NO_EVICT)) == 0 &&
ref != NULL) {
if (ref->page_las != NULL &&
+ ref->page_las->skew_newest &&
__wt_timestamp_cmp(rollback_timestamp,
- &ref->page_las->onpage_timestamp) < 0)
+ &ref->page_las->unstable_timestamp) < 0)
ref->page_las->invalid = true;
/* Review deleted page saved to the ref */
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index a10ff740df6..64887c9a583 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -193,6 +193,44 @@ __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name,
}
/*
+ * __txn_get_pinned_timestamp --
+ * Calculate the current pinned timestamp; WT_NOTFOUND if no oldest is set.
+ */
+static int
+__txn_get_pinned_timestamp(
+ WT_SESSION_IMPL *session, wt_timestamp_t *tsp, bool include_checkpoint)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+
+ conn = S2C(session);
+ txn_global = &conn->txn_global;
+
+ if (!txn_global->has_oldest_timestamp)
+ return (WT_NOTFOUND);
+ __wt_readlock(session, &txn_global->rwlock);
+ __wt_timestamp_set(tsp, &txn_global->oldest_timestamp);
+
+ /* If requested, use a running checkpoint's timestamp when it is older. */
+ if (include_checkpoint &&
+ !__wt_timestamp_iszero(&txn_global->checkpoint_timestamp) &&
+ __wt_timestamp_cmp(&txn_global->checkpoint_timestamp, tsp) < 0)
+ __wt_timestamp_set(tsp, &txn_global->checkpoint_timestamp);
+ __wt_readunlock(session, &txn_global->rwlock);
+
+ /* Use the oldest ordinary reader's read timestamp when it is older. */
+ __wt_readlock(session, &txn_global->read_timestamp_rwlock);
+ txn = TAILQ_FIRST(&txn_global->read_timestamph);
+ if (txn != NULL &&
+ __wt_timestamp_cmp(&txn->read_timestamp, tsp) < 0)
+ __wt_timestamp_set(tsp, &txn->read_timestamp);
+ __wt_readunlock(session, &txn_global->read_timestamp_rwlock);
+
+ return (0);
+}
+
+/*
* __txn_global_query_timestamp --
* Query a timestamp.
*/
@@ -248,26 +286,7 @@ __txn_global_query_timestamp(
WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
__wt_timestamp_set(&ts, &txn_global->oldest_timestamp));
} else if (WT_STRING_MATCH("pinned", cval.str, cval.len)) {
- if (!txn_global->has_oldest_timestamp)
- return (WT_NOTFOUND);
- __wt_readlock(session, &txn_global->rwlock);
- __wt_timestamp_set(&ts, &txn_global->oldest_timestamp);
-
- /* Check for a running checkpoint */
- txn = txn_global->checkpoint_txn;
- if (txn_global->checkpoint_state.pinned_id != WT_TXN_NONE &&
- !__wt_timestamp_iszero(&txn->read_timestamp) &&
- __wt_timestamp_cmp(&txn->read_timestamp, &ts) < 0)
- __wt_timestamp_set(&ts, &txn->read_timestamp);
- __wt_readunlock(session, &txn_global->rwlock);
-
- /* Look for the oldest ordinary reader. */
- __wt_readlock(session, &txn_global->read_timestamp_rwlock);
- txn = TAILQ_FIRST(&txn_global->read_timestamph);
- if (txn != NULL &&
- __wt_timestamp_cmp(&txn->read_timestamp, &ts) < 0)
- __wt_timestamp_set(&ts, &txn->read_timestamp);
- __wt_readunlock(session, &txn_global->read_timestamp_rwlock);
+ WT_RET(__txn_get_pinned_timestamp(session, &ts, true));
} else if (WT_STRING_MATCH("recovery", cval.str, cval.len))
/* Read-only value forever. No lock needed. */
__wt_timestamp_set(&ts, &txn_global->recovery_timestamp);
@@ -320,8 +339,6 @@ __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force)
WT_TXN_GLOBAL *txn_global;
wt_timestamp_t active_timestamp, last_pinned_timestamp;
wt_timestamp_t oldest_timestamp, pinned_timestamp;
- const char *query_cfg[] = { WT_CONFIG_BASE(session,
- WT_CONNECTION_query_timestamp), "get=pinned", NULL };
txn_global = &S2C(session)->txn_global;
@@ -334,8 +351,8 @@ __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force)
&oldest_timestamp, &txn_global->oldest_timestamp));
/* Scan to find the global pinned timestamp. */
- if ((ret = __txn_global_query_timestamp(
- session, &active_timestamp, query_cfg)) != 0)
+ if ((ret = __txn_get_pinned_timestamp(
+ session, &active_timestamp, false)) != 0)
return (ret == WT_NOTFOUND ? 0 : ret);
if (__wt_timestamp_cmp(&oldest_timestamp, &active_timestamp) < 0) {
@@ -362,6 +379,9 @@ __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force)
txn_global->oldest_is_pinned = __wt_timestamp_cmp(
&txn_global->pinned_timestamp,
&txn_global->oldest_timestamp) == 0;
+ txn_global->stable_is_pinned = __wt_timestamp_cmp(
+ &txn_global->pinned_timestamp,
+ &txn_global->stable_timestamp) == 0;
__wt_verbose_timestamp(session,
&pinned_timestamp, "Updated pinned timestamp");
}
diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h
index 51dc906465a..c398c1a96b2 100644
--- a/src/third_party/wiredtiger/test/format/config.h
+++ b/src/third_party/wiredtiger/test/format/config.h
@@ -331,41 +331,49 @@ static CONFIG c[] = {
C_IGNORE, 0, 0, UINT_MAX, &g.c_timer, NULL },
{ "timing_stress_checkpoint",
- "configure slow checkpoints", /* 2% */
+ "stress checkpoints", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_checkpoint, NULL },
{ "timing_stress_lookaside_sweep",
- "configure slow lookaside sweep", /* 2% */
+ "stress lookaside sweep", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_lookaside_sweep, NULL },
{ "timing_stress_split_1",
- "configure slow splits (#1)", /* 2% */
+ "stress splits (#1)", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_split_1, NULL },
{ "timing_stress_split_2",
- "configure slow splits (#2)", /* 2% */
+ "stress splits (#2)", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_split_2, NULL },
{ "timing_stress_split_3",
- "configure slow splits (#3)", /* 2% */
+ "stress splits (#3)", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_split_3, NULL },
{ "timing_stress_split_4",
- "configure slow splits (#4)", /* 2% */
+ "stress splits (#4)", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_split_4, NULL },
{ "timing_stress_split_5",
- "configure slow splits (#5)", /* 2% */
+ "stress splits (#5)", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_split_5, NULL },
{ "timing_stress_split_6",
- "configure slow splits (#6)", /* 2% */
+ "stress splits (#6)", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_split_6, NULL },
{ "timing_stress_split_7",
- "configure slow splits (#7)", /* 2% */
+ "stress splits (#7)", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_split_7, NULL },
+ { "timing_stress_split_8",
+ "stress splits (#8)", /* 2% */
+ C_BOOL, 2, 0, 0, &g.c_timing_stress_split_8, NULL },
+
+ { "timing_stress_split_9",
+ "stress splits (#9)", /* 2% */
+ C_BOOL, 2, 0, 0, &g.c_timing_stress_split_9, NULL },
+
{ "transaction_timestamps", /* 10% */
"enable transaction timestamp support",
C_BOOL, 10, 0, 0, &g.c_txn_timestamps, NULL },
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index 0eca6657dd9..1406d2b3fb5 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -221,6 +221,8 @@ typedef struct {
uint32_t c_timing_stress_split_5;
uint32_t c_timing_stress_split_6;
uint32_t c_timing_stress_split_7;
+ uint32_t c_timing_stress_split_8;
+ uint32_t c_timing_stress_split_9;
uint32_t c_truncate;
uint32_t c_txn_freq;
uint32_t c_txn_timestamps;
diff --git a/src/third_party/wiredtiger/test/format/lrt.c b/src/third_party/wiredtiger/test/format/lrt.c
index 9d99933ef64..31c5de93870 100644
--- a/src/third_party/wiredtiger/test/format/lrt.c
+++ b/src/third_party/wiredtiger/test/format/lrt.c
@@ -110,8 +110,15 @@ lrt(void *arg)
*/
testutil_check(session->snapshot(session, "name=test"));
__wt_sleep(1, 0);
- testutil_check(session->begin_transaction(
- session, "snapshot=test"));
+ /*
+ * Keep trying to start a new transaction if it's
+ * timing out - we know there aren't any resources
+ * pinned so it should succeed eventually.
+ */
+ while ((ret = session->begin_transaction(
+ session, "snapshot=test")) == WT_CACHE_FULL)
+ ;
+ testutil_check(ret);
testutil_check(session->snapshot(
session, "drop=(all)"));
testutil_check(session->commit_transaction(
@@ -123,8 +130,10 @@ lrt(void *arg)
* positioned. As soon as the cursor loses its position
* a new snapshot will be allocated.
*/
- testutil_check(session->begin_transaction(
- session, "isolation=snapshot"));
+ while ((ret = session->begin_transaction(
+ session, "isolation=snapshot")) == WT_CACHE_FULL)
+ ;
+ testutil_check(ret);
/* Read a record at the end of the table. */
do {
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index 7d08dbd8bd8..b3f5fd51ce2 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -498,6 +498,7 @@ static void
begin_transaction(TINFO *tinfo, WT_SESSION *session, u_int *iso_configp)
{
u_int v;
+ int ret;
const char *config;
char config_buf[64];
bool locked;
@@ -523,7 +524,15 @@ begin_transaction(TINFO *tinfo, WT_SESSION *session, u_int *iso_configp)
}
*iso_configp = v;
- testutil_check(session->begin_transaction(session, config));
+ /*
+ * Keep trying to start a new transaction if it's timing out - we
+ * know there aren't any resources pinned so it should succeed
+ * eventually.
+ */
+ while ((ret =
+ session->begin_transaction(session, config)) == WT_CACHE_FULL)
+ ;
+ testutil_check(ret);
if (v == ISOLATION_SNAPSHOT && g.c_txn_timestamps) {
/* Avoid starting a new reader when a prepare is in progress. */
diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c
index dd87adeae56..8040142aa19 100644
--- a/src/third_party/wiredtiger/test/format/wts.c
+++ b/src/third_party/wiredtiger/test/format/wts.c
@@ -262,6 +262,10 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp)
CONFIG_APPEND(p, ",split_6");
if (g.c_timing_stress_split_7)
CONFIG_APPEND(p, ",split_7");
+ if (g.c_timing_stress_split_8)
+ CONFIG_APPEND(p, ",split_8");
+ if (g.c_timing_stress_split_9)
+ CONFIG_APPEND(p, ",split_9");
CONFIG_APPEND(p, "]");
/* Extensions. */
diff --git a/src/third_party/wiredtiger/test/suite/test_las03.py b/src/third_party/wiredtiger/test/suite/test_las03.py
new file mode 100644
index 00000000000..6934bd9a741
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_las03.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2018 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+from helper import copy_wiredtiger_home
+import wiredtiger, wttest
+from wiredtiger import stat
+from wtdataset import SimpleDataSet
+
+def timestamp_str(t):
+ return '%x' % t
+
+# test_las03.py
+# Ensure checkpoints don't read too many unnecessary lookaside entries.
+class test_las03(wttest.WiredTigerTestCase):
+ # Force a small cache.
+ def conn_config(self):
+ return 'cache_size=50MB,statistics=(fast)'
+
+ def get_stat(self, stat):
+ stat_cursor = self.session.open_cursor('statistics:')
+ val = stat_cursor[stat][2]
+ stat_cursor.close()
+ return val
+
+ def large_updates(self, session, uri, value, ds, nrows, nops):
+        # Update a large number of records. This will hang if the lookaside
+        # table isn't absorbing the history that no longer fits in cache.
+ cursor = session.open_cursor(uri)
+ for i in range(nrows + 1, nrows + nops + 1):
+ session.begin_transaction()
+ cursor[ds.key(i)] = value
+ session.commit_transaction('commit_timestamp=' + timestamp_str(i))
+ cursor.close()
+
+ def test_checkpoint_las_reads(self):
+ if not wiredtiger.timestamp_build():
+ self.skipTest('requires a timestamp build')
+
+ # Create a small table.
+ uri = "table:test_las03"
+ nrows = 100
+ ds = SimpleDataSet(self, uri, nrows, key_format="S", value_format='u')
+ ds.populate()
+ bigvalue = "aaaaa" * 100
+
+ # Initially load huge data
+ cursor = self.session.open_cursor(uri)
+ for i in range(1, 10000):
+ cursor[ds.key(nrows + i)] = bigvalue
+ cursor.close()
+ self.session.checkpoint()
+
+        # Check that LAS is used when the stable timestamp is old
+ bigvalue2 = "ddddd" * 100
+ self.conn.set_timestamp('stable_timestamp=' + timestamp_str(1))
+ las_writes_start = self.get_stat(stat.conn.cache_write_lookaside)
+ self.large_updates(self.session, uri, bigvalue2, ds, nrows, 10000)
+
+ # If the test sizing is correct, the history will overflow the cache
+ self.session.checkpoint()
+ las_writes = self.get_stat(stat.conn.cache_write_lookaside) - las_writes_start
+ self.assertGreaterEqual(las_writes, 0)
+
+ for ts in range(2, 4):
+ self.conn.set_timestamp('stable_timestamp=' + timestamp_str(ts))
+
+ # Now just update one record and checkpoint again
+ self.large_updates(self.session, uri, bigvalue2, ds, nrows, 1)
+
+ las_reads_start = self.get_stat(stat.conn.cache_read_lookaside)
+ self.session.checkpoint()
+ las_reads = self.get_stat(stat.conn.cache_read_lookaside) - las_reads_start
+
+ # Since we're dealing with eviction concurrent with checkpoints
+ # and skewing is controlled by a heuristic, we can't put too tight
+ # a bound on this.
+ self.assertLessEqual(las_reads, 100)
+
+if __name__ == '__main__':
+ wttest.run()