summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2018-07-11 13:26:10 +1000
committerLuke Chen <luke.chen@mongodb.com>2018-07-11 13:26:10 +1000
commitc21c6829444f923729ccd6348fee5d5d49fb14f6 (patch)
tree1be133e0633cf1ecc21d0865c21119d9ebdefc14
parent3346e3cb9609422e4f6ac41b6791e175bdfb5124 (diff)
downloadmongo-c21c6829444f923729ccd6348fee5d5d49fb14f6.tar.gz
Import wiredtiger: e6c749653220cf701c23634cd704ae0c2d882dd9 from branch mongodb-4.0
ref: 7d3e691fd4..e6c7496532 for: 4.0.1 WT-3839 Document the undefined behavior when a range truncate overlaps with inserts WT-3917 Enhance WT_CURSOR::reserve documentation around commit visibility WT-4024 Fix a race between split and next/prev WT-4048 Generalize timing_stress_for_test split functionality WT-4067 Enhance LSM to not pin as much history in cache WT-4101 Don't abort the eviction server during session verify when oldest_timestamp is held back WT-4111 Improve checkpoint scrubbing algorithm WT-4125 Ensure that subsequent checkpoints with stable timestamp don't read too much WT-4133 Coverity 1393445, 1393446 Dereference before null check WT-4136 Add a new timing stress flag that yields during tree search WT-4138 Add an option to timeout waiting for space in the cache WT-4139 rename the cursor restart statistic to match implementation WT-4140 Cursor walk limits quick eviction page selection unnecessarily. WT-4141 Enhance checkpoint with timestamps to unblock eviction sooner WT-4143 Use WiredTiger.turtle.set if it exists but WiredTiger.turtle does not WT-4145 Only include the checkpoint timestamp during checkpoints WT-4146 Coverity 1393639, unused variable WT-4152 Save return value for later comparison in transaction code WT-4163 Lint
-rw-r--r--src/third_party/wiredtiger/.gitignore2
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py10
-rw-r--r--src/third_party/wiredtiger/dist/stat_data.py18
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/bloom/bloom.c11
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curnext.c3
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_debug.c13
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_handle.c6
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_random.c2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_read.c12
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_split.c61
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_sync.c20
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_walk.c315
-rw-r--r--src/third_party/wiredtiger/src/btree/col_srch.c5
-rw-r--r--src/third_party/wiredtiger/src/btree/row_srch.c7
-rw-r--r--src/third_party/wiredtiger/src/cache/cache_las.c66
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c137
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_api.c22
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_cache.c4
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_reconfig.c3
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_file.c2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_join.c2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_metadata.c2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_stat.c2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_std.c4
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_lru.c41
-rw-r--r--src/third_party/wiredtiger/src/include/api.h3
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h18
-rw-r--r--src/third_party/wiredtiger/src/include/btree.i118
-rw-r--r--src/third_party/wiredtiger/src/include/cache.h6
-rw-r--r--src/third_party/wiredtiger/src/include/cache.i23
-rw-r--r--src/third_party/wiredtiger/src/include/connection.h2
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h2
-rw-r--r--src/third_party/wiredtiger/src/include/lsm.h19
-rw-r--r--src/third_party/wiredtiger/src/include/misc.h21
-rw-r--r--src/third_party/wiredtiger/src/include/misc.i23
-rw-r--r--src/third_party/wiredtiger/src/include/session.h2
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h6
-rw-r--r--src/third_party/wiredtiger/src/include/txn.h2
-rw-r--r--src/third_party/wiredtiger/src/include/txn.i72
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in584
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_cursor.c22
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_manager.c13
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_work_unit.c94
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_worker.c12
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_turtle.c40
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_write.c185
-rw-r--r--src/third_party/wiredtiger/src/session/session_api.c6
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c21
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c34
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_ckpt.c142
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c3
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_timestamp.c72
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_load.c2
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_load_json.c2
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_loadtext.c8
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_read.c8
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_write.c8
-rw-r--r--src/third_party/wiredtiger/test/format/config.h26
-rw-r--r--src/third_party/wiredtiger/test/format/format.h2
-rw-r--r--src/third_party/wiredtiger/test/format/lrt.c17
-rw-r--r--src/third_party/wiredtiger/test/format/ops.c19
-rw-r--r--src/third_party/wiredtiger/test/format/wts.c4
-rw-r--r--src/third_party/wiredtiger/test/suite/test_bug020.py45
-rw-r--r--src/third_party/wiredtiger/test/suite/test_las03.py105
65 files changed, 1482 insertions, 1081 deletions
diff --git a/src/third_party/wiredtiger/.gitignore b/src/third_party/wiredtiger/.gitignore
index 49e737fe301..4a9e098a17f 100644
--- a/src/third_party/wiredtiger/.gitignore
+++ b/src/third_party/wiredtiger/.gitignore
@@ -126,6 +126,8 @@ _wiredtiger.pyd
**/test/csuite/test_wt3338_partial_update
**/test/csuite/test_wt3363_checkpoint_op_races
**/test/csuite/test_wt3874_pad_byte_collator
+**/test/csuite/test_wt4105_large_doc_small_upd
+**/test/csuite/test_wt4117_checksum
**/test/cursor_order/cursor_order
**/test/fops/t
**/test/format/s_dumpcmp
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 7d8a58c83bb..d29e9655fb3 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -417,6 +417,11 @@ connection_runtime_config = [
maximum heap memory to allocate for the cache. A database should
configure either \c cache_size or \c shared_cache but not both''',
min='1MB', max='10TB'),
+ Config('cache_max_wait_ms', '0', r'''
+ the maximum number of milliseconds an application thread will wait
+ for space to be available in cache before giving up. Default will
+ wait forever''',
+ min=0),
Config('cache_overhead', '8', r'''
assume the heap allocator overhead is the specified percentage, and
adjust the cache usage by that amount (for example, if there is 10GB
@@ -460,7 +465,7 @@ connection_runtime_config = [
vary depending on the current eviction load''',
min=1, max=20),
]),
- Config('eviction_checkpoint_target', '5', r'''
+ Config('eviction_checkpoint_target', '1', r'''
perform eviction at the beginning of checkpoints to bring the dirty
content in cache to this level. It is a percentage of the cache size if
the value is within the range of 0 to 100 or an absolute size when
@@ -585,7 +590,8 @@ connection_runtime_config = [
type='list', undoc=True,
choices=[
'checkpoint_slow', 'lookaside_sweep_race', 'split_1', 'split_2',
- 'split_3', 'split_4', 'split_5', 'split_6', 'split_7']),
+ 'split_3', 'split_4', 'split_5', 'split_6', 'split_7', 'split_8',
+ 'split_9']),
Config('verbose', '', r'''
enable messages for various events. Options are given as a
list, such as <code>"verbose=[evictserver,read]"</code>''',
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index c1b43ac6b2d..f4f8f61ee1e 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -238,9 +238,9 @@ connection_stats = [
CacheStat('cache_eviction_state', 'eviction state', 'no_clear,no_scale'),
CacheStat('cache_eviction_target_page_ge128', 'eviction walk target pages histogram - 128 and higher'),
CacheStat('cache_eviction_target_page_lt10', 'eviction walk target pages histogram - 0-9'),
+ CacheStat('cache_eviction_target_page_lt128', 'eviction walk target pages histogram - 64-128'),
CacheStat('cache_eviction_target_page_lt32', 'eviction walk target pages histogram - 10-31'),
CacheStat('cache_eviction_target_page_lt64', 'eviction walk target pages histogram - 32-63'),
- CacheStat('cache_eviction_target_page_lt128', 'eviction walk target pages histogram - 64-128'),
CacheStat('cache_eviction_walk', 'pages walked for eviction'),
CacheStat('cache_eviction_walk_from_root', 'eviction walks started from root of tree'),
CacheStat('cache_eviction_walk_passes', 'eviction passes of a file'),
@@ -279,6 +279,7 @@ connection_stats = [
CacheStat('cache_read_lookaside_delay_checkpoint', 'pages read into cache with skipped lookaside entries needed later by checkpoint'),
CacheStat('cache_read_lookaside_skipped', 'pages read into cache skipping older lookaside entries'),
CacheStat('cache_read_overflow', 'overflow pages read into cache'),
+ CacheStat('cache_timed_out_ops', 'operations timed out waiting for space in cache'),
CacheStat('cache_write', 'pages written from cache'),
CacheStat('cache_write_app_count', 'application threads page write from cache to disk count'),
CacheStat('cache_write_app_time', 'application threads page write from cache to disk time (usecs)'),
@@ -294,11 +295,11 @@ connection_stats = [
CursorStat('cursor_modify', 'cursor modify calls'),
CursorStat('cursor_next', 'cursor next calls'),
CursorStat('cursor_prev', 'cursor prev calls'),
- CursorStat('cursor_reopen', 'cursors reused from cache'),
CursorStat('cursor_remove', 'cursor remove calls'),
+ CursorStat('cursor_reopen', 'cursors reused from cache'),
CursorStat('cursor_reserve', 'cursor reserve calls'),
CursorStat('cursor_reset', 'cursor reset calls'),
- CursorStat('cursor_restart', 'cursor restarted searches'),
+ CursorStat('cursor_restart', 'cursor operation restarted'),
CursorStat('cursor_search', 'cursor search calls'),
CursorStat('cursor_search_near', 'cursor search near calls'),
CursorStat('cursor_truncate', 'truncate calls'),
@@ -309,8 +310,8 @@ connection_stats = [
##########################################
CursorStat('cursor_sweep', 'cursor sweeps'),
CursorStat('cursor_sweep_buckets', 'cursor sweep buckets'),
- CursorStat('cursor_sweep_examined', 'cursor sweep cursors examined'),
CursorStat('cursor_sweep_closed', 'cursor sweep cursors closed'),
+ CursorStat('cursor_sweep_examined', 'cursor sweep cursors examined'),
##########################################
# Dhandle statistics
@@ -507,14 +508,15 @@ connection_stats = [
TxnStat('txn_checkpoint_time_total', 'transaction checkpoint total time (msecs)', 'no_clear,no_scale'),
TxnStat('txn_commit', 'transactions committed'),
TxnStat('txn_commit_queue_empty', 'commit timestamp queue insert to empty'),
- TxnStat('txn_commit_queue_tail', 'commit timestamp queue inserts to tail'),
TxnStat('txn_commit_queue_inserts', 'commit timestamp queue inserts total'),
TxnStat('txn_commit_queue_len', 'commit timestamp queue length'),
+ TxnStat('txn_commit_queue_tail', 'commit timestamp queue inserts to tail'),
TxnStat('txn_fail_cache', 'transaction failures due to cache overflow'),
TxnStat('txn_pinned_checkpoint_range', 'transaction range of IDs currently pinned by a checkpoint', 'no_clear,no_scale'),
TxnStat('txn_pinned_range', 'transaction range of IDs currently pinned', 'no_clear,no_scale'),
TxnStat('txn_pinned_snapshot_range', 'transaction range of IDs currently pinned by named snapshots', 'no_clear,no_scale'),
TxnStat('txn_pinned_timestamp', 'transaction range of timestamps currently pinned', 'no_clear,no_scale'),
+ TxnStat('txn_pinned_timestamp_checkpoint', 'transaction range of timestamps pinned by a checkpoint', 'no_clear,no_scale'),
TxnStat('txn_pinned_timestamp_oldest', 'transaction range of timestamps pinned by the oldest timestamp', 'no_clear,no_scale'),
TxnStat('txn_prepare', 'prepared transactions'),
TxnStat('txn_prepare_active', 'prepared transactions currently active'),
@@ -622,9 +624,9 @@ dsrc_stats = [
CacheStat('cache_eviction_split_leaf', 'leaf pages split during eviction'),
CacheStat('cache_eviction_target_page_ge128', 'eviction walk target pages histogram - 128 and higher'),
CacheStat('cache_eviction_target_page_lt10', 'eviction walk target pages histogram - 0-9'),
+ CacheStat('cache_eviction_target_page_lt128', 'eviction walk target pages histogram - 64-128'),
CacheStat('cache_eviction_target_page_lt32', 'eviction walk target pages histogram - 10-31'),
CacheStat('cache_eviction_target_page_lt64', 'eviction walk target pages histogram - 32-63'),
- CacheStat('cache_eviction_target_page_lt128', 'eviction walk target pages histogram - 64-128'),
CacheStat('cache_eviction_walk_from_root', 'eviction walks started from root of tree'),
CacheStat('cache_eviction_walk_passes', 'eviction walk passes of a file'),
CacheStat('cache_eviction_walk_saved_pos', 'eviction walks started from saved location in tree'),
@@ -697,7 +699,7 @@ dsrc_stats = [
CursorStat('cursor_reopen', 'cursors reused from cache'),
CursorStat('cursor_reserve', 'reserve calls'),
CursorStat('cursor_reset', 'reset calls'),
- CursorStat('cursor_restart', 'restarted searches'),
+ CursorStat('cursor_restart', 'cursor operation restarted'),
CursorStat('cursor_search', 'search calls'),
CursorStat('cursor_search_near', 'search near calls'),
CursorStat('cursor_truncate', 'truncate calls'),
@@ -741,8 +743,8 @@ dsrc_stats = [
##########################################
# Session operations
##########################################
- SessionStat('session_cursor_cached', 'cached cursor count', 'no_clear,no_scale'),
SessionStat('session_compact', 'object compaction'),
+ SessionStat('session_cursor_cached', 'cached cursor count', 'no_clear,no_scale'),
SessionStat('session_cursor_open', 'open cursor count', 'no_clear,no_scale'),
##########################################
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 702b2430339..0d25134e924 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "7d3e691fd4d5ba7810647c317692129fea694602",
+ "commit": "e6c749653220cf701c23634cd704ae0c2d882dd9",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-4.0"
diff --git a/src/third_party/wiredtiger/src/bloom/bloom.c b/src/third_party/wiredtiger/src/bloom/bloom.c
index cf4743009ee..d506af89ab7 100644
--- a/src/third_party/wiredtiger/src/bloom/bloom.c
+++ b/src/third_party/wiredtiger/src/bloom/bloom.c
@@ -302,7 +302,16 @@ __wt_bloom_hash_get(WT_BLOOM *bloom, WT_BLOOM_HASH *bhash)
err: if (c != NULL)
WT_TRET(c->reset(c));
- /* Don't return WT_NOTFOUND from a failed cursor open or search. */
+ /*
+ * Error handling from this function is complex. A search in the
+ * backing bit field should never return WT_NOTFOUND - so translate
+ * that into a different error code and report an error. If we got a
+ * WT_ROLLBACK or WT_CACHE_FULL it may be because there is a lot of cache
+ * pressure and the transaction is being killed - don't report an error
+ * message in that case.
+ */
+ if (ret == WT_ROLLBACK || ret == WT_CACHE_FULL)
+ return (ret);
WT_RET_MSG(bloom->session,
ret == WT_NOTFOUND ? WT_ERROR : ret,
"Failed lookup in bloom filter");
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index 02cceab3123..3a031b49db5 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -429,7 +429,8 @@ __cursor_key_order_check_row(
WT_ERR(__wt_scr_alloc(session, 512, &b));
WT_PANIC_ERR(session, EINVAL,
- "WT_CURSOR.%s out-of-order returns: returned key %s then key %s",
+ "WT_CURSOR.%s out-of-order returns: returned key %.1024s then "
+ "key %.1024s",
next ? "next" : "prev",
__wt_buf_set_printable_format(session,
cbt->lastkey->data, cbt->lastkey->size, btree->key_format, a),
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index 566157abd61..16e25c1fe25 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -805,11 +805,13 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref)
WT_PAGE_INDEX *pindex;
WT_PAGE_MODIFY *mod;
WT_SESSION_IMPL *session;
+ uint64_t split_gen;
uint32_t entries;
session = ds->session;
page = ref->page;
mod = page->modify;
+ split_gen = 0;
WT_RET(ds->f(ds, "%p", (void *)ref));
@@ -818,6 +820,7 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref)
WT_RET(ds->f(ds, " recno %" PRIu64, ref->ref_recno));
WT_INTL_INDEX_GET(session, page, pindex);
entries = pindex->entries;
+ split_gen = page->pg_intl_split_gen;
break;
case WT_PAGE_COL_FIX:
WT_RET(ds->f(ds, " recno %" PRIu64, ref->ref_recno));
@@ -830,6 +833,7 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref)
case WT_PAGE_ROW_INT:
WT_INTL_INDEX_GET(session, page, pindex);
entries = pindex->entries;
+ split_gen = page->pg_intl_split_gen;
break;
case WT_PAGE_ROW_LEAF:
entries = page->entries;
@@ -845,8 +849,6 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref)
WT_RET(ds->f(ds, ", entries %" PRIu32, entries));
WT_RET(ds->f(ds,
", %s", __wt_page_is_modified(page) ? "dirty" : "clean"));
- WT_RET(ds->f(ds,
- ", memory_size %" WT_SIZET_FMT, page->memory_footprint));
if (F_ISSET_ATOMIC(page, WT_PAGE_BUILD_KEYS))
WT_RET(ds->f(ds, ", keys-built"));
@@ -878,9 +880,12 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref)
break;
WT_ILLEGAL_VALUE(session);
}
+ if (split_gen != 0)
+ WT_RET(ds->f(ds, ", split-gen=%" PRIu64, split_gen));
if (mod != NULL)
- WT_RET(
- ds->f(ds, ", write generation=%" PRIu32, mod->write_gen));
+ WT_RET(ds->f(ds, ", write-gen=%" PRIu32, mod->write_gen));
+ WT_RET(ds->f(ds,
+ ", memory-size %" WT_SIZET_FMT, page->memory_footprint));
WT_RET(ds->f(ds, "\n"));
return (0);
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index e6f8bad8e31..9160ff1dd21 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -893,11 +893,11 @@ __btree_page_sizes(WT_SESSION_IMPL *session)
* reset it to the default.
*/
if (btree->maxintlkey == 0 || btree->maxintlkey > intl_split_size / 10)
- btree->maxintlkey = intl_split_size / 10;
+ btree->maxintlkey = intl_split_size / 10;
if (btree->maxleafkey == 0)
- btree->maxleafkey = leaf_split_size / 10;
+ btree->maxleafkey = leaf_split_size / 10;
if (btree->maxleafvalue == 0)
- btree->maxleafvalue = leaf_split_size / 2;
+ btree->maxleafvalue = leaf_split_size / 2;
return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c
index 17497561248..4f310b27237 100644
--- a/src/third_party/wiredtiger/src/btree/bt_random.c
+++ b/src/third_party/wiredtiger/src/btree/bt_random.c
@@ -262,7 +262,7 @@ restart: /*
* holding nothing on failure.
*/
descend: if ((ret = __wt_page_swap(
- session, current, descent, false, flags)) == 0) {
+ session, current, descent, flags)) == 0) {
current = descent;
continue;
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index 9e530be4f0e..c8368624d3c 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -276,13 +276,15 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
*/
page->modify->first_dirty_txn = WT_TXN_FIRST;
- if (ref->page_las->las_skew_newest &&
+ FLD_SET(page->modify->restore_state, WT_PAGE_RS_LOOKASIDE);
+
+ if (ref->page_las->skew_newest &&
!S2C(session)->txn_global.has_stable_timestamp &&
- __wt_txn_visible_all(session, ref->page_las->las_max_txn,
- WT_TIMESTAMP_NULL(&ref->page_las->onpage_timestamp))) {
- page->modify->rec_max_txn = ref->page_las->las_max_txn;
+ __wt_txn_visible_all(session, ref->page_las->unstable_txn,
+ WT_TIMESTAMP_NULL(&ref->page_las->unstable_timestamp))) {
+ page->modify->rec_max_txn = ref->page_las->max_txn;
__wt_timestamp_set(&page->modify->rec_max_timestamp,
- &ref->page_las->onpage_timestamp);
+ &ref->page_las->max_timestamp);
__wt_page_modify_clear(session, page);
}
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 7061b92af78..a98de6c6c9f 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -31,24 +31,6 @@ typedef enum {
} WT_SPLIT_ERROR_PHASE;
/*
- * __page_split_timing_stress --
- * Optionally add delay to simulate the race conditions in page split for
- * debug purposes. The purpose is to uncover the race conditions in page split.
- */
-static void
-__page_split_timing_stress(
- WT_SESSION_IMPL *session, uint64_t flag, uint64_t micro_seconds)
-{
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
-
- /* We only want to sleep when page split race flag is set. */
- if (FLD_ISSET(conn->timing_stress_flags, flag))
- __wt_sleep(0, micro_seconds);
-}
-
-/*
* __split_safe_free --
* Free a buffer if we can be sure no thread is accessing it, or schedule
* it to be freed otherwise.
@@ -566,8 +548,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root)
WT_ERR(__split_ref_prepare(session, alloc_index, &locked, false));
/* Encourage a race */
- __page_split_timing_stress(
- session, WT_TIMING_STRESS_SPLIT_1, TIMING_STRESS_TEST_SLEEP);
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_1);
/*
* Confirm the root page's index hasn't moved, then update it, which
@@ -578,8 +559,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root)
alloc_index = NULL;
/* Encourage a race */
- __page_split_timing_stress(
- session, WT_TIMING_STRESS_SPLIT_2, TIMING_STRESS_TEST_SLEEP);
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_2);
/*
* Get a generation for this split, mark the root page. This must be
@@ -772,8 +752,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
WT_NOT_READ(complete, WT_ERR_PANIC);
/* Encourage a race */
- __page_split_timing_stress(
- session, WT_TIMING_STRESS_SPLIT_3, TIMING_STRESS_TEST_SLEEP);
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_3);
/*
* Confirm the parent page's index hasn't moved then update it, which
@@ -784,8 +763,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
alloc_index = NULL;
/* Encourage a race */
- __page_split_timing_stress(
- session, WT_TIMING_STRESS_SPLIT_4, TIMING_STRESS_TEST_SLEEP);
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_4);
/*
* Get a generation for this split, mark the page. This must be after
@@ -1125,8 +1103,7 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page)
WT_ERR(__split_ref_prepare(session, alloc_index, &locked, true));
/* Encourage a race */
- __page_split_timing_stress(
- session, WT_TIMING_STRESS_SPLIT_5, TIMING_STRESS_TEST_SLEEP);
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_5);
/* Split into the parent. */
WT_ERR(__split_parent(session, page_ref, alloc_index->index,
@@ -1140,8 +1117,7 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page)
WT_INTL_INDEX_SET(page, replace_index);
/* Encourage a race */
- __page_split_timing_stress(
- session, WT_TIMING_STRESS_SPLIT_6, TIMING_STRESS_TEST_SLEEP);
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_6);
/*
* Get a generation for this split, mark the parent page. This must be
@@ -1259,8 +1235,7 @@ __split_internal_lock(
parent = ref->home;
/* Encourage races. */
- __page_split_timing_stress(
- session, WT_TIMING_STRESS_SPLIT_7, WT_THOUSAND);
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_7);
/* Page locks live in the modify structure. */
WT_RET(__wt_page_modify_init(session, parent));
@@ -1439,6 +1414,7 @@ __split_multi_inmem(
WT_DECL_ITEM(key);
WT_DECL_RET;
WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
WT_SAVE_UPD *supd;
WT_UPDATE *upd;
uint64_t recno;
@@ -1545,17 +1521,26 @@ __split_multi_inmem(
* might be older than that. Set the first dirty transaction to an
* impossibly old value so this page is never skipped in a checkpoint.
*/
- page->modify->first_dirty_txn = WT_TXN_FIRST;
+ mod = page->modify;
+ mod->first_dirty_txn = WT_TXN_FIRST;
/*
* If the new page is modified, save the eviction generation to avoid
* repeatedly attempting eviction on the same page.
*/
- page->modify->last_evict_pass_gen = orig->modify->last_evict_pass_gen;
- page->modify->last_eviction_id = orig->modify->last_eviction_id;
- __wt_timestamp_set(&page->modify->last_eviction_timestamp,
+ mod->last_evict_pass_gen = orig->modify->last_evict_pass_gen;
+ mod->last_eviction_id = orig->modify->last_eviction_id;
+ __wt_timestamp_set(&mod->last_eviction_timestamp,
&orig->modify->last_eviction_timestamp);
- page->modify->update_restored = 1;
+
+ /* Add the update/restore flag to any previous state. */
+ __wt_timestamp_set(&mod->last_stable_timestamp,
+ &orig->modify->last_stable_timestamp);
+ mod->rec_max_txn = orig->modify->rec_max_txn;
+ __wt_timestamp_set(&mod->rec_max_timestamp,
+ &orig->modify->rec_max_timestamp);
+ mod->restore_state = orig->modify->restore_state;
+ FLD_SET(mod->restore_state, WT_PAGE_RS_RESTORED);
err: /* Free any resources that may have been cached in the cursor. */
WT_TRET(__wt_btcur_close(&cbt, true));
@@ -1709,7 +1694,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
WT_RET(__wt_calloc_one(session, &ref->page_las));
*ref->page_las = multi->page_las;
- WT_ASSERT(session, ref->page_las->las_max_txn != WT_TXN_NONE);
+ WT_ASSERT(session, ref->page_las->max_txn != WT_TXN_NONE);
ref->state = WT_REF_LOOKASIDE;
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index ad7d7d9fcab..24eea097cdf 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -118,6 +118,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
WT_REF *prev, *walk;
WT_TXN *txn;
uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages;
@@ -256,9 +257,24 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
if (walk == NULL)
break;
- /* Skip clean pages. */
- if (!__wt_page_is_modified(walk->page))
+ /*
+ * Skip clean pages, but need to make sure maximum
+ * transaction ID is always updated.
+ */
+ if (!__wt_page_is_modified(walk->page)) {
+ if (((mod = walk->page->modify) != NULL) &&
+ mod->rec_max_txn > btree->rec_max_txn)
+ btree->rec_max_txn = mod->rec_max_txn;
+#ifdef HAVE_TIMESTAMPS
+ if (mod != NULL && __wt_timestamp_cmp(
+ &btree->rec_max_timestamp,
+ &mod->rec_max_timestamp) < 0)
+ __wt_timestamp_set(
+ &btree->rec_max_timestamp,
+ &mod->rec_max_timestamp);
+#endif
continue;
+ }
/*
* Take a local reference to the page modify structure
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c
index d445184b7dd..a2386d907c7 100644
--- a/src/third_party/wiredtiger/src/btree/bt_walk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_walk.c
@@ -176,44 +176,96 @@ __ref_ascend(WT_SESSION_IMPL *session,
}
/*
- * __ref_initial_descent_prev --
- * Descend the tree one level, when setting up the initial cursor position
- * for a previous-cursor walk.
+ * __split_prev_race --
+ * Check for races when descending the tree during a previous-cursor walk.
*/
static inline bool
-__ref_initial_descent_prev(
+__split_prev_race(
WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX **pindexp)
{
WT_PAGE_INDEX *pindex;
/*
- * When splitting an internal page into its parent, we move the WT_REF
- * structures and update the parent's page index before updating the
- * split page's page index, and it's not an atomic update. A thread can
- * read the parent page's replacement page index, then read the split
- * page's original index, or the parent page's original and the split
- * page's replacement.
+ * Handle a cursor moving backwards through the tree or setting up at
+ * the end of the tree. We're passed the child page into which we're
+ * descending, and the parent page's page-index we used to find that
+ * child page.
*
- * This isn't a problem for a cursor setting up at the start of the tree
- * because we do right-hand splits on internal pages and the initial
- * part of the split page's namespace won't change as part of a split.
- * A thread reading the parent page's and split page's indexes will move
- * to the same slot no matter what order of indexes are read.
- *
- * Handle a cursor setting up at the end of the tree.
+ * When splitting an internal page into its parent, we move the split
+ * page's WT_REF structures, then update the parent's page index, then
+ * update the split page's page index, and nothing is atomic. A thread
+ * can read the parent page's replacement page index and then the split
+ * page's original index, or vice-versa, and either change can cause a
+ * cursor moving backwards through the tree to skip pages.
*
- * We're passed a child page into which we're descending, and on which
- * we have a hazard pointer.
+ * This isn't a problem for a cursor setting up at the start of the tree
+ * or moving forward through the tree because we do right-hand splits on
+ * internal pages and the initial part of the split page's namespace
+ * won't change as part of a split (in other words, a thread reading the
+ * parent page's and split page's indexes will move to the same slot no
+ * matter what order of indexes are read).
*
- * Acquire a page index for the child page and then confirm we haven't
- * raced with a parent split.
+ * Acquire the child's page index, then confirm the parent's page index
+ * hasn't changed, to check for reading an old version of the parent's
+ * page index and then reading a new version of the child's page index.
*/
WT_INTL_INDEX_GET(session, ref->page, pindex);
if (__wt_split_descent_race(session, ref, *pindexp))
- return (false);
+ return (true);
+
+ /*
+ * That doesn't check if we read a new version of parent's page index
+ * and then an old version of the child's page index. For example, if
+ * a thread were in a newly created split page subtree, the split
+ * completes into the parent before the thread reads it and descends
+ * into the child (where the split hasn't yet completed).
+ *
+ * Imagine an internal page with 3 child pages, with the namespaces a-f,
+ * g-h and i-j; the first child page splits. The parent starts out with
+ * the following page-index:
+ *
+ * | ... | a | g | i | ... |
+ *
+ * The split page starts out with the following page-index:
+ *
+ * | a | b | c | d | e | f |
+ *
+ * The first step is to move the c-f ranges into a new subtree, so, for
+ * example we might have two new internal pages 'c' and 'e', where the
+ * new 'c' page references the c-d namespace and the new 'e' page
+ * references the e-f namespace. The top of the subtree references the
+ * parent page, but until the parent's page index is updated, threads in
+ * the subtree won't be able to ascend out of the subtree. However, once
+ * the parent page's page index is updated to this:
+ *
+ * | ... | a | c | e | g | i | ... |
+ *
+ * threads in the subtree can ascend into the parent. Imagine a cursor
+ * in the c-d part of the namespace that ascends to the parent's 'c'
+ * slot. It would then decrement to the slot before the 'c' slot, the
+ * 'a' slot.
+ *
+ * The previous-cursor movement selects the last slot in the 'a' page;
+ * if the split page's page-index hasn't been updated yet, it selects
+ * the 'f' slot, which is incorrect. Once the split page's page index is
+ * updated to this:
+ *
+ * | a | b |
+ *
+ * the previous-cursor movement will select the 'b' slot, which is
+ * correct.
+ *
+ * If the last slot on the page no longer points to the current page as
+ * its "home", the page is being split and part of its namespace moved,
+ * restart. (We probably don't have to restart, I think we could spin
+ * until the page-index is updated, but I'm not willing to debug that
+ * one if I'm wrong.)
+ */
+ if (pindex->index[pindex->entries - 1]->home != ref->page)
+ return (true);
*pindexp = pindex;
- return (true);
+ return (false);
}
/*
@@ -229,22 +281,21 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
WT_BTREE *btree;
WT_DECL_RET;
WT_PAGE_INDEX *pindex;
- WT_REF *couple, *couple_orig, *ref;
+ WT_REF *couple, *ref, *ref_orig;
uint64_t sleep_usecs, yield_count;
uint32_t current_state, slot;
- bool empty_internal, initial_descent, prev, skip;
+ bool empty_internal, prev, skip;
btree = S2BT(session);
pindex = NULL;
sleep_usecs = yield_count = 0;
- empty_internal = initial_descent = false;
+ empty_internal = false;
/*
- * Tree walks are special: they look inside page structures that splits
- * may want to free. Publish that the tree is active during this
- * window.
+ * We're not supposed to walk trees without root pages. As this has not
+ * always been the case, assert to debug that change.
*/
- WT_ENTER_PAGE_INDEX(session);
+ WT_ASSERT(session, btree->root.page != NULL);
/* Check whether deleted pages can be skipped. */
if (!LF_ISSET(WT_READ_DELETED_SKIP))
@@ -284,36 +335,41 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
* new leaf, couple to the next page to which we're descending, it
* saves a hazard-pointer swap for each cursor page movement.
*
- * !!!
- * NOTE: we depend on the fact it's OK to release a page we don't hold,
- * that is, it's OK to release couple when couple is set to NULL.
- *
- * Take a copy of any held page and clear the return value. Remember
- * the hazard pointer we're currently holding.
- *
- * Clear the returned value, it makes future error handling easier.
+ * The hazard pointer on the original location is held until the end of
+ * the movement, in case we have to restart the movement. Take a copy
+ * of any held page and clear the return value (it makes future error
+ * handling easier).
*/
- couple = couple_orig = ref = *refp;
+ couple = NULL;
+ ref_orig = *refp;
*refp = NULL;
+ /*
+ * Tree walks are special: they look inside page structures that splits
+ * may want to free. Publish that the tree is active during this window.
+ */
+ WT_ENTER_PAGE_INDEX(session);
+
/* If no page is active, begin a walk from the start/end of the tree. */
- if (ref == NULL) {
-restart: /*
- * We can be here with a NULL or root WT_REF; the page release
- * function handles them internally, don't complicate this code
- * by calling them out.
- */
- WT_ERR(__wt_page_release(session, couple, flags));
+ if ((ref = ref_orig) == NULL) {
+ if (0) {
+restart: /*
+ * Yield before retrying, and if we've yielded enough
+ * times, start sleeping so we don't burn CPU to no
+ * purpose.
+ */
+ __wt_spin_backoff(&yield_count, &sleep_usecs);
- /*
- * We're not supposed to walk trees without root pages. As this
- * has not always been the case, assert to debug that change.
- */
- WT_ASSERT(session, btree->root.page != NULL);
+ WT_ERR(__wt_page_release(session, couple, flags));
+ couple = NULL;
+ }
- couple = couple_orig = ref = &btree->root;
- initial_descent = true;
- goto descend;
+ if ((ref = ref_orig) == NULL) {
+ ref = &btree->root;
+ WT_INTL_INDEX_GET(session, ref->page, pindex);
+ slot = prev ? pindex->entries - 1 : 0;
+ goto descend;
+ }
}
/*
@@ -340,12 +396,9 @@ restart: /*
/*
* If at the root and returning internal pages, return
- * the root page, otherwise we're done. Regardless, no
- * hazard pointer is required, release the one we hold.
+ * the root page, otherwise we're done.
*/
if (__wt_ref_is_root(ref)) {
- WT_ERR(__wt_page_release(
- session, couple, flags));
if (!LF_ISSET(WT_READ_SKIP_INTL))
*refp = ref;
goto done;
@@ -356,7 +409,7 @@ restart: /*
* all of the child pages were deleted, mark it for
* eviction.
*/
- if (empty_internal && pindex->entries > 1) {
+ if (empty_internal) {
__wt_page_evict_soon(session, ref);
empty_internal = false;
}
@@ -367,17 +420,18 @@ restart: /*
* handle restart or not-found returns, it would require
* additional complexity and is not a possible return:
* we're moving to the parent of the current child page,
- * the parent can't have been evicted. (This is why we
- * don't pass "prev" to the page-swap function, we can't
- * handle the restart error returned if the parent page
- * is currently splitting.)
+ * the parent can't have been evicted.
*/
if (!LF_ISSET(WT_READ_SKIP_INTL)) {
WT_ERR(__wt_page_swap(
- session, couple, ref, false, flags));
+ session, couple, ref, flags));
+ couple = NULL;
*refp = ref;
goto done;
}
+
+ /* Encourage races. */
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_8);
}
if (prev)
@@ -389,9 +443,9 @@ restart: /*
++*walkcntp;
for (;;) {
- /*
- * Move to the next slot, and set the reference hint if
- * it's wrong (used when we continue the walk). We don't
+descend: /*
+ * Get a reference, setting the reference hint if it's
+ * wrong (used when we continue the walk). We don't
* always update the hints when splitting, it's expected
* for them to be incorrect in some workloads.
*/
@@ -452,12 +506,41 @@ restart: /*
break;
}
- ret = __wt_page_swap(session, couple, ref, prev,
+ ret = __wt_page_swap(session, couple, ref,
WT_READ_NOTFOUND_OK | WT_READ_RESTART_OK | flags);
+ if (ret == 0) {
+ /* Success, so "couple" has been released. */
+ couple = NULL;
+
+ /* Return leaf pages to our caller. */
+ if (!WT_PAGE_IS_INTERNAL(ref->page)) {
+ *refp = ref;
+ goto done;
+ }
+
+ /* Set the new "couple" value. */
+ couple = ref;
+
+ /* Configure traversal of any internal page. */
+ empty_internal = true;
+ if (prev) {
+ if (__split_prev_race(
+ session, ref, &pindex))
+ goto restart;
+ slot = pindex->entries - 1;
+ } else {
+ WT_INTL_INDEX_GET(
+ session, ref->page, pindex);
+ slot = 0;
+ }
+ continue;
+ }
/*
- * Not-found is an expected return when only walking
+ * Not-found is an expected return when walking only
* in-cache pages, or if we see a deleted page.
+ *
+ * An expected error, so "couple" is unchanged.
*/
if (ret == WT_NOTFOUND) {
WT_NOT_READ(ret, 0);
@@ -466,94 +549,24 @@ restart: /*
/*
* The page we're moving to might have split, in which
- * case move to the last position we held.
- */
- if (ret == WT_RESTART) {
- ret = 0;
-
- /*
- * Yield before retrying, and if we've yielded
- * enough times, start sleeping so we don't burn
- * CPU to no purpose.
- */
- __wt_spin_backoff(
- &yield_count, &sleep_usecs);
-
- /*
- * If a cursor is setting up at the end of the
- * tree, we can't use our parent page's index,
- * because it may have already split; restart
- * the walk.
- */
- if (prev && initial_descent)
- goto restart;
-
- /*
- * If a new walk that never coupled from the
- * root to a new saved position in the tree,
- * restart the walk.
- */
- if (couple == &btree->root)
- goto restart;
-
- /*
- * If restarting from some original position,
- * repeat the increment or decrement we made at
- * that time. Otherwise, couple is an internal
- * page we've acquired after moving from that
- * starting position and we can treat it as a
- * new page. This works because we never acquire
- * a hazard pointer on a leaf page we're not
- * going to return to our caller, this will quit
- * working if that ever changes.
- */
- WT_ASSERT(session,
- couple == couple_orig ||
- WT_PAGE_IS_INTERNAL(couple->page));
- ref = couple;
- __ref_index_slot(session, ref, &pindex, &slot);
- if (couple == couple_orig)
- break;
- }
- WT_ERR(ret);
- couple = ref;
-
- /*
- * A new page: configure for traversal of any internal
- * page's children, else return the leaf page.
+ * case restart the movement.
+ *
+ * An expected error, so "couple" is unchanged.
*/
- if (WT_PAGE_IS_INTERNAL(ref->page)) {
-descend: empty_internal = true;
-
- /*
- * There's a split race when a cursor is setting
- * up at the end of the tree.
- */
- if (prev && initial_descent) {
- if (!__ref_initial_descent_prev(
- session, ref, &pindex))
- goto restart;
- } else
- WT_INTL_INDEX_GET(
- session, ref->page, pindex);
- slot = prev ? pindex->entries - 1 : 0;
- continue;
- }
+ if (ret == WT_RESTART)
+ goto restart;
- /*
- * The tree-walk restart code knows we return any leaf
- * page we acquire (never hazard-pointer coupling on
- * after acquiring a leaf page), and asserts no restart
- * happens while holding a leaf page. This page must be
- * returned to our caller.
- */
- *refp = ref;
- goto done;
+ /* Unexpected error, so "couple" was released. */
+ couple = NULL;
+ goto err;
}
}
done:
-err: WT_LEAVE_PAGE_INDEX(session);
+err:
+ WT_TRET(__wt_page_release(session, couple, flags));
+ WT_TRET(__wt_page_release(session, ref_orig, flags));
+ WT_LEAVE_PAGE_INDEX(session);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c
index 8cc6630599b..123b640cdf4 100644
--- a/src/third_party/wiredtiger/src/btree/col_srch.c
+++ b/src/third_party/wiredtiger/src/btree/col_srch.c
@@ -179,6 +179,9 @@ descend: /*
descent = pindex->index[base - 1];
}
+ /* Encourage races. */
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_9);
+
/*
* Swap the current page for the child page. If the page splits
* while we're retrieving it, restart the search at the root.
@@ -192,7 +195,7 @@ descend: /*
* holding nothing on failure.
*/
if ((ret = __wt_page_swap(session,
- current, descent, false, WT_READ_RESTART_OK)) == 0) {
+ current, descent, WT_READ_RESTART_OK)) == 0) {
current = descent;
continue;
}
diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c
index 20acda8a1ab..a3f05a2700f 100644
--- a/src/third_party/wiredtiger/src/btree/row_srch.c
+++ b/src/third_party/wiredtiger/src/btree/row_srch.c
@@ -431,7 +431,10 @@ append: if (__wt_split_descent_race(
goto restart;
}
-descend: /*
+descend: /* Encourage races. */
+ __wt_timing_stress(session, WT_TIMING_STRESS_SPLIT_9);
+
+ /*
* Swap the current page for the child page. If the page splits
* while we're retrieving it, restart the search at the root.
* We cannot restart in the "current" page; for example, if a
@@ -444,7 +447,7 @@ descend: /*
* holding nothing on failure.
*/
if ((ret = __wt_page_swap(session,
- current, descent, false, WT_READ_RESTART_OK)) == 0) {
+ current, descent, WT_READ_RESTART_OK)) == 0) {
current = descent;
continue;
}
diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c
index 64fe93806e9..cd11a3793c5 100644
--- a/src/third_party/wiredtiger/src/cache/cache_las.c
+++ b/src/third_party/wiredtiger/src/cache/cache_las.c
@@ -19,24 +19,6 @@
WT_SESSION_NO_RECONCILE)
/*
- * __las_timing_stress --
- * Optionally add delay to simulate the race conditions in lookaside
- * sweep for debug purposes.
- */
-static void
-__las_timing_stress(WT_SESSION_IMPL *session)
-{
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
-
- /* Only sleep when lookaside sweep race flag is set. */
- if (FLD_ISSET(conn->timing_stress_flags,
- WT_TIMING_STRESS_LOOKASIDE_SWEEP))
- __wt_sleep(0, TIMING_STRESS_TEST_SLEEP);
-}
-
-/*
* __las_set_isolation --
* Switch to read-uncommitted.
*/
@@ -422,43 +404,34 @@ __wt_las_page_skip_locked(WT_SESSION_IMPL *session, WT_REF *ref)
return (false);
/*
- * If page image has the newest version of data and includes data newer
- * than the reader's snapshot then we should read the history.
+ * If some of the page's history overlaps with the reader's snapshot
+ * then we have to read it. This is only relevant if we chose versions
+ * that were unstable when the page was written.
*/
- if (ref->page_las->las_skew_newest &&
- WT_TXNID_LE(txn->snap_min, ref->page_las->las_max_txn))
+ if (ref->page_las->skew_newest &&
+ WT_TXNID_LE(txn->snap_min, ref->page_las->unstable_txn))
return (false);
- /*
- * If page image has the oldest version of data and some of the history
- * overlaps with the reader's snapshot then we should read the history.
- */
- if (!ref->page_las->las_skew_newest &&
- WT_TXNID_LE(ref->page_las->las_min_txn, txn->snap_max))
- return (false);
-
- if (!F_ISSET(txn, WT_TXN_HAS_TS_READ) && ref->page_las->las_skew_newest)
- return (true);
+ if (!F_ISSET(txn, WT_TXN_HAS_TS_READ))
+ return (ref->page_las->skew_newest);
#ifdef HAVE_TIMESTAMPS
/*
* Skip lookaside pages if reading as of a timestamp, we evicted new
* versions of data and all the updates are in the past.
*/
- if (F_ISSET(&session->txn, WT_TXN_HAS_TS_READ) &&
- ref->page_las->las_skew_newest &&
+ if (ref->page_las->skew_newest &&
__wt_timestamp_cmp(
- &ref->page_las->onpage_timestamp, &session->txn.read_timestamp) < 0)
+ &txn->read_timestamp, &ref->page_las->unstable_timestamp) > 0)
return (true);
/*
* Skip lookaside pages if reading as of a timestamp, we evicted old
- * versions of data and all the updates are in the future.
+ * versions of data and all the unstable updates are in the future.
*/
- if (F_ISSET(&session->txn, WT_TXN_HAS_TS_READ) &&
- !ref->page_las->las_skew_newest &&
+ if (!ref->page_las->skew_newest &&
__wt_timestamp_cmp(
- &ref->page_las->min_timestamp, &session->txn.read_timestamp) > 0)
+ &txn->read_timestamp, &ref->page_las->unstable_timestamp) < 0)
return (true);
#endif
@@ -581,8 +554,8 @@ __las_insert_block_verbose(WT_SESSION_IMPL *session, WT_MULTI *multi)
(void)__wt_eviction_dirty_needed(session, &pct_dirty);
#ifdef HAVE_TIMESTAMPS
- WT_RET(__wt_timestamp_to_hex_string(
- session, hex_timestamp, &multi->page_las.min_timestamp));
+ WT_RET(__wt_timestamp_to_hex_string(session, hex_timestamp,
+ &multi->page_las.unstable_timestamp));
ts = hex_timestamp;
#else
ts = "disabled";
@@ -591,14 +564,14 @@ __las_insert_block_verbose(WT_SESSION_IMPL *session, WT_MULTI *multi)
WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY,
"Page reconciliation triggered lookaside write "
"file ID %" PRIu32 ", page ID %" PRIu64 ". "
- "Max txn ID %" PRIu64 ", min timestamp %s, skewed %s. "
+ "Max txn ID %" PRIu64 ", unstable timestamp %s, %s. "
"Entries now in lookaside file: %" PRId64 ", "
"cache dirty: %2.3f%% , "
"cache use: %2.3f%%",
btree_id, multi->page_las.las_pageid,
- multi->page_las.las_max_txn,
+ multi->page_las.max_txn,
ts,
- multi->page_las.las_skew_newest ? "newest" : "oldest",
+ multi->page_las.skew_newest ? "newest" : "not newest",
WT_STAT_READ(conn->stats, cache_lookaside_entries),
pct_dirty, pct_full);
}
@@ -742,8 +715,7 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
* table. (We check the length because row-store doesn't
* write zero-length data items.)
*/
- if (multi->page_las.las_skew_newest &&
- upd == list->onpage_upd &&
+ if (upd == list->onpage_upd &&
upd->size > 0 &&
(upd->type == WT_UPDATE_STANDARD ||
upd->type == WT_UPDATE_MODIFY)) {
@@ -1052,7 +1024,7 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
locked = true;
/* Encourage a race */
- __las_timing_stress(session);
+ __wt_timing_stress(session, WT_TIMING_STRESS_LOOKASIDE_SWEEP);
/*
* When continuing a sweep, position the cursor using the key from the
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index 7dee7a5e756..0945d768ce2 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -138,6 +138,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
{ "async", "category",
NULL, NULL,
confchk_wiredtiger_open_async_subconfigs, 3 },
+ { "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
{ "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
@@ -189,7 +190,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
{ "timing_stress_for_test", "list",
NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\","
"\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
- "\"split_6\",\"split_7\"]",
+ "\"split_6\",\"split_7\",\"split_8\",\"split_9\"]",
NULL, 0 },
{ "verbose", "list",
NULL, "choices=[\"api\",\"block\",\"checkpoint\","
@@ -806,6 +807,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
{ "buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0 },
{ "builtin_extension_config", "string", NULL, NULL, NULL, 0 },
{ "cache_cursors", "boolean", NULL, NULL, NULL, 0 },
+ { "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
{ "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
@@ -879,7 +881,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
{ "timing_stress_for_test", "list",
NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\","
"\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
- "\"split_6\",\"split_7\"]",
+ "\"split_6\",\"split_7\",\"split_8\",\"split_9\"]",
NULL, 0 },
{ "transaction_sync", "category",
NULL, NULL,
@@ -909,6 +911,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
{ "buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0 },
{ "builtin_extension_config", "string", NULL, NULL, NULL, 0 },
{ "cache_cursors", "boolean", NULL, NULL, NULL, 0 },
+ { "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
{ "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
@@ -982,7 +985,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
{ "timing_stress_for_test", "list",
NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\","
"\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
- "\"split_6\",\"split_7\"]",
+ "\"split_6\",\"split_7\",\"split_8\",\"split_9\"]",
NULL, 0 },
{ "transaction_sync", "category",
NULL, NULL,
@@ -1013,6 +1016,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
{ "buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0 },
{ "builtin_extension_config", "string", NULL, NULL, NULL, 0 },
{ "cache_cursors", "boolean", NULL, NULL, NULL, 0 },
+ { "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
{ "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
@@ -1082,7 +1086,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
{ "timing_stress_for_test", "list",
NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\","
"\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
- "\"split_6\",\"split_7\"]",
+ "\"split_6\",\"split_7\",\"split_8\",\"split_9\"]",
NULL, 0 },
{ "transaction_sync", "category",
NULL, NULL,
@@ -1111,6 +1115,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
{ "buffer_alignment", "int", NULL, "min=-1,max=1MB", NULL, 0 },
{ "builtin_extension_config", "string", NULL, NULL, NULL, 0 },
{ "cache_cursors", "boolean", NULL, NULL, NULL, 0 },
+ { "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
{ "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
@@ -1180,7 +1185,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
{ "timing_stress_for_test", "list",
NULL, "choices=[\"checkpoint_slow\",\"lookaside_sweep_race\","
"\"split_1\",\"split_2\",\"split_3\",\"split_4\",\"split_5\","
- "\"split_6\",\"split_7\"]",
+ "\"split_6\",\"split_7\",\"split_8\",\"split_9\"]",
NULL, 0 },
{ "transaction_sync", "category",
NULL, NULL,
@@ -1250,13 +1255,13 @@ static const WT_CONFIG_ENTRY config_entries[] = {
confchk_WT_CONNECTION_query_timestamp, 1
},
{ "WT_CONNECTION.reconfigure",
- "async=(enabled=false,ops_max=1024,threads=2),cache_overhead=8,"
- "cache_size=100MB,checkpoint=(log_size=0,wait=0),"
- "compatibility=(release=),error_prefix=,eviction=(threads_max=8,"
- "threads_min=1),eviction_checkpoint_target=5,"
- "eviction_dirty_target=5,eviction_dirty_trigger=20,"
- "eviction_target=80,eviction_trigger=95,"
- "file_manager=(close_handle_minimum=250,close_idle_time=30,"
+ "async=(enabled=false,ops_max=1024,threads=2),cache_max_wait_ms=0"
+ ",cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,"
+ "wait=0),compatibility=(release=),error_prefix=,"
+ "eviction=(threads_max=8,threads_min=1),"
+ "eviction_checkpoint_target=1,eviction_dirty_target=5,"
+ "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
+ ",file_manager=(close_handle_minimum=250,close_idle_time=30,"
"close_scan_interval=10),log=(archive=true,prealloc=true,"
"zero_fill=false),lsm_manager=(merge=true,worker_thread_max=4),"
"lsm_merge=true,operation_tracking=(enabled=false,path=\".\"),"
@@ -1264,7 +1269,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"statistics=none,statistics_log=(json=false,on_close=false,"
"sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"timing_stress_for_test=,verbose=",
- confchk_WT_CONNECTION_reconfigure, 22
+ confchk_WT_CONNECTION_reconfigure, 23
},
{ "WT_CONNECTION.rollback_to_stable",
"",
@@ -1489,66 +1494,67 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "wiredtiger_open",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
- ",builtin_extension_config=,cache_cursors=true,cache_overhead=8,"
- "cache_size=100MB,checkpoint=(log_size=0,wait=0),"
- "checkpoint_sync=true,compatibility=(release=,require_max=,"
- "require_min=),config_base=true,create=false,direct_io=,"
- "encryption=(keyid=,name=,secretkey=),error_prefix=,"
- "eviction=(threads_max=8,threads_min=1),"
- "eviction_checkpoint_target=5,eviction_dirty_target=5,"
- "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
- ",exclusive=false,extensions=,file_extend=,"
- "file_manager=(close_handle_minimum=250,close_idle_time=30,"
- "close_scan_interval=10),hazard_max=1000,in_memory=false,"
- "log=(archive=true,compressor=,enabled=false,file_max=100MB,"
- "path=\".\",prealloc=true,recover=on,zero_fill=false),"
- "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
- "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
- "path=\".\"),readonly=false,session_max=100,"
- "session_scratch_max=2MB,session_table_cache=true,"
- "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
+ ",builtin_extension_config=,cache_cursors=true,"
+ "cache_max_wait_ms=0,cache_overhead=8,cache_size=100MB,"
+ "checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
+ "compatibility=(release=,require_max=,require_min=),"
+ "config_base=true,create=false,direct_io=,encryption=(keyid=,"
+ "name=,secretkey=),error_prefix=,eviction=(threads_max=8,"
+ "threads_min=1),eviction_checkpoint_target=1,"
+ "eviction_dirty_target=5,eviction_dirty_trigger=20,"
+ "eviction_target=80,eviction_trigger=95,exclusive=false,"
+ "extensions=,file_extend=,file_manager=(close_handle_minimum=250,"
+ "close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
+ "in_memory=false,log=(archive=true,compressor=,enabled=false,"
+ "file_max=100MB,path=\".\",prealloc=true,recover=on,"
+ "zero_fill=false),lsm_manager=(merge=true,worker_thread_max=4),"
+ "lsm_merge=true,mmap=true,multiprocess=false,"
+ "operation_tracking=(enabled=false,path=\".\"),readonly=false,"
+ "session_max=100,session_scratch_max=2MB,session_table_cache=true"
+ ",shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
"statistics=none,statistics_log=(json=false,on_close=false,"
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),use_environment=true,use_environment_priv=false,"
"verbose=,write_through=",
- confchk_wiredtiger_open, 45
+ confchk_wiredtiger_open, 46
},
{ "wiredtiger_open_all",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
- ",builtin_extension_config=,cache_cursors=true,cache_overhead=8,"
- "cache_size=100MB,checkpoint=(log_size=0,wait=0),"
- "checkpoint_sync=true,compatibility=(release=,require_max=,"
- "require_min=),config_base=true,create=false,direct_io=,"
- "encryption=(keyid=,name=,secretkey=),error_prefix=,"
- "eviction=(threads_max=8,threads_min=1),"
- "eviction_checkpoint_target=5,eviction_dirty_target=5,"
- "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
- ",exclusive=false,extensions=,file_extend=,"
- "file_manager=(close_handle_minimum=250,close_idle_time=30,"
- "close_scan_interval=10),hazard_max=1000,in_memory=false,"
- "log=(archive=true,compressor=,enabled=false,file_max=100MB,"
- "path=\".\",prealloc=true,recover=on,zero_fill=false),"
- "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
- "mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
- "path=\".\"),readonly=false,session_max=100,"
- "session_scratch_max=2MB,session_table_cache=true,"
- "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
+ ",builtin_extension_config=,cache_cursors=true,"
+ "cache_max_wait_ms=0,cache_overhead=8,cache_size=100MB,"
+ "checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
+ "compatibility=(release=,require_max=,require_min=),"
+ "config_base=true,create=false,direct_io=,encryption=(keyid=,"
+ "name=,secretkey=),error_prefix=,eviction=(threads_max=8,"
+ "threads_min=1),eviction_checkpoint_target=1,"
+ "eviction_dirty_target=5,eviction_dirty_trigger=20,"
+ "eviction_target=80,eviction_trigger=95,exclusive=false,"
+ "extensions=,file_extend=,file_manager=(close_handle_minimum=250,"
+ "close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
+ "in_memory=false,log=(archive=true,compressor=,enabled=false,"
+ "file_max=100MB,path=\".\",prealloc=true,recover=on,"
+ "zero_fill=false),lsm_manager=(merge=true,worker_thread_max=4),"
+ "lsm_merge=true,mmap=true,multiprocess=false,"
+ "operation_tracking=(enabled=false,path=\".\"),readonly=false,"
+ "session_max=100,session_scratch_max=2MB,session_table_cache=true"
+ ",shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
"statistics=none,statistics_log=(json=false,on_close=false,"
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),use_environment=true,use_environment_priv=false,"
"verbose=,version=(major=0,minor=0),write_through=",
- confchk_wiredtiger_open_all, 46
+ confchk_wiredtiger_open_all, 47
},
{ "wiredtiger_open_basecfg",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
- ",builtin_extension_config=,cache_cursors=true,cache_overhead=8,"
- "cache_size=100MB,checkpoint=(log_size=0,wait=0),"
- "checkpoint_sync=true,compatibility=(release=,require_max=,"
- "require_min=),direct_io=,encryption=(keyid=,name=,secretkey=),"
- "error_prefix=,eviction=(threads_max=8,threads_min=1),"
- "eviction_checkpoint_target=5,eviction_dirty_target=5,"
+ ",builtin_extension_config=,cache_cursors=true,"
+ "cache_max_wait_ms=0,cache_overhead=8,cache_size=100MB,"
+ "checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
+ "compatibility=(release=,require_max=,require_min=),direct_io=,"
+ "encryption=(keyid=,name=,secretkey=),error_prefix=,"
+ "eviction=(threads_max=8,threads_min=1),"
+ "eviction_checkpoint_target=1,eviction_dirty_target=5,"
"eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
",extensions=,file_extend=,file_manager=(close_handle_minimum=250"
",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
@@ -1563,16 +1569,17 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),verbose=,version=(major=0,minor=0),write_through=",
- confchk_wiredtiger_open_basecfg, 40
+ confchk_wiredtiger_open_basecfg, 41
},
{ "wiredtiger_open_usercfg",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
- ",builtin_extension_config=,cache_cursors=true,cache_overhead=8,"
- "cache_size=100MB,checkpoint=(log_size=0,wait=0),"
- "checkpoint_sync=true,compatibility=(release=,require_max=,"
- "require_min=),direct_io=,encryption=(keyid=,name=,secretkey=),"
- "error_prefix=,eviction=(threads_max=8,threads_min=1),"
- "eviction_checkpoint_target=5,eviction_dirty_target=5,"
+ ",builtin_extension_config=,cache_cursors=true,"
+ "cache_max_wait_ms=0,cache_overhead=8,cache_size=100MB,"
+ "checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
+ "compatibility=(release=,require_max=,require_min=),direct_io=,"
+ "encryption=(keyid=,name=,secretkey=),error_prefix=,"
+ "eviction=(threads_max=8,threads_min=1),"
+ "eviction_checkpoint_target=1,eviction_dirty_target=5,"
"eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
",extensions=,file_extend=,file_manager=(close_handle_minimum=250"
",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
@@ -1587,7 +1594,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),verbose=,write_through=",
- confchk_wiredtiger_open_usercfg, 39
+ confchk_wiredtiger_open_usercfg, 40
},
{ NULL, NULL, NULL, 0 }
};
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index d322caac04a..589560acc88 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -137,7 +137,7 @@ __conn_add_collator(WT_CONNECTION *wt_conn,
CONNECTION_API_CALL(conn, session, add_collator, config, cfg);
WT_UNUSED(cfg);
- if (WT_STREQ(name, "none"))
+ if (strcmp(name, "none") == 0)
WT_ERR_MSG(session, EINVAL,
"invalid name for a collator: %s", name);
@@ -243,7 +243,7 @@ __conn_add_compressor(WT_CONNECTION *wt_conn,
CONNECTION_API_CALL(conn, session, add_compressor, config, cfg);
WT_UNUSED(cfg);
- if (WT_STREQ(name, "none"))
+ if (strcmp(name, "none") == 0)
WT_ERR_MSG(session, EINVAL,
"invalid name for a compressor: %s", name);
@@ -482,7 +482,7 @@ __conn_add_encryptor(WT_CONNECTION *wt_conn,
CONNECTION_API_CALL(conn, session, add_encryptor, config, cfg);
WT_UNUSED(cfg);
- if (WT_STREQ(name, "none"))
+ if (strcmp(name, "none") == 0)
WT_ERR_MSG(session, EINVAL,
"invalid name for an encryptor: %s", name);
@@ -578,7 +578,7 @@ __conn_add_extractor(WT_CONNECTION *wt_conn,
CONNECTION_API_CALL(conn, session, add_extractor, config, cfg);
WT_UNUSED(cfg);
- if (WT_STREQ(name, "none"))
+ if (strcmp(name, "none") == 0)
WT_ERR_MSG(session, EINVAL,
"invalid name for an extractor: %s", name);
@@ -1692,11 +1692,11 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
is_create || exist ? WT_FS_OPEN_CREATE : 0, &conn->lock_fh);
/*
- * If this is a read-only connection and we cannot grab the lock
- * file, check if it is because there is not write permission or
- * if the file does not exist. If so, then ignore the error.
- * XXX Ignoring the error does allow multiple read-only
- * connections to exist at the same time on a read-only directory.
+ * If this is a read-only connection and we cannot grab the lock file,
+ * check if it is because there's no write permission or if the file
+ * does not exist. If so, then ignore the error.
+ * XXX Ignoring the error does allow multiple read-only connections to
+ * exist at the same time on a read-only directory.
*
* If we got an expected permission or non-existence error then skip
* the byte lock.
@@ -1776,7 +1776,7 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
* and there's never a database home after that point without a turtle
* file. If the turtle file doesn't exist, it's a create.
*/
- WT_ERR(__wt_fs_exist(session, WT_METADATA_TURTLE, &exist));
+ WT_ERR(__wt_turtle_exists(session, &exist));
conn->is_new = exist ? 0 : 1;
if (conn->is_new) {
@@ -2024,6 +2024,8 @@ __wt_timing_stress_config(WT_SESSION_IMPL *session, const char *cfg[])
{ "split_5", WT_TIMING_STRESS_SPLIT_5 },
{ "split_6", WT_TIMING_STRESS_SPLIT_6 },
{ "split_7", WT_TIMING_STRESS_SPLIT_7 },
+ { "split_8", WT_TIMING_STRESS_SPLIT_8 },
+ { "split_9", WT_TIMING_STRESS_SPLIT_9 },
{ NULL, 0 }
};
WT_CONFIG_ITEM cval, sval;
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c
index 00de16e6c21..dbb602921a8 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache.c
@@ -143,6 +143,10 @@ __cache_config_local(WT_SESSION_IMPL *session, bool shared, const char *cfg[])
conn->evict_threads_max = evict_threads_max;
conn->evict_threads_min = evict_threads_min;
+ /* Retrieve the wait time and convert from milliseconds */
+ WT_RET(__wt_config_gets(session, cfg, "cache_max_wait_ms", &cval));
+ cache->cache_max_wait_us = (uint64_t)(cval.val * WT_THOUSAND);
+
return (0);
}
diff --git a/src/third_party/wiredtiger/src/conn/conn_reconfig.c b/src/third_party/wiredtiger/src/conn/conn_reconfig.c
index 8672e824579..8bf2a53e49b 100644
--- a/src/third_party/wiredtiger/src/conn/conn_reconfig.c
+++ b/src/third_party/wiredtiger/src/conn/conn_reconfig.c
@@ -232,8 +232,7 @@ done: conn->req_max_major = max_major;
conn->req_min_major = min_major;
conn->req_min_minor = min_minor;
-err: if (value != NULL)
- __wt_free(session, value);
+err: __wt_free(session, value);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/cursor/cur_file.c b/src/third_party/wiredtiger/src/cursor/cur_file.c
index 93f1392aef9..4f81cc0c10b 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_file.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_file.c
@@ -710,7 +710,7 @@ __curfile_create(WT_SESSION_IMPL *session,
* WiredTiger.wt should not be cached, doing so interferes
* with named checkpoints.
*/
- if (cacheable && !WT_STREQ(WT_METAFILE_URI, cursor->internal_uri))
+ if (cacheable && strcmp(WT_METAFILE_URI, cursor->internal_uri) != 0)
F_SET(cursor, WT_CURSTD_CACHEABLE);
WT_ERR(__wt_cursor_init(
diff --git a/src/third_party/wiredtiger/src/cursor/cur_join.c b/src/third_party/wiredtiger/src/cursor/cur_join.c
index af11ced4ff1..8d0abbeccbf 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_join.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_join.c
@@ -188,7 +188,7 @@ __curjoin_iter_set_entry(WT_CURSOR_JOIN_ITER *iter, u_int entry_pos)
size = strlen(to_dup->internal_uri) + 3;
WT_ERR(__wt_calloc(session, size, 1, &uri));
WT_ERR(__wt_snprintf(uri, size, "%s()", to_dup->internal_uri));
- if ((c = iter->cursor) == NULL || !WT_STREQ(c->uri, uri)) {
+ if ((c = iter->cursor) == NULL || strcmp(c->uri, uri) != 0) {
iter->cursor = NULL;
if (c != NULL)
WT_ERR(c->close(c));
diff --git a/src/third_party/wiredtiger/src/cursor/cur_metadata.c b/src/third_party/wiredtiger/src/cursor/cur_metadata.c
index c584c9c5dc3..63c005ceeef 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_metadata.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_metadata.c
@@ -625,7 +625,7 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session,
* We'll need some extra cursors to pull out column group information
* and chase "source" entries.
*/
- if (WT_STREQ(uri, "metadata:create")) {
+ if (strcmp(uri, "metadata:create") == 0) {
F_SET(mdc, WT_MDC_CREATEONLY);
WT_ERR(__wt_metadata_cursor_open(session, cfg[1],
&mdc->create_cursor));
diff --git a/src/third_party/wiredtiger/src/cursor/cur_stat.c b/src/third_party/wiredtiger/src/cursor/cur_stat.c
index 9cd0ee2c484..9d68e2399be 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_stat.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_stat.c
@@ -529,7 +529,7 @@ __wt_curstat_init(WT_SESSION_IMPL *session,
dsrc_uri = uri + strlen("statistics:");
- if (WT_STREQ(dsrc_uri, "join"))
+ if (strcmp(dsrc_uri, "join") == 0)
WT_RET(__curstat_join_init(session, curjoin, cfg, cst));
else if (WT_PREFIX_MATCH(dsrc_uri, "colgroup:"))
diff --git a/src/third_party/wiredtiger/src/cursor/cur_std.c b/src/third_party/wiredtiger/src/cursor/cur_std.c
index 766712c244c..5e719599160 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_std.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_std.c
@@ -722,7 +722,7 @@ __wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri,
/* Fast path overwrite configuration */
if (have_config && cfg[2] == NULL &&
- WT_STREQ(cfg[1], "overwrite=false")) {
+ strcmp(cfg[1], "overwrite=false") == 0) {
have_config = false;
overwrite_flag = 0;
} else
@@ -780,7 +780,7 @@ __wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri,
bucket = hash_value % WT_HASH_ARRAY_SIZE;
TAILQ_FOREACH(cursor, &session->cursor_cache[bucket], q) {
if (cursor->uri_hash == hash_value &&
- WT_STREQ(cursor->uri, uri)) {
+ strcmp(cursor->uri, uri) == 0) {
if ((ret = cursor->reopen(cursor, false)) != 0) {
F_CLR(cursor, WT_CURSTD_CACHEABLE);
session->dhandle = NULL;
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 8396612b7ca..05397843fc7 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -563,7 +563,7 @@ __evict_update_work(WT_SESSION_IMPL *session)
conn = S2C(session);
cache = conn->cache;
- dirty_target = cache->eviction_dirty_target;
+ dirty_target = __wt_eviction_dirty_target(cache);
dirty_trigger = cache->eviction_dirty_trigger;
target = cache->eviction_target;
trigger = cache->eviction_trigger;
@@ -2345,7 +2345,8 @@ __wt_cache_eviction_worker(
WT_TRACK_OP_DECL;
WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *txn_state;
- uint64_t initial_progress, max_progress, time_start, time_stop;
+ uint64_t elapsed, time_start, time_stop;
+ uint64_t initial_progress, max_progress;
bool timer;
WT_TRACK_OP_INIT(session);
@@ -2367,8 +2368,7 @@ __wt_cache_eviction_worker(
__wt_evict_server_wake(session);
/* Track how long application threads spend doing eviction. */
- timer =
- WT_STAT_ENABLED(session) && !F_ISSET(session, WT_SESSION_INTERNAL);
+ timer = !F_ISSET(session, WT_SESSION_INTERNAL);
if (timer)
time_start = __wt_clock(session);
@@ -2405,22 +2405,10 @@ __wt_cache_eviction_worker(
/* See if eviction is still needed. */
if (!__wt_eviction_needed(session, busy, readonly, &pct_full) ||
- ((pct_full < 100.0 || cache->eviction_scrub_limit > 0.0) &&
- (cache->eviction_progress >
+ (pct_full < 100.0 && (cache->eviction_progress >
initial_progress + max_progress)))
break;
- /*
- * Don't make application threads participate in scrubbing for
- * checkpoints. Just throttle updates instead.
- */
- if (WT_EVICT_HAS_WORKERS(session) &&
- cache->eviction_scrub_limit > 0.0 &&
- !F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD)) {
- __wt_yield();
- continue;
- }
-
/* Evict a page. */
switch (ret = __evict_page(session, false)) {
case 0:
@@ -2438,13 +2426,26 @@ __wt_cache_eviction_worker(
default:
goto err;
}
+ /* Stop if we've exceeded the time out. */
+ if (timer && cache->cache_max_wait_us != 0) {
+ time_stop = __wt_clock(session);
+ if (session->cache_wait_us +
+ WT_CLOCKDIFF_US(time_stop, time_start) >
+ cache->cache_max_wait_us)
+ goto err;
+ }
}
err: if (timer) {
time_stop = __wt_clock(session);
- WT_STAT_CONN_INCRV(session,
- application_cache_time,
- WT_CLOCKDIFF_US(time_stop, time_start));
+ elapsed = WT_CLOCKDIFF_US(time_stop, time_start);
+ WT_STAT_CONN_INCRV(session, application_cache_time, elapsed);
+ session->cache_wait_us += elapsed;
+ if (cache->cache_max_wait_us != 0 &&
+ session->cache_wait_us > cache->cache_max_wait_us) {
+ WT_TRET(WT_CACHE_FULL);
+ WT_STAT_CONN_INCR(session, cache_timed_out_ops);
+ }
}
done: WT_TRACK_OP_END(session);
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h
index ca2176fcf0e..aabb19c86aa 100644
--- a/src/third_party/wiredtiger/src/include/api.h
+++ b/src/third_party/wiredtiger/src/include/api.h
@@ -48,6 +48,9 @@
WT_TRACK_OP_INIT(s); \
WT_SINGLE_THREAD_CHECK_START(s); \
WT_ERR(WT_SESSION_CHECK_PANIC(s)); \
+ /* Reset wait time if this isn't an API re-entry. */ \
+ if (__oldname == NULL) \
+ (s)->cache_wait_us = 0; \
__wt_verbose((s), WT_VERB_API, "%s", "CALL: " #h ":" #n)
#define API_CALL_NOCONF(s, h, n, dh) do { \
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 33e382feba2..64e84e59d36 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -234,14 +234,13 @@ struct __wt_ovfl_reuse {
*/
struct __wt_page_lookaside {
uint64_t las_pageid; /* Page ID in lookaside */
- uint64_t las_max_txn; /* Max transaction ID in lookaside */
- uint64_t las_min_txn; /* Min transaction ID in lookaside */
- WT_DECL_TIMESTAMP(min_timestamp)/* Min timestamp in lookaside */
- /* Max timestamp on page */
- WT_DECL_TIMESTAMP(onpage_timestamp)
+ uint64_t max_txn; /* Maximum transaction ID */
+ uint64_t unstable_txn; /* First transaction ID not on page */
+ WT_DECL_TIMESTAMP(max_timestamp)/* Maximum timestamp */
+ WT_DECL_TIMESTAMP(unstable_timestamp)/* First timestamp not on page */
bool eviction_to_lookaside; /* Revert to lookaside on eviction */
- bool las_skew_newest; /* On-page skewed to newest */
bool invalid; /* History is required for correct reads */
+ bool skew_newest; /* Page image has newest versions */
};
/*
@@ -270,6 +269,9 @@ struct __wt_page_modify {
uint64_t rec_max_txn;
WT_DECL_TIMESTAMP(rec_max_timestamp)
+ /* Stable timestamp at last reconciliation. */
+ WT_DECL_TIMESTAMP(last_stable_timestamp)
+
/* The largest update transaction ID (approximate). */
uint64_t update_txn;
@@ -481,7 +483,9 @@ struct __wt_page_modify {
#define WT_PM_REC_REPLACE 3 /* Reconciliation: single block */
uint8_t rec_result; /* Reconciliation state */
- uint8_t update_restored; /* Page created by restoring updates */
+#define WT_PAGE_RS_LOOKASIDE 0x1
+#define WT_PAGE_RS_RESTORED 0x2
+ uint8_t restore_state; /* Created by restoring updates */
};
/*
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 81c166eb0e4..7813f1299fd 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -1158,8 +1158,7 @@ __wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref)
* Return if a truncate operation is active.
*/
static inline bool
-__wt_page_del_active(
- WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
+__wt_page_del_active(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
{
WT_PAGE_DELETED *page_del;
uint8_t prepare_state;
@@ -1190,10 +1189,10 @@ __wt_page_las_active(WT_SESSION_IMPL *session, WT_REF *ref)
if ((page_las = ref->page_las) == NULL)
return (false);
- if (page_las->invalid || !ref->page_las->las_skew_newest)
+ if (page_las->invalid || !ref->page_las->skew_newest)
return (true);
- if (__wt_txn_visible_all(session, page_las->las_max_txn,
- WT_TIMESTAMP_NULL(&page_las->onpage_timestamp)))
+ if (__wt_txn_visible_all(session, page_las->max_txn,
+ WT_TIMESTAMP_NULL(&page_las->max_timestamp)))
return (false);
return (true);
@@ -1329,6 +1328,7 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
static inline bool
__wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page)
{
+ WT_DECL_TIMESTAMP(pinned_ts)
WT_PAGE_MODIFY *mod;
WT_TXN_GLOBAL *txn_global;
@@ -1338,7 +1338,8 @@ __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page)
* If the page hasn't been through one round of update/restore, give it
* a try.
*/
- if ((mod = page->modify) == NULL || !mod->update_restored)
+ if ((mod = page->modify) == NULL ||
+ !FLD_ISSET(mod->restore_state, WT_PAGE_RS_RESTORED))
return (true);
/*
@@ -1356,17 +1357,12 @@ __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page)
return (true);
#ifdef HAVE_TIMESTAMPS
- {
- bool same_timestamp;
-
- same_timestamp = false;
- if (!__wt_timestamp_iszero(&mod->last_eviction_timestamp))
- WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
- same_timestamp = __wt_timestamp_cmp(
+ if (!__wt_timestamp_iszero(&mod->last_eviction_timestamp)) {
+ __wt_txn_pinned_timestamp(session, &pinned_ts);
+ if (__wt_timestamp_cmp(
&mod->last_eviction_timestamp,
- &txn_global->pinned_timestamp) == 0);
- if (!same_timestamp)
- return (true);
+ &txn_global->pinned_timestamp) != 0)
+ return (true);
}
#endif
@@ -1605,6 +1601,8 @@ __wt_split_descent_race(
* update. A thread can read the parent page's original page index and
* then read the split page's replacement index.
*
+ * For example, imagine a search descending the tree.
+ *
* Because internal page splits work by truncating the original page to
* the initial part of the original page, the result of this race is we
* will have a search key that points past the end of the current page.
@@ -1649,73 +1647,17 @@ __wt_split_descent_race(
* work by truncating the split page, so the split page search is for
* content the split page retains after the split, and we ignore this
* race.
- */
- WT_INTL_INDEX_GET(session, ref->home, pindex);
- return (pindex != saved_pindex);
-}
-
-/*
- * __wt_split_prev_race --
- * Return if we raced with an internal page split when moving backwards
- * through the tree.
- */
-static inline bool
-__wt_split_prev_race(WT_SESSION_IMPL *session, WT_REF *ref)
-{
- WT_PAGE_INDEX *pindex;
-
- /*
- * There's a split race when a cursor moving backwards through the tree
- * descends the tree. If we're splitting an internal page into its
- * parent, we move the WT_REF structures and update the parent's page
- * index before updating the split page's page index, and it's not an
- * atomic update. A thread can read the parent and split page's original
- * indexes during a split, or read the parent page's replacement page
- * index and then read the split page's original index, either of which
- * can lead to skipping pages.
*
- * For example, imagine an internal page with 3 child pages, with the
- * namespaces a-f, g-h and i-j; the first child page splits. The parent
- * starts out with the following page-index:
+ * This code is a general purpose check for a descent race and we call
+ * it in other cases, for example, a cursor traversing backwards through
+ * the tree.
*
- * | ... | a | g | i | ... |
- *
- * The split page starts out with the following page-index:
- *
- * | a | b | c | d | e | f |
- *
- * The first step is to move the c-f ranges into a new subtree, so, for
- * example we might have two new internal pages 'c' and 'e', where the
- * new 'c' page references the c-d namespace and the new 'e' page
- * references the e-f namespace. The top of the subtree references the
- * parent page, but until the parent's page index is updated, threads in
- * the subtree won't be able to ascend out of the subtree. However, once
- * the parent page's page index is updated to this:
- *
- * | ... | a | c | e | g | i | ... |
- *
- * threads in the subtree can ascend into the parent. Imagine a cursor
- * in the c-d part of the namespace that ascends to the parent's 'c'
- * slot. It would then decrement to the slot before the 'c' slot, the
- * 'a' slot.
- *
- * The previous-cursor movement selects the last slot in the 'a' page;
- * if the split page's page-index hasn't been updated yet, it selects
- * the 'f' slot, which is incorrect. Once the split page's page index is
- * updated to this:
- *
- * | a | b |
- *
- * the previous-cursor movement will select the 'b' slot, which is
- * correct.
- *
- * This function takes an argument which is the internal page into which
- * we're coupling. If the last slot on the page no longer points to
- * the current page as its "home", the page is being split and part of
- * its namespace moved, we have to restart.
+ * Presumably we acquired a page index on the child page before calling
+ * this code, don't re-order that acquisition with this check.
*/
- WT_INTL_INDEX_GET(session, ref->page, pindex);
- return (pindex->index[pindex->entries - 1]->home != ref->page);
+ WT_BARRIER();
+ WT_INTL_INDEX_GET(session, ref->home, pindex);
+ return (pindex != saved_pindex);
}
/*
@@ -1724,8 +1666,8 @@ __wt_split_prev_race(WT_SESSION_IMPL *session, WT_REF *ref)
* coupling up/down the tree.
*/
static inline int
-__wt_page_swap_func(WT_SESSION_IMPL *session,
- WT_REF *held, WT_REF *want, bool prev_race, uint32_t flags
+__wt_page_swap_func(
+ WT_SESSION_IMPL *session, WT_REF *held, WT_REF *want, uint32_t flags
#ifdef HAVE_DIAGNOSTIC
, const char *file, int line
#endif
@@ -1755,18 +1697,6 @@ __wt_page_swap_func(WT_SESSION_IMPL *session,
);
/*
- * We can race when descending into an internal page as part of moving
- * backwards through the tree, and we have to detect that race before
- * releasing the page from which we are coupling, else we can't restart
- * the movement.
- */
- if (ret == 0 && prev_race && WT_PAGE_IS_INTERNAL(want->page) &&
- __wt_split_prev_race(session, want)) {
- ret = WT_RESTART;
- WT_TRET(__wt_page_release(session, want, flags));
- }
-
- /*
* Expected failures: page not found or restart. Our callers list the
* errors they're expecting to handle.
*/
diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h
index 1299d3e90e3..7d07e6dfd98 100644
--- a/src/third_party/wiredtiger/src/include/cache.h
+++ b/src/third_party/wiredtiger/src/include/cache.h
@@ -120,11 +120,11 @@ struct __wt_cache {
double eviction_checkpoint_target;/* Percent to reduce dirty
to during checkpoint scrubs */
- double eviction_scrub_limit; /* Percent of cache to trigger
- dirty eviction during checkpoint
- scrubs */
+ double eviction_scrub_target; /* Current scrub target */
u_int overhead_pct; /* Cache percent adjustment */
+ uint64_t cache_max_wait_us; /* Maximum time an operation waits for
+ * space in cache */
/*
* Eviction thread tuning information.
diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i
index fc127942d02..7f12949e162 100644
--- a/src/third_party/wiredtiger/src/include/cache.i
+++ b/src/third_party/wiredtiger/src/include/cache.i
@@ -276,6 +276,22 @@ __wt_eviction_clean_needed(WT_SESSION_IMPL *session, double *pct_fullp)
}
/*
+ * __wt_eviction_dirty_target --
+ * Return the effective dirty target (including checkpoint scrubbing).
+ */
+static inline double
+__wt_eviction_dirty_target(WT_CACHE *cache)
+{
+ double dirty_target, scrub_target;
+
+ dirty_target = cache->eviction_dirty_target;
+ scrub_target = cache->eviction_scrub_target;
+
+ return (scrub_target > 0 && scrub_target < dirty_target ?
+ scrub_target : dirty_target);
+}
+
+/*
* __wt_eviction_dirty_needed --
* Return if an application thread should do eviction due to the total
* volume of dirty data in cache.
@@ -284,7 +300,6 @@ static inline bool
__wt_eviction_dirty_needed(WT_SESSION_IMPL *session, double *pct_fullp)
{
WT_CACHE *cache;
- double dirty_trigger;
uint64_t dirty_inuse, bytes_max;
cache = S2C(session)->cache;
@@ -299,10 +314,8 @@ __wt_eviction_dirty_needed(WT_SESSION_IMPL *session, double *pct_fullp)
if (pct_fullp != NULL)
*pct_fullp = ((100.0 * dirty_inuse) / bytes_max);
- if ((dirty_trigger = cache->eviction_scrub_limit) < 1.0)
- dirty_trigger = cache->eviction_dirty_trigger;
-
- return (dirty_inuse > (uint64_t)(dirty_trigger * bytes_max) / 100);
+ return (dirty_inuse > (uint64_t)(
+ cache->eviction_dirty_trigger * bytes_max) / 100);
}
/*
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index d0bebe8da5d..22459b0072c 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -458,6 +458,8 @@ struct __wt_connection_impl {
#define WT_TIMING_STRESS_SPLIT_5 0x040u
#define WT_TIMING_STRESS_SPLIT_6 0x080u
#define WT_TIMING_STRESS_SPLIT_7 0x100u
+#define WT_TIMING_STRESS_SPLIT_8 0x200u
+#define WT_TIMING_STRESS_SPLIT_9 0x400u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint64_t timing_stress_flags;
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 7e2d4a4786d..24e16adefd1 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -498,6 +498,7 @@ extern int __wt_lsm_work_switch(WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **ent
extern int __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_lsm_chunk_visible_all(WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_lsm_work_enable_evict(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_worker_stop(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -537,6 +538,7 @@ extern int __wt_meta_track_drop(WT_SESSION_IMPL *session, const char *filename)
extern int __wt_meta_track_handle_lock(WT_SESSION_IMPL *session, bool created) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_meta_track_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_meta_track_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_turtle_exists(WT_SESSION_IMPL *session, bool *existp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_turtle_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/third_party/wiredtiger/src/include/lsm.h b/src/third_party/wiredtiger/src/include/lsm.h
index f515e03519a..67ef28757ef 100644
--- a/src/third_party/wiredtiger/src/include/lsm.h
+++ b/src/third_party/wiredtiger/src/include/lsm.h
@@ -107,7 +107,8 @@ struct __wt_lsm_chunk {
uint32_t id; /* ID used to generate URIs */
uint32_t generation; /* Merge generation */
uint32_t refcnt; /* Number of worker thread references */
- uint32_t bloom_busy; /* Number of worker thread references */
+ uint32_t bloom_busy; /* Currently creating bloom filter */
+ uint32_t evict_enabled; /* Eviction allowed on the chunk */
int8_t empty; /* 1/0: checkpoint missing */
int8_t evicted; /* 1/0: in-memory chunk was evicted */
@@ -129,13 +130,19 @@ struct __wt_lsm_chunk {
* is required.
*/
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_LSM_WORK_BLOOM 0x01u /* Create a bloom filter */
-#define WT_LSM_WORK_DROP 0x02u /* Drop unused chunks */
-#define WT_LSM_WORK_FLUSH 0x04u /* Flush a chunk to disk */
-#define WT_LSM_WORK_MERGE 0x08u /* Look for a tree merge */
-#define WT_LSM_WORK_SWITCH 0x10u /* Switch to new in-memory chunk */
+#define WT_LSM_WORK_BLOOM 0x01u /* Create a bloom filter */
+#define WT_LSM_WORK_DROP 0x02u /* Drop unused chunks */
+#define WT_LSM_WORK_ENABLE_EVICT 0x04u /* Enable eviction on a chunk */
+#define WT_LSM_WORK_FLUSH 0x08u /* Flush a chunk to disk */
+#define WT_LSM_WORK_MERGE 0x10u /* Look for a tree merge */
+#define WT_LSM_WORK_SWITCH 0x20u /* Switch the in-memory chunk */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
+/* Work units that are serviced by general worker threads. */
+#define WT_LSM_WORK_GENERAL_OPS \
+ (WT_LSM_WORK_BLOOM | WT_LSM_WORK_DROP | WT_LSM_WORK_ENABLE_EVICT |\
+ WT_LSM_WORK_FLUSH | WT_LSM_WORK_SWITCH)
+
/*
* WT_LSM_WORK_UNIT --
* A definition of maintenance that an LSM tree needs done.
diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h
index 1d2a5075018..7060e6cea23 100644
--- a/src/third_party/wiredtiger/src/include/misc.h
+++ b/src/third_party/wiredtiger/src/include/misc.h
@@ -227,9 +227,12 @@
} while (0)
/*
- * Check if a variable string equals a constant string. Inline the common
- * case for WiredTiger of a single byte string. This is required because not
- * all compilers optimize this case in strcmp (e.g., clang).
+ * Check if a variable string equals a constant string. Inline the common case
+ * for WiredTiger of a single byte string. This is required because not all
+ * compilers optimize this case in strcmp (e.g., clang). While this macro works
+ * in the case of comparing two pointers (a sizeof operator on a pointer won't
+ * equal 2 and the extra code will be discarded at compile time), that's not its
+ * purpose.
*/
#define WT_STREQ(s, cs) \
(sizeof(cs) == 2 ? (s)[0] == (cs)[0] && (s)[1] == '\0' : \
@@ -294,16 +297,15 @@ typedef void wt_timestamp_t;
__wt_scr_alloc_func(session, size, scratchp, __func__, __LINE__)
#define __wt_page_in(session, ref, flags) \
__wt_page_in_func(session, ref, flags, __func__, __LINE__)
-#define __wt_page_swap(session, held, want, prev_race, flags) \
- __wt_page_swap_func( \
- session, held, want, prev_race, flags, __func__, __LINE__)
+#define __wt_page_swap(session, held, want, flags) \
+ __wt_page_swap_func(session, held, want, flags, __func__, __LINE__)
#else
#define __wt_scr_alloc(session, size, scratchp) \
__wt_scr_alloc_func(session, size, scratchp)
#define __wt_page_in(session, ref, flags) \
__wt_page_in_func(session, ref, flags)
-#define __wt_page_swap(session, held, want, prev_race, flags) \
- __wt_page_swap_func(session, held, want, prev_race, flags)
+#define __wt_page_swap(session, held, want, flags) \
+ __wt_page_swap_func(session, held, want, flags)
#endif
/* Called on unexpected code path: locate the failure. */
@@ -336,6 +338,3 @@ union __wt_rand_state {
continue; \
}
#define WT_TAILQ_SAFE_REMOVE_END }
-
-/* Sleep time to uncover race conditions during timing stress test. */
-#define TIMING_STRESS_TEST_SLEEP (100 * WT_THOUSAND)
diff --git a/src/third_party/wiredtiger/src/include/misc.i b/src/third_party/wiredtiger/src/include/misc.i
index 0250479af4a..2c380e95ade 100644
--- a/src/third_party/wiredtiger/src/include/misc.i
+++ b/src/third_party/wiredtiger/src/include/misc.i
@@ -248,3 +248,26 @@ __wt_spin_backoff(uint64_t *yield_count, uint64_t *sleep_usecs)
(*sleep_usecs) = WT_MIN((*sleep_usecs) + 100, WT_THOUSAND);
__wt_sleep(0, (*sleep_usecs));
}
+
+ /* Maximum stress delay is 1/10 of a second. */
+#define WT_TIMING_STRESS_MAX_DELAY (100000)
+
+/*
+ * __wt_timing_stress --
+ * Optionally add delay to stress code paths.
+ */
+static inline void
+__wt_timing_stress(WT_SESSION_IMPL *session, u_int flag)
+{
+ WT_CONNECTION_IMPL *conn;
+ uint64_t sleep_usecs;
+
+ conn = S2C(session);
+
+ /* Only sleep when the specified configuration flag is set. */
+ if (!FLD_ISSET(conn->timing_stress_flags, flag))
+ return;
+
+ sleep_usecs = __wt_random(&session->rnd) % WT_TIMING_STRESS_MAX_DELAY;
+ __wt_sleep(0, sleep_usecs);
+}
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index e102d7f5057..cbf572f9a23 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -141,6 +141,8 @@ struct __wt_session_impl {
u_int ckpt_handle_next; /* Next empty slot */
size_t ckpt_handle_allocated; /* Bytes allocated */
+ uint64_t cache_wait_us; /* Wait time for cache for current operation */
+
/*
* Operations acting on handles.
*
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 77e0fa85b0f..1693b9baa82 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -406,6 +406,7 @@ struct __wt_connection_stats {
int64_t cache_eviction_maximum_page_size;
int64_t cache_eviction_dirty;
int64_t cache_eviction_app_dirty;
+ int64_t cache_timed_out_ops;
int64_t cache_read_overflow;
int64_t cache_eviction_deepen;
int64_t cache_write_lookaside;
@@ -455,11 +456,11 @@ struct __wt_connection_stats {
int64_t cursor_insert;
int64_t cursor_modify;
int64_t cursor_next;
+ int64_t cursor_restart;
int64_t cursor_prev;
int64_t cursor_remove;
int64_t cursor_reserve;
int64_t cursor_reset;
- int64_t cursor_restart;
int64_t cursor_search;
int64_t cursor_search_near;
int64_t cursor_sweep_buckets;
@@ -663,6 +664,7 @@ struct __wt_connection_stats {
int64_t txn_pinned_checkpoint_range;
int64_t txn_pinned_snapshot_range;
int64_t txn_pinned_timestamp;
+ int64_t txn_pinned_timestamp_checkpoint;
int64_t txn_pinned_timestamp_oldest;
int64_t txn_sync;
int64_t txn_commit;
@@ -783,6 +785,7 @@ struct __wt_dsrc_stats {
int64_t compress_raw_ok;
int64_t cursor_insert_bulk;
int64_t cursor_create;
+ int64_t cursor_restart;
int64_t cursor_insert_bytes;
int64_t cursor_remove_bytes;
int64_t cursor_update_bytes;
@@ -795,7 +798,6 @@ struct __wt_dsrc_stats {
int64_t cursor_remove;
int64_t cursor_reserve;
int64_t cursor_reset;
- int64_t cursor_restart;
int64_t cursor_search;
int64_t cursor_search_near;
int64_t cursor_truncate;
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 32234dca23e..480d31b188e 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -147,7 +147,7 @@ struct __wt_txn_global {
volatile bool checkpoint_running; /* Checkpoint running */
volatile uint32_t checkpoint_id; /* Checkpoint's session ID */
WT_TXN_STATE checkpoint_state; /* Checkpoint's txn state */
- WT_TXN *checkpoint_txn; /* Checkpoint's txn structure */
+ WT_DECL_TIMESTAMP(checkpoint_timestamp) /* Checkpoint's timestamp */
volatile uint64_t metadata_pinned; /* Oldest ID for metadata */
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 9276ca62903..0efc32811e6 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -396,6 +396,60 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session)
return (checkpoint_pinned);
}
+#ifdef HAVE_TIMESTAMPS
+/*
+ * __wt_txn_pinned_timestamp --
+ * Get the first timestamp that has to be kept for the current tree.
+ */
+static inline void
+__wt_txn_pinned_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *pinned_tsp)
+{
+ WT_BTREE *btree;
+ WT_TXN_GLOBAL *txn_global;
+ wt_timestamp_t checkpoint_ts, pinned_ts;
+ bool include_checkpoint_txn;
+
+ btree = S2BT_SAFE(session);
+ txn_global = &S2C(session)->txn_global;
+
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ __wt_timestamp_set(&pinned_ts, &txn_global->pinned_timestamp));
+ __wt_timestamp_set(pinned_tsp, &pinned_ts);
+
+ /*
+ * Checkpoint transactions often fall behind ordinary application
+ * threads. Take special effort to not keep changes pinned in cache if
+ * they are only required for the checkpoint and it has already seen
+ * them.
+ *
+ * If there is no active checkpoint or this handle is up to date with
+ * the active checkpoint then it's safe to ignore the checkpoint
+ * timestamp in the visibility check.
+ */
+ include_checkpoint_txn = btree == NULL ||
+ (!F_ISSET(btree, WT_BTREE_LOOKASIDE) &&
+ btree->checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT));
+ if (!include_checkpoint_txn)
+ return;
+
+ /*
+ * The read of the timestamp pinned by a checkpoint needs to be
+ * carefully ordered: if a checkpoint is starting and we have to use
+ * the checkpoint timestamp, we take the minimum of it with the oldest
+ * timestamp, which is what we want.
+ */
+ WT_READ_BARRIER();
+
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ __wt_timestamp_set(&checkpoint_ts,
+ &txn_global->checkpoint_timestamp));
+
+ if (!__wt_timestamp_iszero(&checkpoint_ts) &&
+ __wt_timestamp_cmp(&checkpoint_ts, &pinned_ts) < 0)
+ __wt_timestamp_set(pinned_tsp, &checkpoint_ts);
+}
+#endif
+
/*
* __txn_visible_all_id --
* Check if a given transaction ID is "globally visible". That is, if
@@ -427,8 +481,7 @@ __wt_txn_visible_all(
#ifdef HAVE_TIMESTAMPS
{
- WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
- int cmp;
+ wt_timestamp_t pinned_ts;
/* Timestamp check. */
if (timestamp == NULL || __wt_timestamp_iszero(timestamp))
@@ -438,20 +491,11 @@ __wt_txn_visible_all(
* If no oldest timestamp has been supplied, updates have to stay in
* cache until we are shutting down.
*/
- if (!txn_global->has_pinned_timestamp)
+ if (!S2C(session)->txn_global.has_pinned_timestamp)
return (F_ISSET(S2C(session), WT_CONN_CLOSING));
- WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
- cmp = __wt_timestamp_cmp(timestamp, &txn_global->pinned_timestamp));
-
- /*
- * We can discard updates with timestamps less than or equal to the
- * pinned timestamp. This is different to the situation for
- * transaction IDs, because we know that updates with timestamps are
- * definitely committed (and in this case, that the transaction ID is
- * globally visible).
- */
- return (cmp <= 0);
+ __wt_txn_pinned_timestamp(session, &pinned_ts);
+ return (__wt_timestamp_cmp(timestamp, &pinned_ts) <= 0);
}
#else
WT_UNUSED(timestamp);
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 2991d6f74e3..a4ba834d5ef 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -602,6 +602,14 @@ struct __wt_cursor {
*
* The key must first be set and the record must already exist.
*
+ * Note that reserve works by doing a special update operation that is
+ * not logged and does not change the value of the record. This update
+ * is aborted when the enclosing transaction ends regardless of whether
+ * it commits or rolls back. Given that, reserve can only be used to
+ * detect conflicts between transactions that execute concurrently. It
+ * cannot detect all logical conflicts between transactions. For that,
+ * some update to the record must be committed.
+ *
* @snippet ex_all.c Reserve a record
*
* On success, the cursor ends positioned at the specified record; to
@@ -1639,6 +1647,12 @@ struct __wt_session {
* the WT_CURSOR::next (WT_CURSOR::prev) method will iterate from the
* beginning (end) of the table.
*
+ * When a range truncate is in progress, and another transaction inserts
+ * a key into that range, the behavior is not well defined - a conflict
+ * may be detected or both transactions may be permitted to commit. If
+ * they do commit, and if there is a crash and recovery runs, the result
+ * may be different than what was in cache before the crash.
+ *
* @param session the session handle
* @param name the URI of the table or file to truncate
* @param start optional cursor marking the first record discarded;
@@ -2128,6 +2142,10 @@ struct __wt_connection {
* thread uses a session from the configured session_max., an integer
* between 1 and 20; default \c 2.}
* @config{ ),,}
+ * @config{cache_max_wait_ms, the maximum number of milliseconds an
+ * application thread will wait for space to be available in cache
+ * before giving up. Default will wait forever., an integer greater
+ * than or equal to 0; default \c 0.}
* @config{cache_overhead, assume the heap allocator overhead is the
* specified percentage\, and adjust the cache usage by that amount (for
* example\, if there is 10GB of data in cache\, a percentage of 10
@@ -2179,7 +2197,7 @@ struct __wt_connection {
* is a percentage of the cache size if the value is within the range of
* 0 to 100 or an absolute size when greater than 100. The value is not
* allowed to exceed the \c cache_size. Ignored if set to zero or \c
- * in_memory is \c true., an integer between 0 and 10TB; default \c 5.}
+ * in_memory is \c true., an integer between 0 and 10TB; default \c 1.}
* @config{eviction_dirty_target, perform eviction in worker threads
* when the cache contains at least this much dirty content. It is a
* percentage of the cache size if the value is within the range of 1 to
@@ -2708,6 +2726,10 @@ struct __wt_connection {
* default value for any sessions created\, and can be overridden in configuring
* \c cache_cursors in WT_CONNECTION.open_session., a boolean flag; default \c
* true.}
+ * @config{cache_max_wait_ms, the maximum number of milliseconds an application
+ * thread will wait for space to be available in cache before giving up.
+ * Default will wait forever., an integer greater than or equal to 0; default \c
+ * 0.}
* @config{cache_overhead, assume the heap allocator overhead is the specified
* percentage\, and adjust the cache usage by that amount (for example\, if
* there is 10GB of data in cache\, a percentage of 10 means WiredTiger treats
@@ -2808,7 +2830,7 @@ struct __wt_connection {
* percentage of the cache size if the value is within the range of 0 to 100 or
* an absolute size when greater than 100. The value is not allowed to exceed
* the \c cache_size. Ignored if set to zero or \c in_memory is \c true., an
- * integer between 0 and 10TB; default \c 5.}
+ * integer between 0 and 10TB; default \c 1.}
* @config{eviction_dirty_target, perform eviction in worker threads when the
* cache contains at least this much dirty content. It is a percentage of the
* cache size if the value is within the range of 1 to 100 or an absolute size
@@ -5103,596 +5125,600 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1091
/*! cache: modified pages evicted by application threads */
#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1092
+/*! cache: operations timed out waiting for space in cache */
+#define WT_STAT_CONN_CACHE_TIMED_OUT_OPS 1093
/*! cache: overflow pages read into cache */
-#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1093
+#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1094
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1094
+#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1095
/*! cache: page written requiring lookaside records */
-#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1095
+#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1096
/*! cache: pages currently held in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_INUSE 1096
+#define WT_STAT_CONN_CACHE_PAGES_INUSE 1097
/*! cache: pages evicted because they exceeded the in-memory maximum count */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1097
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1098
/*!
* cache: pages evicted because they exceeded the in-memory maximum time
* (usecs)
*/
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_TIME 1098
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_TIME 1099
/*! cache: pages evicted because they had chains of deleted items count */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1099
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1100
/*!
* cache: pages evicted because they had chains of deleted items time
* (usecs)
*/
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE_TIME 1100
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE_TIME 1101
/*! cache: pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP 1101
+#define WT_STAT_CONN_CACHE_EVICTION_APP 1102
/*! cache: pages queued for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1102
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1103
/*! cache: pages queued for urgent eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1103
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1104
/*! cache: pages queued for urgent eviction during walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1104
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1105
/*! cache: pages read into cache */
-#define WT_STAT_CONN_CACHE_READ 1105
+#define WT_STAT_CONN_CACHE_READ 1106
/*! cache: pages read into cache after truncate */
-#define WT_STAT_CONN_CACHE_READ_DELETED 1106
+#define WT_STAT_CONN_CACHE_READ_DELETED 1107
/*! cache: pages read into cache after truncate in prepare state */
-#define WT_STAT_CONN_CACHE_READ_DELETED_PREPARED 1107
+#define WT_STAT_CONN_CACHE_READ_DELETED_PREPARED 1108
/*! cache: pages read into cache requiring lookaside entries */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1108
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1109
/*! cache: pages read into cache requiring lookaside for checkpoint */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_CHECKPOINT 1109
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_CHECKPOINT 1110
/*! cache: pages read into cache skipping older lookaside entries */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_SKIPPED 1110
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_SKIPPED 1111
/*!
* cache: pages read into cache with skipped lookaside entries needed
* later
*/
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY 1111
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY 1112
/*!
* cache: pages read into cache with skipped lookaside entries needed
* later by checkpoint
*/
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY_CHECKPOINT 1112
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY_CHECKPOINT 1113
/*! cache: pages requested from the cache */
-#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1113
+#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1114
/*! cache: pages seen by eviction walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1114
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1115
/*! cache: pages selected for eviction unable to be evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1115
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1116
/*! cache: pages walked for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK 1116
+#define WT_STAT_CONN_CACHE_EVICTION_WALK 1117
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1117
+#define WT_STAT_CONN_CACHE_WRITE 1118
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1118
+#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1119
/*! cache: percentage overhead */
-#define WT_STAT_CONN_CACHE_OVERHEAD 1119
+#define WT_STAT_CONN_CACHE_OVERHEAD 1120
/*! cache: tracked bytes belonging to internal pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1120
+#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1121
/*! cache: tracked bytes belonging to leaf pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_LEAF 1121
+#define WT_STAT_CONN_CACHE_BYTES_LEAF 1122
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1122
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1123
/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1123
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1124
/*! cache: unmodified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1124
+#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1125
/*! connection: auto adjusting condition resets */
-#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1125
+#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1126
/*! connection: auto adjusting condition wait calls */
-#define WT_STAT_CONN_COND_AUTO_WAIT 1126
+#define WT_STAT_CONN_COND_AUTO_WAIT 1127
/*! connection: detected system time went backwards */
-#define WT_STAT_CONN_TIME_TRAVEL 1127
+#define WT_STAT_CONN_TIME_TRAVEL 1128
/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1128
+#define WT_STAT_CONN_FILE_OPEN 1129
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1129
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1130
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1130
+#define WT_STAT_CONN_MEMORY_FREE 1131
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1131
+#define WT_STAT_CONN_MEMORY_GROW 1132
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1132
+#define WT_STAT_CONN_COND_WAIT 1133
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1133
+#define WT_STAT_CONN_RWLOCK_READ 1134
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1134
+#define WT_STAT_CONN_RWLOCK_WRITE 1135
/*! connection: total fsync I/Os */
-#define WT_STAT_CONN_FSYNC_IO 1135
+#define WT_STAT_CONN_FSYNC_IO 1136
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1136
+#define WT_STAT_CONN_READ_IO 1137
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1137
+#define WT_STAT_CONN_WRITE_IO 1138
/*! cursor: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1138
+#define WT_STAT_CONN_CURSOR_CREATE 1139
/*! cursor: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1139
+#define WT_STAT_CONN_CURSOR_INSERT 1140
/*! cursor: cursor modify calls */
-#define WT_STAT_CONN_CURSOR_MODIFY 1140
+#define WT_STAT_CONN_CURSOR_MODIFY 1141
/*! cursor: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1141
+#define WT_STAT_CONN_CURSOR_NEXT 1142
+/*! cursor: cursor operation restarted */
+#define WT_STAT_CONN_CURSOR_RESTART 1143
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1142
+#define WT_STAT_CONN_CURSOR_PREV 1144
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1143
+#define WT_STAT_CONN_CURSOR_REMOVE 1145
/*! cursor: cursor reserve calls */
-#define WT_STAT_CONN_CURSOR_RESERVE 1144
+#define WT_STAT_CONN_CURSOR_RESERVE 1146
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1145
-/*! cursor: cursor restarted searches */
-#define WT_STAT_CONN_CURSOR_RESTART 1146
+#define WT_STAT_CONN_CURSOR_RESET 1147
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1147
+#define WT_STAT_CONN_CURSOR_SEARCH 1148
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1148
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1149
/*! cursor: cursor sweep buckets */
-#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1149
+#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1150
/*! cursor: cursor sweep cursors closed */
-#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1150
+#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1151
/*! cursor: cursor sweep cursors examined */
-#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1151
+#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1152
/*! cursor: cursor sweeps */
-#define WT_STAT_CONN_CURSOR_SWEEP 1152
+#define WT_STAT_CONN_CURSOR_SWEEP 1153
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1153
+#define WT_STAT_CONN_CURSOR_UPDATE 1154
/*! cursor: cursors cached on close */
-#define WT_STAT_CONN_CURSOR_CACHE 1154
+#define WT_STAT_CONN_CURSOR_CACHE 1155
/*! cursor: cursors reused from cache */
-#define WT_STAT_CONN_CURSOR_REOPEN 1155
+#define WT_STAT_CONN_CURSOR_REOPEN 1156
/*! cursor: truncate calls */
-#define WT_STAT_CONN_CURSOR_TRUNCATE 1156
+#define WT_STAT_CONN_CURSOR_TRUNCATE 1157
/*! data-handle: connection data handles currently active */
-#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1157
+#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1158
/*! data-handle: connection sweep candidate became referenced */
-#define WT_STAT_CONN_DH_SWEEP_REF 1158
+#define WT_STAT_CONN_DH_SWEEP_REF 1159
/*! data-handle: connection sweep dhandles closed */
-#define WT_STAT_CONN_DH_SWEEP_CLOSE 1159
+#define WT_STAT_CONN_DH_SWEEP_CLOSE 1160
/*! data-handle: connection sweep dhandles removed from hash list */
-#define WT_STAT_CONN_DH_SWEEP_REMOVE 1160
+#define WT_STAT_CONN_DH_SWEEP_REMOVE 1161
/*! data-handle: connection sweep time-of-death sets */
-#define WT_STAT_CONN_DH_SWEEP_TOD 1161
+#define WT_STAT_CONN_DH_SWEEP_TOD 1162
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_SWEEPS 1162
+#define WT_STAT_CONN_DH_SWEEPS 1163
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1163
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1164
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1164
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1165
/*! lock: checkpoint lock acquisitions */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1165
+#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1166
/*! lock: checkpoint lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1166
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1167
/*! lock: checkpoint lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1167
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1168
/*!
* lock: commit timestamp queue lock application thread time waiting for
* the dhandle lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_APPLICATION 1168
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_APPLICATION 1169
/*!
* lock: commit timestamp queue lock internal thread time waiting for the
* dhandle lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_INTERNAL 1169
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_INTERNAL 1170
/*! lock: commit timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_READ_COUNT 1170
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_READ_COUNT 1171
/*! lock: commit timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WRITE_COUNT 1171
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WRITE_COUNT 1172
/*!
* lock: dhandle lock application thread time waiting for the dhandle
* lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1172
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1173
/*!
* lock: dhandle lock internal thread time waiting for the dhandle lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1173
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1174
/*! lock: dhandle read lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1174
+#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1175
/*! lock: dhandle write lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1175
+#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1176
/*! lock: metadata lock acquisitions */
-#define WT_STAT_CONN_LOCK_METADATA_COUNT 1176
+#define WT_STAT_CONN_LOCK_METADATA_COUNT 1177
/*! lock: metadata lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1177
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1178
/*! lock: metadata lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1178
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1179
/*!
* lock: read timestamp queue lock application thread time waiting for
* the dhandle lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1179
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1180
/*!
* lock: read timestamp queue lock internal thread time waiting for the
* dhandle lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1180
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1181
/*! lock: read timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1181
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1182
/*! lock: read timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1182
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1183
/*! lock: schema lock acquisitions */
-#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1183
+#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1184
/*! lock: schema lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1184
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1185
/*! lock: schema lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1185
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1186
/*!
* lock: table lock application thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1186
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1187
/*!
* lock: table lock internal thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1187
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1188
/*! lock: table read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1188
+#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1189
/*! lock: table write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1189
+#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1190
/*!
* lock: txn global lock application thread time waiting for the dhandle
* lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1190
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1191
/*!
* lock: txn global lock internal thread time waiting for the dhandle
* lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1191
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1192
/*! lock: txn global read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1192
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1193
/*! lock: txn global write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1193
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1194
/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1194
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1195
/*! log: force archive time sleeping (usecs) */
-#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1195
+#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1196
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1196
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1197
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1197
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1198
/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1198
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1199
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1199
+#define WT_STAT_CONN_LOG_FLUSH 1200
/*! log: log force write operations */
-#define WT_STAT_CONN_LOG_FORCE_WRITE 1200
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1201
/*! log: log force write operations skipped */
-#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1201
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1202
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1202
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1203
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1203
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1204
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1204
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1205
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1205
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1206
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1206
+#define WT_STAT_CONN_LOG_SCANS 1207
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1207
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1208
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1208
+#define WT_STAT_CONN_LOG_WRITE_LSN 1209
/*! log: log server thread write LSN walk skipped */
-#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1209
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1210
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1210
+#define WT_STAT_CONN_LOG_SYNC 1211
/*! log: log sync time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DURATION 1211
+#define WT_STAT_CONN_LOG_SYNC_DURATION 1212
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1212
+#define WT_STAT_CONN_LOG_SYNC_DIR 1213
/*! log: log sync_dir time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1213
+#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1214
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1214
+#define WT_STAT_CONN_LOG_WRITES 1215
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1215
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1216
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1216
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1217
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1217
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1218
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1218
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1219
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1219
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1220
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1220
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1221
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1221
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1222
/*! log: slot close lost race */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1222
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1223
/*! log: slot close unbuffered waits */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1223
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1224
/*! log: slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1224
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1225
/*! log: slot join atomic update races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1225
+#define WT_STAT_CONN_LOG_SLOT_RACES 1226
/*! log: slot join calls atomic updates raced */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1226
+#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1227
/*! log: slot join calls did not yield */
-#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1227
+#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1228
/*! log: slot join calls found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1228
+#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1229
/*! log: slot join calls slept */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1229
+#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1230
/*! log: slot join calls yielded */
-#define WT_STAT_CONN_LOG_SLOT_YIELD 1230
+#define WT_STAT_CONN_LOG_SLOT_YIELD 1231
/*! log: slot join found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1231
+#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1232
/*! log: slot joins yield time (usecs) */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1232
+#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1233
/*! log: slot transitions unable to find free slot */
-#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1233
+#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1234
/*! log: slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1234
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1235
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1235
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1236
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1236
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1237
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1237
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1238
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1238
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1239
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1239
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1240
/*! perf: file system read latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1240
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1241
/*! perf: file system read latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1241
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1242
/*! perf: file system read latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1242
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1243
/*! perf: file system read latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1243
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1244
/*! perf: file system read latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1244
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1245
/*! perf: file system read latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1245
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1246
/*! perf: file system write latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1246
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1247
/*! perf: file system write latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1247
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1248
/*! perf: file system write latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1248
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1249
/*! perf: file system write latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1249
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1250
/*! perf: file system write latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1250
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1251
/*! perf: file system write latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1251
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1252
/*! perf: operation read latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1252
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1253
/*! perf: operation read latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1253
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1254
/*! perf: operation read latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1254
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1255
/*! perf: operation read latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1255
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1256
/*! perf: operation read latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1256
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1257
/*! perf: operation write latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1257
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1258
/*! perf: operation write latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1258
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1259
/*! perf: operation write latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1259
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1260
/*! perf: operation write latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1260
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1261
/*! perf: operation write latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1261
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1262
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1262
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1263
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1263
+#define WT_STAT_CONN_REC_PAGES 1264
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1264
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1265
/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1265
+#define WT_STAT_CONN_REC_PAGE_DELETE 1266
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1266
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1267
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1267
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1268
/*! session: open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1268
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1269
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1269
+#define WT_STAT_CONN_SESSION_OPEN 1270
/*! session: table alter failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1270
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1271
/*! session: table alter successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1271
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1272
/*! session: table alter unchanged and skipped */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1272
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1273
/*! session: table compact failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1273
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1274
/*! session: table compact successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1274
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1275
/*! session: table create failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1275
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1276
/*! session: table create successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1276
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1277
/*! session: table drop failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1277
+#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1278
/*! session: table drop successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1278
+#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1279
/*! session: table rebalance failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1279
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1280
/*! session: table rebalance successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1280
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1281
/*! session: table rename failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1281
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1282
/*! session: table rename successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1282
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1283
/*! session: table salvage failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1283
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1284
/*! session: table salvage successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1284
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1285
/*! session: table truncate failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1285
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1286
/*! session: table truncate successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1286
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1287
/*! session: table verify failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1287
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1288
/*! session: table verify successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1288
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1289
/*! thread-state: active filesystem fsync calls */
-#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1289
+#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1290
/*! thread-state: active filesystem read calls */
-#define WT_STAT_CONN_THREAD_READ_ACTIVE 1290
+#define WT_STAT_CONN_THREAD_READ_ACTIVE 1291
/*! thread-state: active filesystem write calls */
-#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1291
+#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1292
/*! thread-yield: application thread time evicting (usecs) */
-#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1292
+#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1293
/*! thread-yield: application thread time waiting for cache (usecs) */
-#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1293
+#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1294
/*!
* thread-yield: connection close blocked waiting for transaction state
* stabilization
*/
-#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1294
+#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1295
/*! thread-yield: connection close yielded for lsm manager shutdown */
-#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1295
+#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1296
/*! thread-yield: data handle lock yielded */
-#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1296
+#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1297
/*!
* thread-yield: get reference for page index and slot time sleeping
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1297
+#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1298
/*! thread-yield: log server sync yielded for log write */
-#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1298
+#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1299
/*! thread-yield: page access yielded due to prepare state change */
-#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1299
+#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1300
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1300
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1301
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1301
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1302
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1302
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1303
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1303
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1304
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1304
+#define WT_STAT_CONN_PAGE_SLEEP 1305
/*!
* thread-yield: page delete rollback time sleeping for state change
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1305
+#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1306
/*! thread-yield: page reconciliation yielded due to child modification */
-#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1306
+#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1307
/*! transaction: commit timestamp queue insert to empty */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_EMPTY 1307
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_EMPTY 1308
/*! transaction: commit timestamp queue inserts to tail */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_TAIL 1308
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_TAIL 1309
/*! transaction: commit timestamp queue inserts total */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1309
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1310
/*! transaction: commit timestamp queue length */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1310
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1311
/*! transaction: number of named snapshots created */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1311
+#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1312
/*! transaction: number of named snapshots dropped */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1312
+#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1313
/*! transaction: prepared transactions */
-#define WT_STAT_CONN_TXN_PREPARE 1313
+#define WT_STAT_CONN_TXN_PREPARE 1314
/*! transaction: prepared transactions committed */
-#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1314
+#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1315
/*! transaction: prepared transactions currently active */
-#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1315
+#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1316
/*! transaction: prepared transactions rolled back */
-#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1316
+#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1317
/*! transaction: query timestamp calls */
-#define WT_STAT_CONN_TXN_QUERY_TS 1317
+#define WT_STAT_CONN_TXN_QUERY_TS 1318
/*! transaction: read timestamp queue insert to empty */
-#define WT_STAT_CONN_TXN_READ_QUEUE_EMPTY 1318
+#define WT_STAT_CONN_TXN_READ_QUEUE_EMPTY 1319
/*! transaction: read timestamp queue inserts to head */
-#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1319
+#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1320
/*! transaction: read timestamp queue inserts total */
-#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1320
+#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1321
/*! transaction: read timestamp queue length */
-#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1321
+#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1322
/*! transaction: rollback to stable calls */
-#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE 1322
+#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE 1323
/*! transaction: rollback to stable updates aborted */
-#define WT_STAT_CONN_TXN_ROLLBACK_UPD_ABORTED 1323
+#define WT_STAT_CONN_TXN_ROLLBACK_UPD_ABORTED 1324
/*! transaction: rollback to stable updates removed from lookaside */
-#define WT_STAT_CONN_TXN_ROLLBACK_LAS_REMOVED 1324
+#define WT_STAT_CONN_TXN_ROLLBACK_LAS_REMOVED 1325
/*! transaction: set timestamp calls */
-#define WT_STAT_CONN_TXN_SET_TS 1325
+#define WT_STAT_CONN_TXN_SET_TS 1326
/*! transaction: set timestamp commit calls */
-#define WT_STAT_CONN_TXN_SET_TS_COMMIT 1326
+#define WT_STAT_CONN_TXN_SET_TS_COMMIT 1327
/*! transaction: set timestamp commit updates */
-#define WT_STAT_CONN_TXN_SET_TS_COMMIT_UPD 1327
+#define WT_STAT_CONN_TXN_SET_TS_COMMIT_UPD 1328
/*! transaction: set timestamp oldest calls */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1328
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1329
/*! transaction: set timestamp oldest updates */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1329
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1330
/*! transaction: set timestamp stable calls */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE 1330
+#define WT_STAT_CONN_TXN_SET_TS_STABLE 1331
/*! transaction: set timestamp stable updates */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1331
+#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1332
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1332
+#define WT_STAT_CONN_TXN_BEGIN 1333
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1333
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1334
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1334
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1335
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1335
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1336
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1336
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1337
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1337
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1338
/*! transaction: transaction checkpoint scrub dirty target */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1338
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1339
/*! transaction: transaction checkpoint scrub time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1339
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1340
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1340
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1341
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1341
+#define WT_STAT_CONN_TXN_CHECKPOINT 1342
/*!
* transaction: transaction checkpoints skipped because database was
* clean
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1342
+#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1343
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1343
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1344
/*!
* transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1344
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1345
/*!
* transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1345
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1346
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1346
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1347
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1347
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1348
/*!
* transaction: transaction range of IDs currently pinned by named
* snapshots
*/
-#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1348
+#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1349
/*! transaction: transaction range of timestamps currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1349
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1350
+/*! transaction: transaction range of timestamps pinned by a checkpoint */
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1351
/*!
* transaction: transaction range of timestamps pinned by the oldest
* timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1350
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1352
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1351
+#define WT_STAT_CONN_TXN_SYNC 1353
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1352
+#define WT_STAT_CONN_TXN_COMMIT 1354
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1353
+#define WT_STAT_CONN_TXN_ROLLBACK 1355
/*! transaction: update conflicts */
-#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1354
+#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1356
/*!
* @}
@@ -6019,32 +6045,32 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2106
/*! cursor: create calls */
#define WT_STAT_DSRC_CURSOR_CREATE 2107
+/*! cursor: cursor operation restarted */
+#define WT_STAT_DSRC_CURSOR_RESTART 2108
/*! cursor: cursor-insert key and value bytes inserted */
-#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2108
+#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2109
/*! cursor: cursor-remove key bytes removed */
-#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2109
+#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2110
/*! cursor: cursor-update value bytes updated */
-#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2110
+#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2111
/*! cursor: cursors cached on close */
-#define WT_STAT_DSRC_CURSOR_CACHE 2111
+#define WT_STAT_DSRC_CURSOR_CACHE 2112
/*! cursor: cursors reused from cache */
-#define WT_STAT_DSRC_CURSOR_REOPEN 2112
+#define WT_STAT_DSRC_CURSOR_REOPEN 2113
/*! cursor: insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT 2113
+#define WT_STAT_DSRC_CURSOR_INSERT 2114
/*! cursor: modify calls */
-#define WT_STAT_DSRC_CURSOR_MODIFY 2114
+#define WT_STAT_DSRC_CURSOR_MODIFY 2115
/*! cursor: next calls */
-#define WT_STAT_DSRC_CURSOR_NEXT 2115
+#define WT_STAT_DSRC_CURSOR_NEXT 2116
/*! cursor: prev calls */
-#define WT_STAT_DSRC_CURSOR_PREV 2116
+#define WT_STAT_DSRC_CURSOR_PREV 2117
/*! cursor: remove calls */
-#define WT_STAT_DSRC_CURSOR_REMOVE 2117
+#define WT_STAT_DSRC_CURSOR_REMOVE 2118
/*! cursor: reserve calls */
-#define WT_STAT_DSRC_CURSOR_RESERVE 2118
+#define WT_STAT_DSRC_CURSOR_RESERVE 2119
/*! cursor: reset calls */
-#define WT_STAT_DSRC_CURSOR_RESET 2119
-/*! cursor: restarted searches */
-#define WT_STAT_DSRC_CURSOR_RESTART 2120
+#define WT_STAT_DSRC_CURSOR_RESET 2120
/*! cursor: search calls */
#define WT_STAT_DSRC_CURSOR_SEARCH 2121
/*! cursor: search near calls */
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
index 4d9f6f92832..13d7d857a04 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
@@ -265,6 +265,12 @@ open: WT_WITH_SCHEMA_LOCK(session,
}
if (!F_ISSET(clsm, WT_CLSM_ACTIVE)) {
+ /*
+ * Opening this LSM cursor has opened a number of btree
+ * cursors, ensure other code doesn't think this is the first
+ * cursor in a session.
+ */
+ ++session->ncursors;
WT_RET(__cursor_enter(session));
F_SET(clsm, WT_CLSM_ACTIVE);
}
@@ -284,6 +290,7 @@ __clsm_leave(WT_CURSOR_LSM *clsm)
session = (WT_SESSION_IMPL *)clsm->iface.session;
if (F_ISSET(clsm, WT_CLSM_ACTIVE)) {
+ --session->ncursors;
__cursor_leave(session);
F_CLR(clsm, WT_CLSM_ACTIVE);
}
@@ -365,12 +372,17 @@ __clsm_deleted_decode(WT_CURSOR_LSM *clsm, WT_ITEM *value)
* Close any btree cursors that are not needed.
*/
static int
-__clsm_close_cursors(WT_CURSOR_LSM *clsm, u_int start, u_int end)
+__clsm_close_cursors(
+ WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm, u_int start, u_int end)
{
WT_BLOOM *bloom;
WT_CURSOR *c;
u_int i;
+ __wt_verbose(session, WT_VERB_LSM,
+ "LSM closing cursor session(%p):clsm(%p), start: %u, end: %u",
+ (void *)session, (void *)clsm, start, end);
+
if (clsm->chunks == NULL || clsm->nchunks == 0)
return (0);
@@ -609,7 +621,7 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
saved_gen = lsm_tree->dsk_gen;
locked = false;
__wt_lsm_tree_readunlock(session, lsm_tree);
- WT_ERR(__clsm_close_cursors(
+ WT_ERR(__clsm_close_cursors(session,
clsm, close_range_start, close_range_end));
__wt_lsm_tree_readlock(session, lsm_tree);
locked = true;
@@ -626,6 +638,10 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
clsm->nchunks = nchunks;
/* Open the cursors for chunks that have changed. */
+ __wt_verbose(session, WT_VERB_LSM,
+ "LSM opening cursor session(%p):clsm(%p)%s, chunks: %u, good: %u",
+ (void *)session, (void *)clsm,
+ update ? ", update" : "", nchunks, ngood);
for (i = ngood; i != nchunks; i++) {
chunk = lsm_tree->chunk[i + start_chunk];
/* Copy the maximum transaction ID. */
@@ -1736,7 +1752,7 @@ __wt_clsm_close(WT_CURSOR *cursor)
*/
clsm = (WT_CURSOR_LSM *)cursor;
CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, close, NULL);
- WT_TRET(__clsm_close_cursors(clsm, 0, clsm->nchunks));
+ WT_TRET(__clsm_close_cursors(session, clsm, 0, clsm->nchunks));
__clsm_free_chunks(session, clsm);
/* In case we were somehow left positioned, clear that. */
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
index 40ff5fc0b26..1a5c60344bc 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
@@ -72,11 +72,7 @@ __lsm_general_worker_start(WT_SESSION_IMPL *session)
worker_args->type =
WT_LSM_WORK_DROP | WT_LSM_WORK_SWITCH;
else {
- worker_args->type =
- WT_LSM_WORK_BLOOM |
- WT_LSM_WORK_DROP |
- WT_LSM_WORK_FLUSH |
- WT_LSM_WORK_SWITCH;
+ worker_args->type = WT_LSM_WORK_GENERAL_OPS;
/*
* Only allow half of the threads to run merges to
* avoid all all workers getting stuck in long-running
@@ -422,9 +418,10 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
fillms = 10000;
/*
* If the tree appears to not be triggering enough
- * LSM maintenance, help it out. Additional work units
- * don't hurt, and can be necessary if some work
- * units aren't completed for some reason.
+ * LSM maintenance, help it out. Some types of
+ * additional work units don't hurt, and can be
+ * necessary if some work units aren't completed for
+ * some reason.
* If the tree hasn't been modified, and there are
* more than 1 chunks - try to get the tree smaller
* so queries run faster.
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
index 6f18f4fb152..a283670eba6 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
@@ -313,6 +313,37 @@ __wt_lsm_chunk_visible_all(
}
/*
+ * __lsm_set_chunk_evictable --
+ * Enable eviction in an LSM chunk.
+ */
+static int
+__lsm_set_chunk_evictable(
+ WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk, bool need_handle)
+{
+ WT_BTREE *btree;
+ WT_DECL_RET;
+
+ if (chunk->evict_enabled != 0)
+ return (0);
+
+ /* See if we win the race to enable eviction. */
+ if (__wt_atomic_cas32(&chunk->evict_enabled, 0, 1)) {
+ if (need_handle)
+ WT_RET(__wt_session_get_dhandle(
+ session, chunk->uri, NULL, NULL, 0));
+ btree = session->dhandle->handle;
+ if (btree->evict_disabled_open) {
+ btree->evict_disabled_open = false;
+ __wt_evict_file_exclusive_off(session);
+ }
+
+ if (need_handle)
+ WT_TRET(__wt_session_release_dhandle(session));
+ }
+ return (ret);
+}
+
+/*
* __lsm_checkpoint_chunk --
* Checkpoint an LSM chunk, separated out to make locking easier.
*/
@@ -340,7 +371,6 @@ int
__wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk)
{
- WT_BTREE *btree;
WT_DECL_RET;
WT_TXN_ISOLATION saved_isolation;
bool flush_set, release_dhandle;
@@ -375,6 +405,14 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
WT_RET(__wt_txn_update_oldest(
session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
if (!__wt_lsm_chunk_visible_all(session, chunk)) {
+ /*
+ * If there is cache pressure consider making a chunk evictable
+ * to avoid the cache getting stuck when history is required.
+ */
+ if (__wt_eviction_needed(session, false, false, NULL))
+ WT_ERR(__wt_lsm_manager_push_entry(
+ session, WT_LSM_WORK_ENABLE_EVICT, 0, lsm_tree));
+
__wt_verbose(session, WT_VERB_LSM,
"LSM worker %s: running transaction, return",
chunk->uri);
@@ -446,11 +484,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
* Enable eviction on the live chunk so it doesn't block the cache.
* Future reads should direct to the on-disk chunk anyway.
*/
- btree = session->dhandle->handle;
- if (btree->evict_disabled_open) {
- btree->evict_disabled_open = false;
- __wt_evict_file_exclusive_off(session);
- }
+ WT_ERR(__lsm_set_chunk_evictable(session, chunk, false));
release_dhandle = false;
WT_ERR(__wt_session_release_dhandle(session));
@@ -481,6 +515,54 @@ err: if (flush_set)
}
/*
+ * __wt_lsm_work_enable_evict --
+ * LSM usually pins live chunks in memory - preferring to force them
+ * out via a checkpoint when they are no longer required. For applications
+ * that keep data pinned for a long time this can lead to the cache
+ * being pinned full. This work unit detects that case, and enables
+ * regular eviction in chunks that can be correctly evicted.
+ */
+int
+__wt_lsm_work_enable_evict(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+{
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk;
+ WT_LSM_WORKER_COOKIE cookie;
+ u_int i;
+
+ WT_CLEAR(cookie);
+
+ /* Only do this if there is cache pressure. */
+ if (!__wt_eviction_needed(session, false, false, NULL))
+ return (0);
+
+ WT_RET(__lsm_copy_chunks(session, lsm_tree, &cookie, false));
+
+ /*
+ * Turn on eviction in chunks that have had some chance to
+ * checkpoint if there is cache pressure.
+ */
+ for (i = 0; cookie.nchunks > 2 && i < cookie.nchunks - 2; i++) {
+ chunk = cookie.chunk_array[i];
+
+ /*
+ * Skip if the chunk isn't on disk yet, or if it's still in
+ * cache for a reason other than transaction visibility.
+ */
+ if (!F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) ||
+ chunk->evict_enabled != 0 ||
+ __wt_lsm_chunk_visible_all(session, chunk))
+ continue;
+
+ WT_ERR(__lsm_set_chunk_evictable(session, chunk, true));
+ }
+
+err: __lsm_unpin_chunks(session, &cookie);
+ __wt_free(session, cookie.chunk_array);
+ return (ret);
+}
+
+/*
* __lsm_bloom_create --
* Create a bloom filter for a chunk of the LSM tree that has been
* checkpointed but not yet been merged.
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_worker.c b/src/third_party/wiredtiger/src/lsm/lsm_worker.c
index 82f72bdf355..8588737f6c3 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_worker.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_worker.c
@@ -42,7 +42,9 @@ __wt_lsm_worker_stop(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args)
/*
* __lsm_worker_general_op --
- * Execute a single bloom, drop or flush work unit.
+ * Execute a single medium importance maintenance operation that should
+ * not be super long running. That includes bloom creation, drop or flush
+ * work unit types.
*/
static int
__lsm_worker_general_op(
@@ -55,11 +57,7 @@ __lsm_worker_general_op(
*completed = false;
- /*
- * Return if this thread cannot process a bloom, drop or flush.
- */
- if (!FLD_ISSET(cookie->type,
- WT_LSM_WORK_BLOOM | WT_LSM_WORK_DROP | WT_LSM_WORK_FLUSH))
+ if (!FLD_ISSET(cookie->type, WT_LSM_WORK_GENERAL_OPS))
return (WT_NOTFOUND);
if ((ret = __wt_lsm_manager_pop_entry(session,
@@ -88,6 +86,8 @@ __lsm_worker_general_op(
WT_ERR(__wt_lsm_free_chunks(session, entry->lsm_tree));
else if (entry->type == WT_LSM_WORK_BLOOM)
WT_ERR(__wt_lsm_work_bloom(session, entry->lsm_tree));
+ else if (entry->type == WT_LSM_WORK_ENABLE_EVICT)
+ WT_ERR(__wt_lsm_work_enable_evict(session, entry->lsm_tree));
*completed = true;
err: __wt_lsm_manager_free_work_unit(session, entry);
diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c
index d6acaed98fc..2c83167c28f 100644
--- a/src/third_party/wiredtiger/src/meta/meta_turtle.c
+++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c
@@ -141,6 +141,46 @@ err: WT_TRET(__wt_metadata_cursor_release(session, &cursor));
}
/*
+ * __wt_turtle_exists --
+ * Return if the turtle file exists on startup.
+ */
+int
+__wt_turtle_exists(WT_SESSION_IMPL *session, bool *existp)
+{
+ /*
+ * The last thing we do in database initialization is rename a turtle
+ * file into place, and there's never a database home after that point
+ * without a turtle file. On startup we check if the turtle file exists
+ * to decide if we're creating the database or re-opening an existing
+ * database.
+ * Unfortunately, we re-write the turtle file at checkpoint end,
+ * first creating the "set" file and then renaming it into place.
+ * Renames on Windows aren't guaranteed to be atomic, a power failure
+ * could leave us with only the set file. The turtle file is the file
+ * we regularly rename when WiredTiger is running, so if we're going to
+ * get caught, the turtle file is where it will happen. If we have a set
+ * file and no turtle file, rename the set file into place. We don't
+ * know what went wrong for sure, so this can theoretically make it
+ * worse, but there aren't alternatives other than human intervention.
+ */
+ WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE, existp));
+ if (*existp)
+ return (0);
+
+ WT_RET(__wt_fs_exist(session, WT_METADATA_TURTLE_SET, existp));
+ if (!*existp)
+ return (0);
+
+ WT_RET(__wt_fs_rename(session,
+ WT_METADATA_TURTLE_SET, WT_METADATA_TURTLE, true));
+ WT_RET(__wt_msg(session,
+ "%s not found, %s renamed to %s",
+ WT_METADATA_TURTLE, WT_METADATA_TURTLE_SET, WT_METADATA_TURTLE));
+ *existp = true;
+ return (0);
+}
+
+/*
* __wt_turtle_init --
* Check the turtle file and create if necessary.
*/
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index eb3b0038525..2ec28e31201 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -47,10 +47,11 @@ typedef struct {
/* Track the page's min/maximum transactions. */
uint64_t max_txn;
- uint64_t min_txn_unstable;
WT_DECL_TIMESTAMP(max_timestamp)
- WT_DECL_TIMESTAMP(max_onpage_timestamp)
- WT_DECL_TIMESTAMP(min_saved_timestamp)
+
+ /* Lookaside boundary tracking. */
+ uint64_t unstable_txn;
+ WT_DECL_TIMESTAMP(unstable_timestamp)
u_int updates_seen; /* Count of updates seen. */
u_int updates_unstable; /* Count of updates not visible_all. */
@@ -422,14 +423,27 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
return (EBUSY);
}
+ /* Initialize the reconciliation structure for each new run. */
+ if ((ret = __rec_init(
+ session, ref, flags, salvage, &session->reconcile)) != 0) {
+ WT_PAGE_UNLOCK(session, page);
+ return (ret);
+ }
+ r = session->reconcile;
+
oldest_id = __wt_txn_oldest_id(session);
+
+ /*
+ * During eviction, save the transaction state that causes history to
+ * be pinned, regardless of whether reconciliation succeeds or fails.
+ * There is usually no point retrying eviction until this state
+ * changes.
+ */
if (LF_ISSET(WT_REC_EVICT)) {
mod->last_eviction_id = oldest_id;
#ifdef HAVE_TIMESTAMPS
- WT_WITH_TIMESTAMP_READLOCK(session,
- &S2C(session)->txn_global.rwlock,
- __wt_timestamp_set(&mod->last_eviction_timestamp,
- &S2C(session)->txn_global.pinned_timestamp));
+ __wt_txn_pinned_timestamp(
+ session, &mod->last_eviction_timestamp);
#endif
mod->last_evict_pass_gen = S2C(session)->cache->evict_pass_gen;
}
@@ -444,14 +458,6 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
mod->last_oldest_id = oldest_id;
#endif
- /* Initialize the reconciliation structure for each new run. */
- if ((ret = __rec_init(
- session, ref, flags, salvage, &session->reconcile)) != 0) {
- WT_PAGE_UNLOCK(session, page);
- return (ret);
- }
- r = session->reconcile;
-
/* Reconcile the page. */
switch (page->type) {
case WT_PAGE_COL_FIX:
@@ -495,6 +501,17 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
else
WT_TRET(__rec_write_wrapup_err(session, r, page));
+#ifdef HAVE_TIMESTAMPS
+ /*
+ * If reconciliation completes successfully, save the stable timestamp.
+ */
+ if (ret == 0 && S2C(session)->txn_global.has_stable_timestamp)
+ WT_WITH_TIMESTAMP_READLOCK(session,
+ &S2C(session)->txn_global.rwlock,
+ __wt_timestamp_set(&mod->last_stable_timestamp,
+ &S2C(session)->txn_global.stable_timestamp));
+#endif
+
/* Release the reconciliation lock. */
WT_PAGE_UNLOCK(session, page);
@@ -681,7 +698,7 @@ __rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r)
} else {
/*
* Track the page's maximum transaction ID (used to decide if
- * we're likely to be able to evict this page in the future).
+ * we can evict a clean page and discard its history).
*/
mod->rec_max_txn = r->max_txn;
__wt_timestamp_set(&mod->rec_max_timestamp, &r->max_timestamp);
@@ -873,7 +890,6 @@ __rec_init(WT_SESSION_IMPL *session,
WT_PAGE *page;
WT_RECONCILE *r;
WT_TXN_GLOBAL *txn_global;
- bool las_skew_oldest;
btree = S2BT(session);
page = ref->page;
@@ -928,27 +944,24 @@ __rec_init(WT_SESSION_IMPL *session,
* We usually prefer to skew to newer versions, the logic being that by
* the time the next checkpoint runs, it is likely that all the updates
* we choose will be stable. However, if checkpointing with a
- * timestamp (indicated by a stable_timestamp being set), and the
- * timestamp hasn't changed since the last time this page was
- * reconciled, skew oldest instead. If a checkpoint is already running,
- * the oldest version is more likely to be what it needs.
+ * timestamp (indicated by a stable_timestamp being set), and there is
+ * a checkpoint already running, or this page was read with lookaside
+ * history, or the stable timestamp hasn't changed since last time this
+ * page was successfully reconciled, skew oldest instead.
*/
- if (__wt_btree_immediately_durable(session))
- las_skew_oldest = false;
- else {
- WT_ORDERED_READ(las_skew_oldest,
- txn_global->has_stable_timestamp);
- if (las_skew_oldest) {
- las_skew_oldest = (ref->page_las != NULL &&
- !__wt_txn_visible_all(session, WT_TXN_NONE,
- WT_TIMESTAMP_NULL(
- &ref->page_las->min_timestamp))) ||
- btree->checkpoint_gen !=
- __wt_gen(session, WT_GEN_CHECKPOINT);
- }
- }
- r->las_skew_newest = LF_ISSET(WT_REC_LOOKASIDE) &&
- LF_ISSET(WT_REC_VISIBLE_ALL) && !las_skew_oldest;
+ r->las_skew_newest =
+ LF_ISSET(WT_REC_LOOKASIDE) && LF_ISSET(WT_REC_VISIBLE_ALL);
+#ifdef HAVE_TIMESTAMPS
+ if (r->las_skew_newest &&
+ !__wt_btree_immediately_durable(session) &&
+ txn_global->has_stable_timestamp &&
+ ((btree->checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT) &&
+ txn_global->stable_is_pinned) ||
+ FLD_ISSET(page->modify->restore_state, WT_PAGE_RS_LOOKASIDE) ||
+ __wt_timestamp_cmp(&page->modify->last_stable_timestamp,
+ &txn_global->stable_timestamp) == 0))
+ r->las_skew_newest = false;
+#endif
/*
* When operating on the lookaside table, we should never try
@@ -979,10 +992,21 @@ __rec_init(WT_SESSION_IMPL *session,
/* Track the page's min/maximum transaction */
r->max_txn = WT_TXN_NONE;
- r->min_txn_unstable = WT_TXN_ABORTED;
__wt_timestamp_set_zero(&r->max_timestamp);
- __wt_timestamp_set_zero(&r->max_onpage_timestamp);
- __wt_timestamp_set_inf(&r->min_saved_timestamp);
+
+ /*
+ * Track the first unstable transaction (when skewing newest this is
+ * the newest update, otherwise the newest update not on the page).
+ * This is the boundary between the on-page information and the history
+ * stored in the lookaside table.
+ */
+ if (r->las_skew_newest) {
+ r->unstable_txn = WT_TXN_NONE;
+ __wt_timestamp_set_zero(&r->unstable_timestamp);
+ } else {
+ r->unstable_txn = WT_TXN_ABORTED;
+ __wt_timestamp_set_inf(&r->unstable_timestamp);
+ }
/* Track if updates were used and/or uncommitted. */
r->updates_seen = r->updates_unstable = 0;
@@ -1264,7 +1288,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
WT_UPDATE *first_txn_upd, *first_upd, *upd;
wt_timestamp_t *timestampp;
size_t upd_memsize;
- uint64_t max_txn, min_txn_unstable, txnid;
+ uint64_t max_txn, txnid;
bool all_visible, skipped_birthmark, uncommitted;
#ifdef HAVE_TIMESTAMPS
@@ -1280,7 +1304,6 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
first_txn_upd = NULL;
upd_memsize = 0;
max_txn = WT_TXN_NONE;
- min_txn_unstable = WT_TXN_ABORTED;
skipped_birthmark = uncommitted = false;
/*
@@ -1380,19 +1403,12 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
if (upd->type == WT_UPDATE_BIRTHMARK)
skipped_birthmark = true;
- /*
- * Track minimum transaction ID for unstable updates.
- */
- if (txnid != WT_TXN_NONE &&
- WT_TXNID_LT(txnid, min_txn_unstable))
- min_txn_unstable = txnid;
-
continue;
}
/*
* Lookaside without stable timestamp was taken care of above
- * (set to the first uncommitted transaction. Lookaside with
+ * (set to the first uncommitted transaction). Lookaside with
* stable timestamp always takes the first stable update.
*/
if (*updp == NULL)
@@ -1434,23 +1450,11 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
if (WT_TXNID_LT(r->max_txn, max_txn))
r->max_txn = max_txn;
- /*
- * Track the oldest unstable transaction in the page. It is used to
- * decide whether to or not to read the history during a page read.
- */
- if (WT_TXNID_LT(min_txn_unstable, r->min_txn_unstable))
- r->min_txn_unstable = min_txn_unstable;
-
#ifdef HAVE_TIMESTAMPS
/* Update the maximum timestamp. */
if (first_ts_upd != NULL &&
__wt_timestamp_cmp(&r->max_timestamp, &first_ts_upd->timestamp) < 0)
__wt_timestamp_set(&r->max_timestamp, &first_ts_upd->timestamp);
-
- /* Update the maximum on-page timestamp. */
- if (upd != NULL &&
- __wt_timestamp_cmp(&upd->timestamp, &r->max_onpage_timestamp) > 0)
- __wt_timestamp_set(&r->max_onpage_timestamp, &upd->timestamp);
#endif
/*
@@ -1527,24 +1531,38 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
if (upd_savedp != NULL)
*upd_savedp = true;
+ /*
+ * Track the first off-page update when saving history in the lookaside
+ * table. When skewing newest, we want the first (non-aborted) update
+ * after the one stored on the page. Otherwise, we want the update
+ * before the on-page update.
+ */
+ if (F_ISSET(r, WT_REC_LOOKASIDE) && r->las_skew_newest) {
+ if (WT_TXNID_LT(r->unstable_txn, first_upd->txnid))
+ r->unstable_txn = first_upd->txnid;
#ifdef HAVE_TIMESTAMPS
- /* Track the oldest saved timestamp for lookaside. */
- if (F_ISSET(r, WT_REC_LOOKASIDE)) {
- /* If no updates had timestamps, we're done. */
- if (first_ts_upd == NULL)
- __wt_timestamp_set_zero(&r->min_saved_timestamp);
+ if (first_ts_upd != NULL &&
+ __wt_timestamp_cmp(&r->unstable_timestamp,
+ &first_ts_upd->timestamp) < 0)
+ __wt_timestamp_set(&r->unstable_timestamp,
+ &first_ts_upd->timestamp);
+#endif
+ } else if (F_ISSET(r, WT_REC_LOOKASIDE)) {
for (upd = first_upd; upd != *updp; upd = upd->next) {
- if (upd->txnid != WT_TXN_ABORTED &&
- __wt_timestamp_cmp(&upd->timestamp,
- &r->min_saved_timestamp) < 0)
- __wt_timestamp_set(&r->min_saved_timestamp,
- &upd->timestamp);
+ if (upd->txnid == WT_TXN_ABORTED)
+ continue;
- WT_ASSERT(session, upd->txnid == WT_TXN_ABORTED ||
- WT_TXNID_LE(upd->txnid, r->max_txn));
+ if (upd->txnid != WT_TXN_NONE &&
+ WT_TXNID_LT(upd->txnid, r->unstable_txn))
+ r->unstable_txn = upd->txnid;
+#ifdef HAVE_TIMESTAMPS
+ if (__wt_timestamp_cmp(&upd->timestamp,
+ &r->unstable_timestamp) < 0)
+ __wt_timestamp_set(&r->unstable_timestamp,
+ &upd->timestamp);
+#endif
}
}
-#endif
check_original_value:
/*
@@ -3429,16 +3447,15 @@ __rec_split_write_supd(WT_SESSION_IMPL *session,
done: if (F_ISSET(r, WT_REC_LOOKASIDE)) {
/* Track the oldest lookaside timestamp seen so far. */
- multi->page_las.las_skew_newest = r->las_skew_newest;
- multi->page_las.las_max_txn = r->max_txn;
- multi->page_las.las_min_txn = r->min_txn_unstable;
- WT_ASSERT(session, r->max_txn != WT_TXN_NONE);
- WT_ASSERT(session, r->min_txn_unstable != WT_TXN_NONE);
+ multi->page_las.skew_newest = r->las_skew_newest;
+ multi->page_las.max_txn = r->max_txn;
+ multi->page_las.unstable_txn = r->unstable_txn;
+ WT_ASSERT(session, r->unstable_txn != WT_TXN_NONE);
#ifdef HAVE_TIMESTAMPS
- __wt_timestamp_set(&multi->page_las.min_timestamp,
- &r->min_saved_timestamp);
- __wt_timestamp_set(&multi->page_las.onpage_timestamp,
- &r->max_onpage_timestamp);
+ __wt_timestamp_set(&multi->page_las.max_timestamp,
+ &r->max_timestamp);
+ __wt_timestamp_set(&multi->page_las.unstable_timestamp,
+ &r->unstable_timestamp);
#endif
}
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index 0becbe0b536..db7a9c86767 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -259,7 +259,7 @@ __session_close_cursors(WT_SESSION_IMPL *session, WT_CURSOR_LIST *cursors)
*/
WT_TRET_NOTFOUND_OK(cursor->reopen(cursor, false));
else if (session->event_handler->handle_close != NULL &&
- !WT_STREQ(cursor->internal_uri, WT_LAS_URI))
+ strcmp(cursor->internal_uri, WT_LAS_URI) != 0)
/*
* Notify the user that we are closing the cursor
* handle via the registered close callback.
@@ -609,7 +609,7 @@ __session_open_cursor(WT_SESSION *wt_session,
SESSION_API_CALL(session, open_cursor, config, cfg);
statjoin = (to_dup != NULL && uri != NULL &&
- WT_STREQ(uri, "statistics:join"));
+ strcmp(uri, "statistics:join") == 0);
if (!statjoin) {
if ((to_dup == NULL && uri == NULL) ||
(to_dup != NULL && uri != NULL))
@@ -1490,7 +1490,7 @@ __session_truncate(WT_SESSION *wt_session,
* Verify the user only gave the URI prefix and not
* a specific target name after that.
*/
- if (!WT_STREQ(uri, "log:"))
+ if (strcmp(uri, "log:") != 0)
WT_ERR_MSG(session, EINVAL,
"the truncate method should not specify any"
"target after the log: URI prefix");
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 749564c2464..0d39a5b682e 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -111,6 +111,7 @@ static const char * const __stats_dsrc_desc[] = {
"compression: raw compression call succeeded",
"cursor: bulk-loaded cursor-insert calls",
"cursor: create calls",
+ "cursor: cursor operation restarted",
"cursor: cursor-insert key and value bytes inserted",
"cursor: cursor-remove key bytes removed",
"cursor: cursor-update value bytes updated",
@@ -123,7 +124,6 @@ static const char * const __stats_dsrc_desc[] = {
"cursor: remove calls",
"cursor: reserve calls",
"cursor: reset calls",
- "cursor: restarted searches",
"cursor: search calls",
"cursor: search near calls",
"cursor: truncate calls",
@@ -296,6 +296,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->compress_raw_ok = 0;
stats->cursor_insert_bulk = 0;
stats->cursor_create = 0;
+ stats->cursor_restart = 0;
stats->cursor_insert_bytes = 0;
stats->cursor_remove_bytes = 0;
stats->cursor_update_bytes = 0;
@@ -308,7 +309,6 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->cursor_remove = 0;
stats->cursor_reserve = 0;
stats->cursor_reset = 0;
- stats->cursor_restart = 0;
stats->cursor_search = 0;
stats->cursor_search_near = 0;
stats->cursor_truncate = 0;
@@ -482,6 +482,7 @@ __wt_stat_dsrc_aggregate_single(
to->compress_raw_ok += from->compress_raw_ok;
to->cursor_insert_bulk += from->cursor_insert_bulk;
to->cursor_create += from->cursor_create;
+ to->cursor_restart += from->cursor_restart;
to->cursor_insert_bytes += from->cursor_insert_bytes;
to->cursor_remove_bytes += from->cursor_remove_bytes;
to->cursor_update_bytes += from->cursor_update_bytes;
@@ -494,7 +495,6 @@ __wt_stat_dsrc_aggregate_single(
to->cursor_remove += from->cursor_remove;
to->cursor_reserve += from->cursor_reserve;
to->cursor_reset += from->cursor_reset;
- to->cursor_restart += from->cursor_restart;
to->cursor_search += from->cursor_search;
to->cursor_search_near += from->cursor_search_near;
to->cursor_truncate += from->cursor_truncate;
@@ -701,6 +701,7 @@ __wt_stat_dsrc_aggregate(
to->compress_raw_ok += WT_STAT_READ(from, compress_raw_ok);
to->cursor_insert_bulk += WT_STAT_READ(from, cursor_insert_bulk);
to->cursor_create += WT_STAT_READ(from, cursor_create);
+ to->cursor_restart += WT_STAT_READ(from, cursor_restart);
to->cursor_insert_bytes += WT_STAT_READ(from, cursor_insert_bytes);
to->cursor_remove_bytes += WT_STAT_READ(from, cursor_remove_bytes);
to->cursor_update_bytes += WT_STAT_READ(from, cursor_update_bytes);
@@ -713,7 +714,6 @@ __wt_stat_dsrc_aggregate(
to->cursor_remove += WT_STAT_READ(from, cursor_remove);
to->cursor_reserve += WT_STAT_READ(from, cursor_reserve);
to->cursor_reset += WT_STAT_READ(from, cursor_reset);
- to->cursor_restart += WT_STAT_READ(from, cursor_restart);
to->cursor_search += WT_STAT_READ(from, cursor_search);
to->cursor_search_near += WT_STAT_READ(from, cursor_search_near);
to->cursor_truncate += WT_STAT_READ(from, cursor_truncate);
@@ -840,6 +840,7 @@ static const char * const __stats_connection_desc[] = {
"cache: maximum page size at eviction",
"cache: modified pages evicted",
"cache: modified pages evicted by application threads",
+ "cache: operations timed out waiting for space in cache",
"cache: overflow pages read into cache",
"cache: page split during eviction deepened the tree",
"cache: page written requiring lookaside records",
@@ -889,11 +890,11 @@ static const char * const __stats_connection_desc[] = {
"cursor: cursor insert calls",
"cursor: cursor modify calls",
"cursor: cursor next calls",
+ "cursor: cursor operation restarted",
"cursor: cursor prev calls",
"cursor: cursor remove calls",
"cursor: cursor reserve calls",
"cursor: cursor reset calls",
- "cursor: cursor restarted searches",
"cursor: cursor search calls",
"cursor: cursor search near calls",
"cursor: cursor sweep buckets",
@@ -1097,6 +1098,7 @@ static const char * const __stats_connection_desc[] = {
"transaction: transaction range of IDs currently pinned by a checkpoint",
"transaction: transaction range of IDs currently pinned by named snapshots",
"transaction: transaction range of timestamps currently pinned",
+ "transaction: transaction range of timestamps pinned by a checkpoint",
"transaction: transaction range of timestamps pinned by the oldest timestamp",
"transaction: transaction sync calls",
"transaction: transactions committed",
@@ -1237,6 +1239,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing cache_eviction_maximum_page_size */
stats->cache_eviction_dirty = 0;
stats->cache_eviction_app_dirty = 0;
+ stats->cache_timed_out_ops = 0;
stats->cache_read_overflow = 0;
stats->cache_eviction_deepen = 0;
stats->cache_write_lookaside = 0;
@@ -1286,11 +1289,11 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cursor_insert = 0;
stats->cursor_modify = 0;
stats->cursor_next = 0;
+ stats->cursor_restart = 0;
stats->cursor_prev = 0;
stats->cursor_remove = 0;
stats->cursor_reserve = 0;
stats->cursor_reset = 0;
- stats->cursor_restart = 0;
stats->cursor_search = 0;
stats->cursor_search_near = 0;
stats->cursor_sweep_buckets = 0;
@@ -1494,6 +1497,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing txn_pinned_checkpoint_range */
/* not clearing txn_pinned_snapshot_range */
/* not clearing txn_pinned_timestamp */
+ /* not clearing txn_pinned_timestamp_checkpoint */
/* not clearing txn_pinned_timestamp_oldest */
stats->txn_sync = 0;
stats->txn_commit = 0;
@@ -1662,6 +1666,7 @@ __wt_stat_connection_aggregate(
to->cache_eviction_dirty += WT_STAT_READ(from, cache_eviction_dirty);
to->cache_eviction_app_dirty +=
WT_STAT_READ(from, cache_eviction_app_dirty);
+ to->cache_timed_out_ops += WT_STAT_READ(from, cache_timed_out_ops);
to->cache_read_overflow += WT_STAT_READ(from, cache_read_overflow);
to->cache_eviction_deepen +=
WT_STAT_READ(from, cache_eviction_deepen);
@@ -1726,11 +1731,11 @@ __wt_stat_connection_aggregate(
to->cursor_insert += WT_STAT_READ(from, cursor_insert);
to->cursor_modify += WT_STAT_READ(from, cursor_modify);
to->cursor_next += WT_STAT_READ(from, cursor_next);
+ to->cursor_restart += WT_STAT_READ(from, cursor_restart);
to->cursor_prev += WT_STAT_READ(from, cursor_prev);
to->cursor_remove += WT_STAT_READ(from, cursor_remove);
to->cursor_reserve += WT_STAT_READ(from, cursor_reserve);
to->cursor_reset += WT_STAT_READ(from, cursor_reset);
- to->cursor_restart += WT_STAT_READ(from, cursor_restart);
to->cursor_search += WT_STAT_READ(from, cursor_search);
to->cursor_search_near += WT_STAT_READ(from, cursor_search_near);
to->cursor_sweep_buckets += WT_STAT_READ(from, cursor_sweep_buckets);
@@ -2047,6 +2052,8 @@ __wt_stat_connection_aggregate(
to->txn_pinned_snapshot_range +=
WT_STAT_READ(from, txn_pinned_snapshot_range);
to->txn_pinned_timestamp += WT_STAT_READ(from, txn_pinned_timestamp);
+ to->txn_pinned_timestamp_checkpoint +=
+ WT_STAT_READ(from, txn_pinned_timestamp_checkpoint);
to->txn_pinned_timestamp_oldest +=
WT_STAT_READ(from, txn_pinned_timestamp_oldest);
to->txn_sync += WT_STAT_READ(from, txn_sync);
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index cf233ab9a5d..4cb780c0042 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -119,9 +119,11 @@ void
__wt_txn_release_snapshot(WT_SESSION_IMPL *session)
{
WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *txn_state;
txn = &session->txn;
+ txn_global = &S2C(session)->txn_global;
txn_state = WT_SESSION_TXN_STATE(session);
WT_ASSERT(session,
@@ -131,6 +133,14 @@ __wt_txn_release_snapshot(WT_SESSION_IMPL *session)
txn_state->metadata_pinned = txn_state->pinned_id = WT_TXN_NONE;
F_CLR(txn, WT_TXN_HAS_SNAPSHOT);
+
+ /* Clear a checkpoint's pinned ID and timestamp. */
+ if (WT_SESSION_IS_CHECKPOINT(session)) {
+ txn_global->checkpoint_state.pinned_id = WT_TXN_NONE;
+ __wt_timestamp_set_zero(&txn_global->checkpoint_timestamp);
+ }
+
+ __wt_txn_clear_read_timestamp(session);
}
/*
@@ -528,8 +538,7 @@ __wt_txn_release(WT_SESSION_IMPL *session)
if (WT_SESSION_IS_CHECKPOINT(session)) {
WT_ASSERT(session,
WT_SESSION_TXN_STATE(session)->id == WT_TXN_NONE);
- txn->id = txn_global->checkpoint_state.id =
- txn_global->checkpoint_state.pinned_id = WT_TXN_NONE;
+ txn->id = txn_global->checkpoint_state.id = WT_TXN_NONE;
/*
* Be extra careful to cleanup everything for checkpoints: once
@@ -548,7 +557,6 @@ __wt_txn_release(WT_SESSION_IMPL *session)
}
__wt_txn_clear_commit_timestamp(session);
- __wt_txn_clear_read_timestamp(session);
/* Free the scratch buffer allocated for logging. */
__wt_logrec_free(session, &txn->logrec);
@@ -1283,12 +1291,24 @@ __wt_txn_stats_update(WT_SESSION_IMPL *session)
txn_global->current - txn_global->oldest_id);
#if WT_TIMESTAMP_SIZE == 8
+ {
+ WT_DECL_TIMESTAMP(checkpoint_timestamp)
+ WT_DECL_TIMESTAMP(commit_timestamp)
+ WT_DECL_TIMESTAMP(pinned_timestamp)
+
+ checkpoint_timestamp = txn_global->checkpoint_timestamp;
+ commit_timestamp = txn_global->commit_timestamp;
+ pinned_timestamp = txn_global->pinned_timestamp;
+ if (checkpoint_timestamp.val != 0 &&
+ checkpoint_timestamp.val < pinned_timestamp.val)
+ pinned_timestamp = checkpoint_timestamp;
WT_STAT_SET(session, stats, txn_pinned_timestamp,
- txn_global->commit_timestamp.val -
- txn_global->pinned_timestamp.val);
+ commit_timestamp.val - pinned_timestamp.val);
+ WT_STAT_SET(session, stats, txn_pinned_timestamp_checkpoint,
+ commit_timestamp.val - checkpoint_timestamp.val);
WT_STAT_SET(session, stats, txn_pinned_timestamp_oldest,
- txn_global->commit_timestamp.val -
- txn_global->oldest_timestamp.val);
+ commit_timestamp.val - txn_global->oldest_timestamp.val);
+ }
#endif
WT_STAT_SET(session, stats, txn_pinned_snapshot_range,
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 10af61caeaf..ad8351923a0 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -372,12 +372,11 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
{
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
- double current_dirty, delta, scrub_min;
- uint64_t bytes_written_last, bytes_written_start, bytes_written_total;
+ double current_dirty, prev_dirty;
+ uint64_t bytes_written_start, bytes_written_total;
uint64_t cache_size, max_write;
- uint64_t current_us, stepdown_us, total_ms, work_us;
- uint64_t time_last, time_start, time_stop;
- bool progress;
+ uint64_t time_start, time_stop;
+ uint64_t total_ms;
conn = S2C(session);
cache = conn->cache;
@@ -388,61 +387,41 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
* scrubbing cannot help).
*/
if (F_ISSET(conn, WT_CONN_CLOSING_TIMESTAMP) ||
- cache->eviction_checkpoint_target < DBL_EPSILON ||
- cache->eviction_checkpoint_target >= cache->eviction_dirty_trigger)
+ cache->eviction_checkpoint_target < DBL_EPSILON)
return;
- time_last = time_start = __wt_clock(session);
- bytes_written_last = 0;
+ time_start = __wt_clock(session);
bytes_written_start = cache->bytes_written;
- cache_size = conn->cache_size;
+
/*
* If the cache size is zero or very small, we're done. The cache
* size can briefly become zero if we're transitioning to a shared
* cache via reconfigure. This avoids potential divide by zero.
*/
- if (cache_size < 10 * WT_MEGABYTE)
+ if ((cache_size = conn->cache_size) < 10 * WT_MEGABYTE)
return;
- /*
- * Skip scrubbing if it won't perform at-least some minimum amount of
- * work. Scrubbing is supposed to bring down the dirty data to eviction
- * checkpoint target before the actual checkpoint starts. Do not perform
- * scrubbing if the dirty data to scrub is less than a pre-configured
- * size. This size is to an extent based on the configured cache size
- * without being too large or too small for large cache sizes. For the
- * values chosen, for instance, 100 GB cache will require at-least
- * 200 MB of dirty data above eviction checkpoint target, which should
- * equate to a scrub phase a few seconds long. That said, the value of
- * 0.2% and 500 MB are still somewhat arbitrary.
- */
- scrub_min = WT_MIN((0.2 * conn->cache_size) / 100, 500 * WT_MEGABYTE);
- if (__wt_cache_dirty_leaf_inuse(cache) <
- ((cache->eviction_checkpoint_target * conn->cache_size) / 100) +
- scrub_min)
+ current_dirty =
+ (100.0 * __wt_cache_dirty_leaf_inuse(cache)) / cache_size;
+ if (current_dirty <= cache->eviction_checkpoint_target)
return;
- stepdown_us = 10000;
- work_us = 0;
- progress = false;
-
- /* Step down the scrub target (as a percentage) in units of 10MB. */
- delta = WT_MIN(1.0, (100 * 10.0 * WT_MEGABYTE) / cache_size);
-
- /*
- * Start with the scrub target equal to the expected maximum percentage
- * of dirty data in cache.
- */
- cache->eviction_scrub_limit = cache->eviction_dirty_trigger;
-
/* Stop if we write as much dirty data as is currently in cache. */
max_write = __wt_cache_dirty_leaf_inuse(cache);
- /* Step down the dirty target to the eviction trigger */
+ /* Set the scrub target to the checkpoint target value. */
+ cache->eviction_scrub_target = cache->eviction_checkpoint_target;
+ WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target, 0);
+
+ /* Wait while the dirty level is going down. */
for (;;) {
+ __wt_sleep(0, 100 * WT_THOUSAND);
+
+ prev_dirty = current_dirty;
current_dirty =
(100.0 * __wt_cache_dirty_leaf_inuse(cache)) / cache_size;
- if (current_dirty <= cache->eviction_checkpoint_target)
+ if (current_dirty <= cache->eviction_checkpoint_target ||
+ current_dirty >= prev_dirty)
break;
/*
@@ -452,63 +431,17 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
if (F_ISSET(cache, WT_CACHE_EVICT_LOOKASIDE))
break;
- __wt_sleep(0, stepdown_us / 10);
- time_stop = __wt_clock(session);
- current_us = WT_CLOCKDIFF_US(time_stop, time_last);
- bytes_written_total =
- cache->bytes_written - bytes_written_start;
-
- if (current_dirty > cache->eviction_scrub_limit) {
- /*
- * We haven't reached the current target.
- *
- * Don't wait indefinitely: there might be dirty pages
- * that can't be evicted. If we can't meet the target,
- * give up and start the checkpoint for real.
- */
- if (current_us > WT_MAX(WT_MILLION, 10 * stepdown_us) ||
- bytes_written_total > max_write)
- break;
- continue;
- }
-
/*
- * Estimate how long the next step down of dirty data should
- * take.
- *
- * The calculation here assumes that the system is writing from
- * cache as fast as it can, and determines the write throughput
- * based on the change in the bytes written from cache since
- * the start of the call. We use that to estimate how long it
- * will take to step the dirty target down by delta.
+ * We haven't reached the current target.
*
- * Take care to avoid dividing by zero.
- */
- if (bytes_written_total - bytes_written_last > WT_MEGABYTE &&
- work_us > 0) {
- stepdown_us = (uint64_t)((delta * cache_size / 100) /
- ((double)bytes_written_total / work_us));
- stepdown_us = WT_MAX(1, stepdown_us);
- if (!progress)
- stepdown_us = WT_MIN(stepdown_us, 200000);
- progress = true;
-
- bytes_written_last = bytes_written_total;
- }
-
- work_us += current_us;
-
- /*
- * Smooth out step down: try to limit the impact on
- * performance to 10% by waiting once we reach the last
- * level.
+ * Don't wait indefinitely: there might be dirty pages
+ * that can't be evicted. If we can't meet the target,
+ * give up and start the checkpoint for real.
*/
- __wt_sleep(0, 10 * stepdown_us);
- cache->eviction_scrub_limit =
- WT_MAX(cache->eviction_dirty_target, current_dirty - delta);
- WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target,
- cache->eviction_scrub_limit);
- time_last = __wt_clock(session);
+ bytes_written_total =
+ cache->bytes_written - bytes_written_start;
+ if (bytes_written_total > max_write)
+ break;
}
time_stop = __wt_clock(session);
@@ -681,8 +614,7 @@ __checkpoint_prepare(
*/
__wt_writelock(session, &txn_global->rwlock);
txn_global->checkpoint_state = *txn_state;
- txn_global->checkpoint_txn = txn;
- txn_global->checkpoint_state.pinned_id = WT_MIN(txn->id, txn->snap_min);
+ txn_global->checkpoint_state.pinned_id = txn->snap_min;
/*
* Sanity check that the oldest ID hasn't moved on before we have
@@ -724,6 +656,8 @@ __checkpoint_prepare(
if (txn_global->has_stable_timestamp) {
__wt_timestamp_set(&txn->read_timestamp,
&txn_global->stable_timestamp);
+ __wt_timestamp_set(&txn_global->checkpoint_timestamp,
+ &txn->read_timestamp);
F_SET(txn, WT_TXN_HAS_TS_READ);
if (!F_ISSET(conn, WT_CONN_RECOVERING))
__wt_timestamp_set(
@@ -802,14 +736,10 @@ __txn_checkpoint_can_skip(WT_SESSION_IMPL *session,
*/
WT_RET(__wt_config_gets(session, cfg, "target", &cval));
__wt_config_subinit(session, &targetconf, &cval);
- full = __wt_config_next(&targetconf, &k, &v) != 0;
- if (fullp != NULL)
- *fullp = full;
+ *fullp = full = __wt_config_next(&targetconf, &k, &v) != 0;
WT_RET(__wt_config_gets(session, cfg, "use_timestamp", &cval));
- use_timestamp = cval.val != 0;
- if (use_timestampp != NULL)
- *use_timestampp = use_timestamp;
+ *use_timestampp = use_timestamp = cval.val != 0;
/* Never skip non-full checkpoints */
if (!full)
@@ -979,7 +909,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* Unblock updates -- we can figure out that any updates to clean pages
* after this point are too new to be written in the checkpoint.
*/
- cache->eviction_scrub_limit = 0.0;
+ cache->eviction_scrub_target = 0.0;
WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target, 0);
/* Tell logging that we have started a database checkpoint. */
@@ -1129,7 +1059,7 @@ err: /*
if (tracking)
WT_TRET(__wt_meta_track_off(session, false, failed));
- cache->eviction_scrub_limit = 0.0;
+ cache->eviction_scrub_target = 0.0;
WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target, 0);
if (F_ISSET(txn, WT_TXN_RUNNING)) {
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 35a89eeb072..e01db53fda9 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -275,8 +275,9 @@ __txn_rollback_to_stable_btree_walk(
WT_READ_CACHE | WT_READ_LOOKASIDE | WT_READ_NO_EVICT)) == 0 &&
ref != NULL) {
if (ref->page_las != NULL &&
+ ref->page_las->skew_newest &&
__wt_timestamp_cmp(rollback_timestamp,
- &ref->page_las->onpage_timestamp) < 0)
+ &ref->page_las->unstable_timestamp) < 0)
ref->page_las->invalid = true;
/* Review deleted page saved to the ref */
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index a10ff740df6..32233966479 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -193,6 +193,44 @@ __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name,
}
/*
+ * __txn_get_pinned_timestamp --
+ * Calculate the current pinned timestamp.
+ */
+static int
+__txn_get_pinned_timestamp(
+ WT_SESSION_IMPL *session, wt_timestamp_t *tsp, bool include_checkpoint)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+
+ conn = S2C(session);
+ txn_global = &conn->txn_global;
+
+ if (!txn_global->has_oldest_timestamp)
+ return (WT_NOTFOUND);
+ __wt_readlock(session, &txn_global->rwlock);
+ __wt_timestamp_set(tsp, &txn_global->oldest_timestamp);
+
+ /* Check for a running checkpoint */
+ if (include_checkpoint &&
+ !__wt_timestamp_iszero(&txn_global->checkpoint_timestamp) &&
+ __wt_timestamp_cmp(&txn_global->checkpoint_timestamp, tsp) < 0)
+ __wt_timestamp_set(tsp, &txn_global->checkpoint_timestamp);
+ __wt_readunlock(session, &txn_global->rwlock);
+
+ /* Look for the oldest ordinary reader. */
+ __wt_readlock(session, &txn_global->read_timestamp_rwlock);
+ txn = TAILQ_FIRST(&txn_global->read_timestamph);
+ if (txn != NULL &&
+ __wt_timestamp_cmp(&txn->read_timestamp, tsp) < 0)
+ __wt_timestamp_set(tsp, &txn->read_timestamp);
+ __wt_readunlock(session, &txn_global->read_timestamp_rwlock);
+
+ return (0);
+}
+
+/*
* __txn_global_query_timestamp --
* Query a timestamp.
*/
@@ -248,26 +286,7 @@ __txn_global_query_timestamp(
WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
__wt_timestamp_set(&ts, &txn_global->oldest_timestamp));
} else if (WT_STRING_MATCH("pinned", cval.str, cval.len)) {
- if (!txn_global->has_oldest_timestamp)
- return (WT_NOTFOUND);
- __wt_readlock(session, &txn_global->rwlock);
- __wt_timestamp_set(&ts, &txn_global->oldest_timestamp);
-
- /* Check for a running checkpoint */
- txn = txn_global->checkpoint_txn;
- if (txn_global->checkpoint_state.pinned_id != WT_TXN_NONE &&
- !__wt_timestamp_iszero(&txn->read_timestamp) &&
- __wt_timestamp_cmp(&txn->read_timestamp, &ts) < 0)
- __wt_timestamp_set(&ts, &txn->read_timestamp);
- __wt_readunlock(session, &txn_global->rwlock);
-
- /* Look for the oldest ordinary reader. */
- __wt_readlock(session, &txn_global->read_timestamp_rwlock);
- txn = TAILQ_FIRST(&txn_global->read_timestamph);
- if (txn != NULL &&
- __wt_timestamp_cmp(&txn->read_timestamp, &ts) < 0)
- __wt_timestamp_set(&ts, &txn->read_timestamp);
- __wt_readunlock(session, &txn_global->read_timestamp_rwlock);
+ WT_RET(__txn_get_pinned_timestamp(session, &ts, true));
} else if (WT_STRING_MATCH("recovery", cval.str, cval.len))
/* Read-only value forever. No lock needed. */
__wt_timestamp_set(&ts, &txn_global->recovery_timestamp);
@@ -320,8 +339,6 @@ __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force)
WT_TXN_GLOBAL *txn_global;
wt_timestamp_t active_timestamp, last_pinned_timestamp;
wt_timestamp_t oldest_timestamp, pinned_timestamp;
- const char *query_cfg[] = { WT_CONFIG_BASE(session,
- WT_CONNECTION_query_timestamp), "get=pinned", NULL };
txn_global = &S2C(session)->txn_global;
@@ -334,13 +351,13 @@ __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force)
&oldest_timestamp, &txn_global->oldest_timestamp));
/* Scan to find the global pinned timestamp. */
- if ((ret = __txn_global_query_timestamp(
- session, &active_timestamp, query_cfg)) != 0)
+ if ((ret = __txn_get_pinned_timestamp(
+ session, &active_timestamp, false)) != 0)
return (ret == WT_NOTFOUND ? 0 : ret);
- if (__wt_timestamp_cmp(&oldest_timestamp, &active_timestamp) < 0) {
+ if (__wt_timestamp_cmp(&oldest_timestamp, &active_timestamp) < 0)
__wt_timestamp_set(&pinned_timestamp, &oldest_timestamp);
- } else
+ else
__wt_timestamp_set(&pinned_timestamp, &active_timestamp);
if (txn_global->has_pinned_timestamp && !force) {
@@ -362,6 +379,9 @@ __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force)
txn_global->oldest_is_pinned = __wt_timestamp_cmp(
&txn_global->pinned_timestamp,
&txn_global->oldest_timestamp) == 0;
+ txn_global->stable_is_pinned = __wt_timestamp_cmp(
+ &txn_global->pinned_timestamp,
+ &txn_global->stable_timestamp) == 0;
__wt_verbose_timestamp(session,
&pinned_timestamp, "Updated pinned timestamp");
}
diff --git a/src/third_party/wiredtiger/src/utilities/util_load.c b/src/third_party/wiredtiger/src/utilities/util_load.c
index 2b210419c78..7e0f816436e 100644
--- a/src/third_party/wiredtiger/src/utilities/util_load.c
+++ b/src/third_party/wiredtiger/src/utilities/util_load.c
@@ -138,7 +138,7 @@ load_dump(WT_SESSION *session)
* Check the append flag (it only applies to objects where the primary
* key is a record number).
*/
- if (append && strcmp(cursor->key_format, "r") != 0) {
+ if (append && !WT_STREQ(cursor->key_format, "r")) {
fprintf(stderr,
"%s: %s: -a option illegal unless the primary key is a "
"record number\n",
diff --git a/src/third_party/wiredtiger/src/utilities/util_load_json.c b/src/third_party/wiredtiger/src/utilities/util_load_json.c
index 91358fd29f0..47c56a84064 100644
--- a/src/third_party/wiredtiger/src/utilities/util_load_json.c
+++ b/src/third_party/wiredtiger/src/utilities/util_load_json.c
@@ -254,7 +254,7 @@ json_data(WT_SESSION *session,
goto err;
}
keyformat = cursor->key_format;
- isrec = strcmp(keyformat, "r") == 0;
+ isrec = WT_STREQ(keyformat, "r");
for (nkeys = 0; *keyformat; keyformat++)
if (!__wt_isdigit((u_char)*keyformat))
nkeys++;
diff --git a/src/third_party/wiredtiger/src/utilities/util_loadtext.c b/src/third_party/wiredtiger/src/utilities/util_loadtext.c
index 1519a0e4fa5..3e57e3ea0e4 100644
--- a/src/third_party/wiredtiger/src/utilities/util_loadtext.c
+++ b/src/third_party/wiredtiger/src/utilities/util_loadtext.c
@@ -74,13 +74,13 @@ text(WT_SESSION *session, const char *uri)
* Row-store tables have key/value pairs, column-store tables only have
* values.
*/
- if (strcmp(cursor->value_format, "S") != 0 ||
- (strcmp(cursor->key_format, "S") != 0 &&
- strcmp(cursor->key_format, "r") != 0))
+ if (!WT_STREQ(cursor->value_format, "S") ||
+ (!WT_STREQ(cursor->key_format, "S") &&
+ !WT_STREQ(cursor->key_format, "r")))
return (util_err(session, EINVAL,
"the loadtext command can only load objects configured "
"for record number or string keys, and string values"));
- readkey = strcmp(cursor->key_format, "r") != 0;
+ readkey = !WT_STREQ(cursor->key_format, "r");
/* Insert the records */
ret = insert(cursor, uri, readkey);
diff --git a/src/third_party/wiredtiger/src/utilities/util_read.c b/src/third_party/wiredtiger/src/utilities/util_read.c
index ab2b2a79968..26f3cb5394f 100644
--- a/src/third_party/wiredtiger/src/utilities/util_read.c
+++ b/src/third_party/wiredtiger/src/utilities/util_read.c
@@ -51,16 +51,16 @@ util_read(WT_SESSION *session, int argc, char *argv[])
* A simple search only makes sense if the key format is a string or a
* record number, and the value format is a single string.
*/
- if (strcmp(cursor->key_format, "r") != 0 &&
- strcmp(cursor->key_format, "S") != 0) {
+ if (!WT_STREQ(cursor->key_format, "r") &&
+ !WT_STREQ(cursor->key_format, "S")) {
fprintf(stderr,
"%s: read command only possible when the key format is "
"a record number or string\n",
progname);
return (1);
}
- rkey = strcmp(cursor->key_format, "r") == 0;
- if (strcmp(cursor->value_format, "S") != 0) {
+ rkey = WT_STREQ(cursor->key_format, "r");
+ if (!WT_STREQ(cursor->value_format, "S")) {
fprintf(stderr,
"%s: read command only possible when the value format is "
"a string\n",
diff --git a/src/third_party/wiredtiger/src/utilities/util_write.c b/src/third_party/wiredtiger/src/utilities/util_write.c
index da958f86c2d..5d460367adc 100644
--- a/src/third_party/wiredtiger/src/utilities/util_write.c
+++ b/src/third_party/wiredtiger/src/utilities/util_write.c
@@ -71,16 +71,16 @@ util_write(WT_SESSION *session, int argc, char *argv[])
* A simple search only makes sense if the key format is a string or a
* record number, and the value format is a single string.
*/
- if (strcmp(cursor->key_format, "r") != 0 &&
- strcmp(cursor->key_format, "S") != 0) {
+ if (!WT_STREQ(cursor->key_format, "r") &&
+ !WT_STREQ(cursor->key_format, "S")) {
fprintf(stderr,
"%s: write command only possible when the key format is "
"a record number or string\n",
progname);
return (1);
}
- rkey = strcmp(cursor->key_format, "r") == 0;
- if (strcmp(cursor->value_format, "S") != 0) {
+ rkey = WT_STREQ(cursor->key_format, "r");
+ if (!WT_STREQ(cursor->value_format, "S")) {
fprintf(stderr,
"%s: write command only possible when the value format is "
"a string\n",
diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h
index 51dc906465a..c398c1a96b2 100644
--- a/src/third_party/wiredtiger/test/format/config.h
+++ b/src/third_party/wiredtiger/test/format/config.h
@@ -331,41 +331,49 @@ static CONFIG c[] = {
C_IGNORE, 0, 0, UINT_MAX, &g.c_timer, NULL },
{ "timing_stress_checkpoint",
- "configure slow checkpoints", /* 2% */
+ "stress checkpoints", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_checkpoint, NULL },
{ "timing_stress_lookaside_sweep",
- "configure slow lookaside sweep", /* 2% */
+ "stress lookaside sweep", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_lookaside_sweep, NULL },
{ "timing_stress_split_1",
- "configure slow splits (#1)", /* 2% */
+ "stress splits (#1)", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_split_1, NULL },
{ "timing_stress_split_2",
- "configure slow splits (#2)", /* 2% */
+ "stress splits (#2)", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_split_2, NULL },
{ "timing_stress_split_3",
- "configure slow splits (#3)", /* 2% */
+ "stress splits (#3)", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_split_3, NULL },
{ "timing_stress_split_4",
- "configure slow splits (#4)", /* 2% */
+ "stress splits (#4)", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_split_4, NULL },
{ "timing_stress_split_5",
- "configure slow splits (#5)", /* 2% */
+ "stress splits (#5)", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_split_5, NULL },
{ "timing_stress_split_6",
- "configure slow splits (#6)", /* 2% */
+ "stress splits (#6)", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_split_6, NULL },
{ "timing_stress_split_7",
- "configure slow splits (#7)", /* 2% */
+ "stress splits (#7)", /* 2% */
C_BOOL, 2, 0, 0, &g.c_timing_stress_split_7, NULL },
+ { "timing_stress_split_8",
+ "stress splits (#8)", /* 2% */
+ C_BOOL, 2, 0, 0, &g.c_timing_stress_split_8, NULL },
+
+ { "timing_stress_split_9",
+ "stress splits (#9)", /* 2% */
+ C_BOOL, 2, 0, 0, &g.c_timing_stress_split_9, NULL },
+
{ "transaction_timestamps", /* 10% */
"enable transaction timestamp support",
C_BOOL, 10, 0, 0, &g.c_txn_timestamps, NULL },
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index 0eca6657dd9..1406d2b3fb5 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -221,6 +221,8 @@ typedef struct {
uint32_t c_timing_stress_split_5;
uint32_t c_timing_stress_split_6;
uint32_t c_timing_stress_split_7;
+ uint32_t c_timing_stress_split_8;
+ uint32_t c_timing_stress_split_9;
uint32_t c_truncate;
uint32_t c_txn_freq;
uint32_t c_txn_timestamps;
diff --git a/src/third_party/wiredtiger/test/format/lrt.c b/src/third_party/wiredtiger/test/format/lrt.c
index 9d99933ef64..31c5de93870 100644
--- a/src/third_party/wiredtiger/test/format/lrt.c
+++ b/src/third_party/wiredtiger/test/format/lrt.c
@@ -110,8 +110,15 @@ lrt(void *arg)
*/
testutil_check(session->snapshot(session, "name=test"));
__wt_sleep(1, 0);
- testutil_check(session->begin_transaction(
- session, "snapshot=test"));
+ /*
+ * Keep trying to start a new transaction if it's
+ * timing out - we know there aren't any resources
+ * pinned so it should succeed eventually.
+ */
+ while ((ret = session->begin_transaction(
+ session, "snapshot=test")) == WT_CACHE_FULL)
+ ;
+ testutil_check(ret);
testutil_check(session->snapshot(
session, "drop=(all)"));
testutil_check(session->commit_transaction(
@@ -123,8 +130,10 @@ lrt(void *arg)
* positioned. As soon as the cursor loses its position
* a new snapshot will be allocated.
*/
- testutil_check(session->begin_transaction(
- session, "isolation=snapshot"));
+ while ((ret = session->begin_transaction(
+ session, "snapshot=snapshot")) == WT_CACHE_FULL)
+ ;
+ testutil_check(ret);
/* Read a record at the end of the table. */
do {
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index 7d08dbd8bd8..27aa118a4ef 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -456,10 +456,10 @@ snap_check(WT_CURSOR *cursor,
print_item_data(
"expected", start->vdata, start->vsize);
if (ret == WT_NOTFOUND)
- fprintf(stderr, "\t found {deleted}\n");
+ fprintf(stderr, "found {deleted}\n");
else
print_item_data(
- " found", value->data, value->size);
+ "found", value->data, value->size);
testutil_die(ret,
"snapshot-isolation: %.*s search mismatch",
@@ -476,10 +476,10 @@ snap_check(WT_CURSOR *cursor,
print_item_data(
"expected", start->vdata, start->vsize);
if (ret == WT_NOTFOUND)
- fprintf(stderr, "\t found {deleted}\n");
+ fprintf(stderr, "found {deleted}\n");
else
print_item_data(
- " found", value->data, value->size);
+ "found", value->data, value->size);
testutil_die(ret,
"snapshot-isolation: %" PRIu64 " search mismatch",
@@ -498,6 +498,7 @@ static void
begin_transaction(TINFO *tinfo, WT_SESSION *session, u_int *iso_configp)
{
u_int v;
+ int ret;
const char *config;
char config_buf[64];
bool locked;
@@ -523,7 +524,15 @@ begin_transaction(TINFO *tinfo, WT_SESSION *session, u_int *iso_configp)
}
*iso_configp = v;
- testutil_check(session->begin_transaction(session, config));
+ /*
+ * Keep trying to start a new transaction if it's timing out - we
+ * know there aren't any resources pinned so it should succeed
+ * eventually.
+ */
+ while ((ret =
+ session->begin_transaction(session, config)) == WT_CACHE_FULL)
+ ;
+ testutil_check(ret);
if (v == ISOLATION_SNAPSHOT && g.c_txn_timestamps) {
/* Avoid starting a new reader when a prepare is in progress. */
diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c
index dd87adeae56..8040142aa19 100644
--- a/src/third_party/wiredtiger/test/format/wts.c
+++ b/src/third_party/wiredtiger/test/format/wts.c
@@ -262,6 +262,10 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp)
CONFIG_APPEND(p, ",split_6");
if (g.c_timing_stress_split_7)
CONFIG_APPEND(p, ",split_7");
+ if (g.c_timing_stress_split_8)
+ CONFIG_APPEND(p, ",split_8");
+ if (g.c_timing_stress_split_9)
+ CONFIG_APPEND(p, ",split_9");
CONFIG_APPEND(p, "]");
/* Extensions. */
diff --git a/src/third_party/wiredtiger/test/suite/test_bug020.py b/src/third_party/wiredtiger/test/suite/test_bug020.py
new file mode 100644
index 00000000000..646f2505e83
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_bug020.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2018 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import os
+import wiredtiger, wttest
+from wtdataset import SimpleDataSet
+
+# test_bug020.py
+# Test that an existing set file will replace a missing turtle file.
+class test_bug020(wttest.WiredTigerTestCase):
+ def test_bug020(self):
+ SimpleDataSet(self, "table:bug020", 1000).populate()
+ self.close_conn()
+ os.rename("WiredTiger.turtle", "WiredTiger.turtle.set")
+ expectMessage = 'WiredTiger.turtle not found'
+ with self.expectedStdoutPattern(expectMessage):
+ self.open_conn()
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_las03.py b/src/third_party/wiredtiger/test/suite/test_las03.py
new file mode 100644
index 00000000000..6934bd9a741
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_las03.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2018 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+from helper import copy_wiredtiger_home
+import wiredtiger, wttest
+from wiredtiger import stat
+from wtdataset import SimpleDataSet
+
+def timestamp_str(t):
+ return '%x' % t
+
+# test_las03.py
+# Ensure checkpoints don't read too many unnecessary lookaside entries.
+class test_las03(wttest.WiredTigerTestCase):
+ # Force a small cache.
+ def conn_config(self):
+ return 'cache_size=50MB,statistics=(fast)'
+
+ def get_stat(self, stat):
+ stat_cursor = self.session.open_cursor('statistics:')
+ val = stat_cursor[stat][2]
+ stat_cursor.close()
+ return val
+
+ def large_updates(self, session, uri, value, ds, nrows, nops):
+ # Update a large number of records, we'll hang if the lookaside table
+ # isn't doing its thing.
+ cursor = session.open_cursor(uri)
+ for i in range(nrows + 1, nrows + nops + 1):
+ session.begin_transaction()
+ cursor[ds.key(i)] = value
+ session.commit_transaction('commit_timestamp=' + timestamp_str(i))
+ cursor.close()
+
+ def test_checkpoint_las_reads(self):
+ if not wiredtiger.timestamp_build():
+ self.skipTest('requires a timestamp build')
+
+ # Create a small table.
+ uri = "table:test_las03"
+ nrows = 100
+ ds = SimpleDataSet(self, uri, nrows, key_format="S", value_format='u')
+ ds.populate()
+ bigvalue = "aaaaa" * 100
+
+ # Initially load huge data
+ cursor = self.session.open_cursor(uri)
+ for i in range(1, 10000):
+ cursor[ds.key(nrows + i)] = bigvalue
+ cursor.close()
+ self.session.checkpoint()
+
+ # Check to see LAS working with old timestamp
+ bigvalue2 = "ddddd" * 100
+ self.conn.set_timestamp('stable_timestamp=' + timestamp_str(1))
+ las_writes_start = self.get_stat(stat.conn.cache_write_lookaside)
+ self.large_updates(self.session, uri, bigvalue2, ds, nrows, 10000)
+
+ # If the test sizing is correct, the history will overflow the cache
+ self.session.checkpoint()
+ las_writes = self.get_stat(stat.conn.cache_write_lookaside) - las_writes_start
+ self.assertGreaterEqual(las_writes, 0)
+
+ for ts in range(2, 4):
+ self.conn.set_timestamp('stable_timestamp=' + timestamp_str(ts))
+
+ # Now just update one record and checkpoint again
+ self.large_updates(self.session, uri, bigvalue2, ds, nrows, 1)
+
+ las_reads_start = self.get_stat(stat.conn.cache_read_lookaside)
+ self.session.checkpoint()
+ las_reads = self.get_stat(stat.conn.cache_read_lookaside) - las_reads_start
+
+ # Since we're dealing with eviction concurrent with checkpoints
+ # and skewing is controlled by a heuristic, we can't put too tight
+ # a bound on this.
+ self.assertLessEqual(las_reads, 100)
+
+if __name__ == '__main__':
+ wttest.run()