summary refs log tree commit diff
path: root/src/third_party/wiredtiger/src
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src')
-rw-r--r--src/third_party/wiredtiger/src/block/block_ckpt.c42
-rw-r--r--src/third_party/wiredtiger/src/block/block_mgr.c26
-rw-r--r--src/third_party/wiredtiger/src/block/block_read.c4
-rw-r--r--src/third_party/wiredtiger/src/block/block_tiered.c99
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curnext.c39
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curprev.c39
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_cursor.c50
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_handle.c60
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_io.c23
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_read.c8
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c269
-rw-r--r--src/third_party/wiredtiger/src/config/test_config.c66
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_api.c142
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_cache.c2
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_cache_pool.c2
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_capacity.c3
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_ckpt.c2
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_dhandle.c31
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_log.c10
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_open.c2
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_stat.c2
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_sweep.c4
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_tiered.c225
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_backup.c20
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_backup_incr.c13
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_std.c59
-rw-r--r--src/third_party/wiredtiger/src/docs/arch-transaction.dox8
-rw-r--r--src/third_party/wiredtiger/src/docs/custom-storage-sources.dox12
-rw-r--r--src/third_party/wiredtiger/src/docs/examples.dox3
-rw-r--r--src/third_party/wiredtiger/src/docs/spell.ok3
-rw-r--r--src/third_party/wiredtiger/src/docs/top/main.dox8
-rw-r--r--src/third_party/wiredtiger/src/docs/upgrading.dox53
-rw-r--r--src/third_party/wiredtiger/src/docs/wtperf.dox2
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_lru.c23
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_page.c14
-rw-r--r--src/third_party/wiredtiger/src/history/hs_conn.c3
-rw-r--r--src/third_party/wiredtiger/src/history/hs_cursor.c12
-rw-r--r--src/third_party/wiredtiger/src/history/hs_rec.c565
-rw-r--r--src/third_party/wiredtiger/src/include/block.h1
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h25
-rw-r--r--src/third_party/wiredtiger/src/include/btree_cmp_inline.h21
-rw-r--r--src/third_party/wiredtiger/src/include/cache.h2
-rw-r--r--src/third_party/wiredtiger/src/include/cache_inline.h9
-rw-r--r--src/third_party/wiredtiger/src/include/config.h14
-rw-r--r--src/third_party/wiredtiger/src/include/connection.h56
-rw-r--r--src/third_party/wiredtiger/src/include/dhandle.h29
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h68
-rw-r--r--src/third_party/wiredtiger/src/include/log.h2
-rw-r--r--src/third_party/wiredtiger/src/include/meta.h2
-rw-r--r--src/third_party/wiredtiger/src/include/os_fs_inline.h24
-rw-r--r--src/third_party/wiredtiger/src/include/schema.h371
-rw-r--r--src/third_party/wiredtiger/src/include/session.h76
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h17
-rw-r--r--src/third_party/wiredtiger/src/include/tiered.h104
-rw-r--r--src/third_party/wiredtiger/src/include/txn.h3
-rw-r--r--src/third_party/wiredtiger/src/include/txn_inline.h18
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in1228
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger_ext.h15
-rw-r--r--src/third_party/wiredtiger/src/include/wt_internal.h10
-rw-r--r--src/third_party/wiredtiger/src/log/log.c8
-rw-r--r--src/third_party/wiredtiger/src/log/log_slot.c8
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_manager.c8
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_tree.c21
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_apply.c2
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_track.c6
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_turtle.c8
-rw-r--r--src/third_party/wiredtiger/src/os_common/os_fhandle.c2
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_dictionary.c2
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_visibility.c6
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_create.c137
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_drop.c11
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_list.c55
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_open.c4
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_truncate.c9
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_util.c4
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_worker.c8
-rw-r--r--src/third_party/wiredtiger/src/session/session_api.c29
-rw-r--r--src/third_party/wiredtiger/src/session/session_dhandle.c2
-rw-r--r--src/third_party/wiredtiger/src/support/modify.c112
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c69
-rw-r--r--src/third_party/wiredtiger/src/support/thread_group.c2
-rw-r--r--src/third_party/wiredtiger/src/support/update_vector.c111
-rw-r--r--src/third_party/wiredtiger/src/tiered/tiered_config.c188
-rw-r--r--src/third_party/wiredtiger/src/tiered/tiered_cursor.c155
-rw-r--r--src/third_party/wiredtiger/src/tiered/tiered_handle.c625
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c63
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_ckpt.c21
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_recover.c20
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c66
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_timestamp.c3
90 files changed, 3618 insertions, 2190 deletions
diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c
index 176d6570ff3..0f4af019b52 100644
--- a/src/third_party/wiredtiger/src/block/block_ckpt.c
+++ b/src/third_party/wiredtiger/src/block/block_ckpt.c
@@ -98,10 +98,7 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint
block, &endp, ci->root_logid, ci->root_offset, ci->root_size, ci->root_checksum));
*root_addr_sizep = WT_PTRDIFF(endp, root_addr);
- if (block->log_structured) {
- block->logid = ci->root_logid;
- WT_ERR(__wt_block_newfile(session, block));
- }
+ WT_ERR(__wt_block_tiered_load(session, block, ci));
}
/*
@@ -468,37 +465,6 @@ __ckpt_add_blk_mods_ext(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_BLOCK_CK
}
/*
- * __wt_block_newfile --
- * Switch a log-structured block object to a new file.
- */
-int
-__wt_block_newfile(WT_SESSION_IMPL *session, WT_BLOCK *block)
-{
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- const char *filename;
-
- /* Bump to a new file ID. */
- ++block->logid;
-
- WT_ERR(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__wt_buf_fmt(session, tmp, "%s.%08" PRIu32, block->name, block->logid));
- filename = tmp->data;
- WT_ERR(__wt_close(session, &block->fh));
- WT_ERR(__wt_open(session, filename, WT_FS_OPEN_FILE_TYPE_DATA,
- WT_FS_OPEN_CREATE | block->file_flags, &block->fh));
- WT_ERR(__wt_desc_write(session, block->fh, block->allocsize));
-
- block->size = block->allocsize;
- __wt_block_ckpt_destroy(session, &block->live);
- WT_ERR(__wt_block_ckpt_init(session, &block->live, "live"));
-
-err:
- __wt_scr_free(session, &tmp);
- return (ret);
-}
-
-/*
* __ckpt_process --
* Process the list of checkpoints.
*/
@@ -780,8 +746,12 @@ live_update:
ci->ckpt_discard = ci->discard;
WT_ERR(__wt_block_extlist_init(session, &ci->discard, "live", "discard", false));
+ /*
+ * TODO: tiered: for now we are switching files on a checkpoint; we'll want to do it only on
+ * flush_tier.
+ */
if (block->log_structured)
- WT_ERR(__wt_block_newfile(session, block));
+ WT_ERR(__wt_block_tiered_newfile(session, block));
#ifdef HAVE_DIAGNOSTIC
/*
diff --git a/src/third_party/wiredtiger/src/block/block_mgr.c b/src/third_party/wiredtiger/src/block/block_mgr.c
index 00db82934dd..4be319fe79c 100644
--- a/src/third_party/wiredtiger/src/block/block_mgr.c
+++ b/src/third_party/wiredtiger/src/block/block_mgr.c
@@ -289,6 +289,30 @@ __bm_compact_start_readonly(WT_BM *bm, WT_SESSION_IMPL *session)
}
/*
+ * __bm_flush_tier --
+ * Flush the underlying file to the shared tier.
+ */
+static int
+__bm_flush_tier(WT_BM *bm, WT_SESSION_IMPL *session, uint8_t **flush_cookie, size_t *cookie_size)
+{
+ return (__wt_block_tiered_flush(session, bm->block, flush_cookie, cookie_size));
+}
+
+/*
+ * __bm_flush_tier_readonly --
+ * Flush the underlying file to the shared tier; readonly version.
+ */
+static int
+__bm_flush_tier_readonly(
+ WT_BM *bm, WT_SESSION_IMPL *session, uint8_t **flush_cookie, size_t *cookie_size)
+{
+ WT_UNUSED(flush_cookie);
+ WT_UNUSED(cookie_size);
+
+ return (__bm_readonly(bm, session));
+}
+
+/*
* __bm_free --
* Free a block of space to the underlying file.
*/
@@ -565,6 +589,7 @@ __bm_method_set(WT_BM *bm, bool readonly)
bm->compact_skip = __bm_compact_skip;
bm->compact_start = __bm_compact_start;
bm->corrupt = __wt_bm_corrupt;
+ bm->flush_tier = __bm_flush_tier;
bm->free = __bm_free;
bm->is_mapped = __bm_is_mapped;
bm->map_discard = __bm_map_discard;
@@ -591,6 +616,7 @@ __bm_method_set(WT_BM *bm, bool readonly)
bm->compact_page_skip = __bm_compact_page_skip_readonly;
bm->compact_skip = __bm_compact_skip_readonly;
bm->compact_start = __bm_compact_start_readonly;
+ bm->flush_tier = __bm_flush_tier_readonly;
bm->free = __bm_free_readonly;
bm->salvage_end = __bm_salvage_end_readonly;
bm->salvage_next = __bm_salvage_next_readonly;
diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c
index 08069728c8c..80dbb3aac21 100644
--- a/src/third_party/wiredtiger/src/block/block_read.c
+++ b/src/third_party/wiredtiger/src/block/block_read.c
@@ -222,11 +222,11 @@ __wt_block_fh(WT_SESSION_IMPL *session, WT_BLOCK *block, uint32_t logid, WT_FH *
return (0);
}
- /* TODO: fh readlock */
+ /* TODO: tiered: fh readlock; we may want a reference count on each file handle given out. */
if (logid * sizeof(WT_FILE_HANDLE *) < block->lfh_alloc && (*fhp = block->lfh[logid]) != NULL)
return (0);
- /* TODO: fh writelock */
+ /* TODO: tiered: fh writelock */
/* Ensure the array goes far enough. */
WT_RET(__wt_realloc_def(session, &block->lfh_alloc, logid + 1, &block->lfh));
if (logid >= block->max_logid)
diff --git a/src/third_party/wiredtiger/src/block/block_tiered.c b/src/third_party/wiredtiger/src/block/block_tiered.c
new file mode 100644
index 00000000000..776b2a127ad
--- /dev/null
+++ b/src/third_party/wiredtiger/src/block/block_tiered.c
@@ -0,0 +1,99 @@
+/*-
+ * Copyright (c) 2014-present MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __wt_block_tiered_flush --
+ * Flush this file, start another file.
+ */
+int
+__wt_block_tiered_flush(
+ WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t **flush_cookie, size_t *cookie_size)
+{
+ /* TODO: tiered: fill in the cookie. */
+ (void)flush_cookie;
+ (void)cookie_size;
+
+ return (__wt_block_tiered_newfile(session, block));
+}
+
+/*
+ * __wt_block_tiered_load --
+ * Set up log-structured processing when loading a new root page.
+ */
+int
+__wt_block_tiered_load(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci)
+{
+ /*
+ * TODO: tiered: this call currently advances the object id; that's probably not appropriate for
+ * readonly opens. Perhaps it's also not appropriate for opening at an older checkpoint?
+ */
+ if (block->log_structured) {
+ block->logid = ci->root_logid;
+
+ /* Advance to the next file for future changes. */
+ WT_RET(__wt_block_tiered_newfile(session, block));
+ }
+ return (0);
+}
+
+/*
+ * __wt_block_tiered_newfile --
+ * Switch a log-structured block object to a new file.
+ */
+int
+__wt_block_tiered_newfile(WT_SESSION_IMPL *session, WT_BLOCK *block)
+{
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_STORAGE_SOURCE *storage_source;
+ const char *filename;
+
+ /* Get the old file name again. */
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+
+ /*
+ * TODO: tiered: We will get rid of the log id, and this name generation will be replaced by the
+ * name generated by __tiered_switch.
+ */
+ WT_ERR(__wt_buf_fmt(session, tmp, "%s.%08" PRIu32, block->name, block->logid));
+ filename = tmp->data;
+ WT_ERR(__wt_close(session, &block->fh));
+
+ /*
+ * TODO: tiered: Assert that session->bucket_storage is not NULL. We can't do that while we have
+ * tests that use block_allocation=log without setting up bucket storage. This whole function is
+ * going to look very different when flush_tier is fully integrated.
+ */
+ if (session->bucket_storage != NULL && block->logid != 0) {
+ storage_source = session->bucket_storage->storage_source;
+ WT_ASSERT(session, storage_source != NULL);
+ WT_ERR(storage_source->ss_flush(
+ storage_source, &session->iface, session->bucket_storage->file_system, filename, NULL));
+ }
+ /* Bump to a new file ID. */
+ ++block->logid;
+ WT_ERR(__wt_buf_fmt(session, tmp, "%s.%08" PRIu32, block->name, block->logid));
+ filename = tmp->data;
+
+ WT_WITH_BUCKET_STORAGE(session->bucket_storage, session, {
+ ret = __wt_open(session, filename, WT_FS_OPEN_FILE_TYPE_DATA,
+ WT_FS_OPEN_CREATE | block->file_flags, &block->fh);
+ });
+ WT_ERR(ret);
+ WT_ERR(__wt_desc_write(session, block->fh, block->allocsize));
+
+ block->size = block->allocsize;
+ __wt_block_ckpt_destroy(session, &block->live);
+ WT_ERR(__wt_block_ckpt_init(session, &block->live, "live"));
+
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
+}
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index 18fdef73315..a2b7f161d3e 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -305,7 +305,8 @@ restart_read:
* Move to the next row-store item.
*/
static inline int
-__cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart, size_t *skippedp)
+__cursor_row_next(
+ WT_CURSOR_BTREE *cbt, bool newpage, bool restart, size_t *skippedp, WT_ITEM *prefix)
{
WT_CELL_UNPACK_KV kpack;
WT_INSERT *ins;
@@ -402,6 +403,17 @@ restart_read_insert:
restart_read_page:
rip = &page->pg_row[cbt->slot];
WT_RET(__cursor_row_slot_key_return(cbt, rip, &kpack, &kpack_used));
+ /*
+ * If the cursor has prefix search configured we can early exit here if the key that we are
+ * visiting is after our prefix.
+ */
+ if (F_ISSET(&cbt->iface, WT_CURSTD_PREFIX_SEARCH) && prefix != NULL &&
+ __wt_prefix_match(prefix, &cbt->iface.key) < 0) {
+ /* It is not okay for the user to have a custom collator. */
+ WT_ASSERT(session, CUR2BT(cbt)->collator == NULL);
+ WT_STAT_CONN_DATA_INCR(session, cursor_search_near_prefix_fast_paths);
+ return (WT_NOTFOUND);
+ }
WT_RET(__wt_txn_read(
session, cbt, &cbt->iface.key, WT_RECNO_OOB, WT_ROW_UPDATE(page, rip), NULL));
if (cbt->upd_value->type == WT_UPDATE_INVALID) {
@@ -622,11 +634,12 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt)
}
/*
- * __wt_btcur_next --
- * Move to the next record in the tree.
+ * __wt_btcur_next_prefix --
+ * Move to the next record in the tree, taking an optional prefix item for a special case of
+ * search near.
*/
int
-__wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
+__wt_btcur_next_prefix(WT_CURSOR_BTREE *cbt, WT_ITEM *prefix, bool truncating)
{
WT_CURSOR *cursor;
WT_DECL_RET;
@@ -692,8 +705,14 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
total_skipped += skipped;
break;
case WT_PAGE_ROW_LEAF:
- ret = __cursor_row_next(cbt, newpage, restart, &skipped);
+ ret = __cursor_row_next(cbt, newpage, restart, &skipped, prefix);
total_skipped += skipped;
+ /*
+ * We can directly return WT_NOTFOUND here as the caller expects the cursor to be
+ * positioned when traversing keys for prefix search near.
+ */
+ if (ret == WT_NOTFOUND && F_ISSET(&cbt->iface, WT_CURSTD_PREFIX_SEARCH))
+ return (WT_NOTFOUND);
break;
default:
WT_ERR(__wt_illegal_value(session, page->type));
@@ -774,3 +793,13 @@ err:
F_CLR(cbt, WT_CBT_ITERATE_RETRY_PREV);
return (ret);
}
+
+/*
+ * __wt_btcur_next --
+ * Move to the next record in the tree.
+ */
+int
+__wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
+{
+ return (__wt_btcur_next_prefix(cbt, NULL, truncating));
+}
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c
index 7517eac77d8..867a46201a4 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curprev.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c
@@ -441,10 +441,12 @@ restart_read:
/*
* __cursor_row_prev --
- * Move to the previous row-store item.
+ * Move to the previous row-store item, taking an optional prefix item for a special case of
+ * search near.
*/
static inline int
-__cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart, size_t *skippedp)
+__cursor_row_prev(
+ WT_CURSOR_BTREE *cbt, bool newpage, bool restart, size_t *skippedp, WT_ITEM *prefix)
{
WT_CELL_UNPACK_KV kpack;
WT_INSERT *ins;
@@ -553,6 +555,17 @@ restart_read_insert:
restart_read_page:
rip = &page->pg_row[cbt->slot];
WT_RET(__cursor_row_slot_key_return(cbt, rip, &kpack, &kpack_used));
+ /*
+ * If the cursor has prefix search configured we can early exit here if the key we are
+ * visiting is before our prefix.
+ */
+ if (F_ISSET(&cbt->iface, WT_CURSTD_PREFIX_SEARCH) && prefix != NULL &&
+ __wt_prefix_match(prefix, &cbt->iface.key) > 0) {
+ /* It is not okay for the user to have a custom collator. */
+ WT_ASSERT(session, CUR2BT(cbt)->collator == NULL);
+ WT_STAT_CONN_DATA_INCR(session, cursor_search_near_prefix_fast_paths);
+ return (WT_NOTFOUND);
+ }
WT_RET(__wt_txn_read(
session, cbt, &cbt->iface.key, WT_RECNO_OOB, WT_ROW_UPDATE(page, rip), NULL));
if (cbt->upd_value->type == WT_UPDATE_INVALID) {
@@ -572,11 +585,11 @@ restart_read_page:
}
/*
- * __wt_btcur_prev --
+ * __wt_btcur_prev_prefix --
* Move to the previous record in the tree.
*/
int
-__wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
+__wt_btcur_prev_prefix(WT_CURSOR_BTREE *cbt, WT_ITEM *prefix, bool truncating)
{
WT_CURSOR *cursor;
WT_DECL_RET;
@@ -653,8 +666,14 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
total_skipped += skipped;
break;
case WT_PAGE_ROW_LEAF:
- ret = __cursor_row_prev(cbt, newpage, restart, &skipped);
+ ret = __cursor_row_prev(cbt, newpage, restart, &skipped, prefix);
total_skipped += skipped;
+ /*
+ * We can directly return WT_NOTFOUND here as the caller will reset the cursor for
+ * us; this way we don't leave the cursor positioned after returning WT_NOTFOUND.
+ */
+ if (ret == WT_NOTFOUND && F_ISSET(&cbt->iface, WT_CURSTD_PREFIX_SEARCH))
+ return (WT_NOTFOUND);
break;
default:
WT_ERR(__wt_illegal_value(session, page->type));
@@ -726,3 +745,13 @@ err:
F_CLR(cbt, WT_CBT_ITERATE_RETRY_NEXT);
return (ret);
}
+
+/*
+ * __wt_btcur_prev --
+ * Move to the previous record in the tree.
+ */
+int
+__wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
+{
+ return (__wt_btcur_prev_prefix(cbt, NULL, truncating));
+}
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index e50327ea193..9449be9603a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -348,6 +348,15 @@ __cursor_col_search(WT_CURSOR_BTREE *cbt, WT_REF *leaf, bool *leaf_foundp)
WT_SESSION_IMPL *session;
session = CUR2S(cbt);
+
+#ifdef HAVE_DIAGNOSTIC
+ /*
+ * Turn off cursor-order checks in all cases on search. The search/search-near functions turn
+ * them back on after a successful search.
+ */
+ __wt_cursor_key_order_reset(cbt);
+#endif
+
WT_WITH_PAGE_INDEX(
session, ret = __wt_col_search(cbt, cbt->iface.recno, leaf, false, leaf_foundp));
return (ret);
@@ -364,6 +373,15 @@ __cursor_row_search(WT_CURSOR_BTREE *cbt, bool insert, WT_REF *leaf, bool *leaf_
WT_SESSION_IMPL *session;
session = CUR2S(cbt);
+
+#ifdef HAVE_DIAGNOSTIC
+ /*
+ * Turn off cursor-order checks in all cases on search. The search/search-near functions turn
+ * them back on after a successful search.
+ */
+ __wt_cursor_key_order_reset(cbt);
+#endif
+
WT_WITH_PAGE_INDEX(
session, ret = __wt_row_search(cbt, &cbt->iface.key, insert, leaf, false, leaf_foundp));
return (ret);
@@ -690,7 +708,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
* here because at low isolation levels, new records could appear as we are stepping through
* the tree.
*/
- while ((ret = __wt_btcur_next(cbt, false)) != WT_NOTFOUND) {
+ while ((ret = __wt_btcur_next_prefix(cbt, &state.key, false)) != WT_NOTFOUND) {
WT_ERR(ret);
if (btree->type == BTREE_ROW)
WT_ERR(__wt_compare(session, btree->collator, &cursor->key, &state.key, &exact));
@@ -703,7 +721,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
/*
* We walked to the end of the tree without finding a match. Walk backwards instead.
*/
- while ((ret = __wt_btcur_prev(cbt, false)) != WT_NOTFOUND) {
+ while ((ret = __wt_btcur_prev_prefix(cbt, &state.key, false)) != WT_NOTFOUND) {
WT_ERR(ret);
if (btree->type == BTREE_ROW)
WT_ERR(__wt_compare(session, btree->collator, &cursor->key, &state.key, &exact));
@@ -725,6 +743,11 @@ err:
#endif
if (ret != 0) {
+ /*
+ * It is important that this reset is kept as the cursor state is modified in the above prev
+ * and next loops. Those internally do reset the cursor but not when performing a prefix
+ * search near.
+ */
WT_TRET(__cursor_reset(cbt));
__cursor_state_restore(cursor, &state);
}
@@ -1168,7 +1191,7 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
WT_DECL_RET;
WT_SESSION_IMPL *session;
uint64_t yield_count, sleep_usecs;
- bool leaf_found, valid;
+ bool valid;
btree = CUR2BT(cbt);
cursor = &cbt->iface;
@@ -1221,30 +1244,11 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
WT_ERR(__cursor_localvalue(cursor));
__cursor_state_save(cursor, &state);
- /* If our caller configures for a local search and we have a page pinned, do that search. */
- if (F_ISSET(cursor, WT_CURSTD_UPDATE_LOCAL) && __cursor_page_pinned(cbt, true)) {
- __wt_txn_cursor_op(session);
- WT_ERR(__wt_txn_autocommit_check(session));
-
- WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(cbt, true, cbt->ref, &leaf_found) :
- __cursor_col_search(cbt, cbt->ref, &leaf_found));
- /*
- * Only use the pinned page search results if search returns an exact match or a slot other
- * than the page's boundary slots, if that's not the case, the record might belong on an
- * entirely different page. This test is simplistic as we're ignoring append lists (there
- * may be no page slots or we might be legitimately positioned after the last page slot).
- * Ignore those cases, it makes things too complicated.
- */
- if (leaf_found &&
- (cbt->compare == 0 || (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1)))
- goto update_local;
- }
-
retry:
WT_ERR(__cursor_func_init(cbt, true));
WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(cbt, true, NULL, NULL) :
__cursor_col_search(cbt, NULL, NULL));
-update_local:
+
if (btree->type == BTREE_ROW) {
/*
* If not overwriting, check for conflicts and fail if the key does not exist.
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index 629f013c4c2..0d36f155f7a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -115,8 +115,11 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[])
if (!WT_PREFIX_SKIP(filename, "file:"))
WT_ERR_MSG(session, EINVAL, "expected a 'file:' URI");
- WT_ERR(__wt_block_manager_open(session, filename, dhandle->cfg, forced_salvage,
- F_ISSET(btree, WT_BTREE_READONLY), btree->allocsize, &btree->bm));
+ WT_WITH_BUCKET_STORAGE(btree->bstorage, session,
+ ret = __wt_block_manager_open(session, filename, dhandle->cfg, forced_salvage,
+ F_ISSET(btree, WT_BTREE_READONLY), btree->allocsize, &btree->bm));
+ WT_ERR(ret);
+
bm = btree->bm;
/*
@@ -297,56 +300,6 @@ __wt_btree_config_encryptor(
}
/*
- * __btree_config_tiered --
- * Return a bucket storage handle based on the configuration.
- */
-static int
-__btree_config_tiered(WT_SESSION_IMPL *session, const char **cfg, WT_BUCKET_STORAGE **bstoragep)
-{
- WT_BUCKET_STORAGE *bstorage;
- WT_CONFIG_ITEM bucket, cval;
- WT_DECL_RET;
- bool local_free;
-
- /*
- * We do not use __wt_config_gets_none for name because "none" and the empty string have
- * different meanings. The empty string means inherit the system tiered storage setting and
- * "none" means this table is not using tiered storage.
- */
- *bstoragep = NULL;
- local_free = false;
- WT_RET(__wt_config_gets(session, cfg, "tiered_storage.name", &cval));
- if (cval.len == 0)
- *bstoragep = S2C(session)->bstorage;
- else if (!WT_STRING_MATCH("none", cval.str, cval.len)) {
- WT_RET(__wt_config_gets_none(session, cfg, "tiered_storage.bucket", &bucket));
- WT_RET(__wt_tiered_bucket_config(session, &cval, &bucket, bstoragep));
- local_free = true;
- WT_ASSERT(session, *bstoragep != NULL);
- }
- bstorage = *bstoragep;
- if (bstorage != NULL) {
- /*
- * If we get here then we have a valid bucket storage entry. Now see if the config overrides
- * any of the other settings.
- */
- if (bstorage != S2C(session)->bstorage)
- WT_ERR(__wt_tiered_common_config(session, cfg, bstorage));
- WT_STAT_DATA_SET(session, tiered_object_size, bstorage->object_size);
- WT_STAT_DATA_SET(session, tiered_retention, bstorage->retain_secs);
- }
- return (0);
-err:
- /* If the bucket storage was set up with copies of the strings, free them here. */
- if (bstorage != NULL && local_free && F_ISSET(bstorage, WT_BUCKET_FREE)) {
- __wt_free(session, bstorage->auth_token);
- __wt_free(session, bstorage->bucket);
- __wt_free(session, bstorage);
- }
- return (ret);
-}
-
-/*
* __btree_conf --
* Configure a WT_BTREE structure.
*/
@@ -531,9 +484,6 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
F_SET(btree, WT_BTREE_NO_LOGGING);
}
- /* Configure tiered storage. */
- WT_RET(__btree_config_tiered(session, cfg, &btree->bstorage));
-
/* Configure encryption. */
WT_RET(__wt_btree_config_encryptor(session, cfg, &btree->kencryptor));
diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c
index 373cc7b71f1..3b763a68172 100644
--- a/src/third_party/wiredtiger/src/btree/bt_io.c
+++ b/src/third_party/wiredtiger/src/btree/bt_io.c
@@ -35,12 +35,16 @@ __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t
* into the caller's buffer. Else, read directly into the caller's buffer.
*/
if (btree->compressor == NULL && btree->kencryptor == NULL) {
- WT_RET(bm->read(bm, session, buf, addr, addr_size));
+ WT_WITH_BUCKET_STORAGE(
+ btree->bstorage, session, { ret = bm->read(bm, session, buf, addr, addr_size); });
+ WT_RET(ret);
dsk = buf->data;
ip = NULL;
} else {
WT_RET(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(bm->read(bm, session, tmp, addr, addr_size));
+ WT_WITH_BUCKET_STORAGE(
+ btree->bstorage, session, { ret = bm->read(bm, session, tmp, addr, addr_size); });
+ WT_ERR(ret);
dsk = tmp->data;
ip = tmp;
}
@@ -303,7 +307,12 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *add
if (encrypted)
F_SET(dsk, WT_PAGE_ENCRYPTED);
- WT_ASSERT(session, (dsk->write_gen != 0 && dsk->write_gen > btree->base_write_gen));
+ /*
+ * The page image must have a proper write generation number before writing it to disk. The page
+ * images that are created during recovery may have the write generation number less than the
+ * btree base write generation number, so don't verify it.
+ */
+ WT_ASSERT(session, dsk->write_gen != 0);
/*
* Checksum the data if the buffer isn't compressed or checksums are configured.
@@ -324,9 +333,13 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *add
if (timer)
time_start = __wt_clock(session);
- /* Call the block manager to write the block. */
- WT_ERR(checkpoint ? bm->checkpoint(bm, session, ip, btree->ckpt, data_checksum) :
+ WT_WITH_BUCKET_STORAGE(btree->bstorage, session, {
+ /* Call the block manager to write the block. */
+ ret =
+ (checkpoint ? bm->checkpoint(bm, session, ip, btree->ckpt, data_checksum) :
bm->write(bm, session, ip, addr, addr_sizep, data_checksum, checkpoint_io));
+ });
+ WT_ERR(ret);
/* Update some statistics now that the write is done */
if (timer) {
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index ab91af5b21a..dc070d5c700 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -344,14 +344,6 @@ read:
else if (ret == EBUSY) {
WT_NOT_READ(ret, 0);
WT_STAT_CONN_INCR(session, page_forcible_evict_blocked);
- /*
- * Forced eviction failed: check if this transaction is keeping content pinned
- * in cache.
- */
- if (force_attempts > 1 &&
- (ret = __wt_txn_is_blocking(session, true)) == WT_ROLLBACK)
- WT_STAT_CONN_INCR(session, cache_eviction_force_rollback);
- WT_RET(ret);
stalled = true;
break;
}
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index 43bfb1f769f..25cbb0e8b33 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -96,7 +96,6 @@ static const WT_CONFIG_CHECK confchk_tiered_manager_subconfigs[] = {
{"wait", "int", NULL, "min=0,max=100000", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure_tiered_storage_subconfigs[] = {
- {"auth_token", "string", NULL, NULL, NULL, 0},
{"local_retention", "int", NULL, "min=0,max=10000", NULL, 0},
{"object_target_size", "int", NULL, "min=100K,max=10TB", NULL, 0},
{NULL, NULL, NULL, NULL, NULL, 0}};
@@ -135,7 +134,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
confchk_WT_CONNECTION_reconfigure_statistics_log_subconfigs, 5},
{"tiered_manager", "category", NULL, NULL, confchk_tiered_manager_subconfigs, 3},
{"tiered_storage", "category", NULL, NULL,
- confchk_WT_CONNECTION_reconfigure_tiered_storage_subconfigs, 3},
+ confchk_WT_CONNECTION_reconfigure_tiered_storage_subconfigs, 2},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
"\"checkpoint_slow\",\"history_store_checkpoint_delay\","
@@ -152,8 +151,8 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
"\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
"\"reconcile\",\"recovery\",\"recovery_progress\",\"rts\","
"\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
- "\"version\",\"write\"]",
+ "\"thread_group\",\"tiered\",\"timestamp\",\"transaction\","
+ "\"verify\",\"version\",\"write\"]",
NULL, 0},
{NULL, NULL, NULL, NULL, NULL, 0}};
@@ -165,7 +164,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_set_timestamp[] = {
static const WT_CONFIG_CHECK confchk_WT_CURSOR_reconfigure[] = {
{"append", "boolean", NULL, NULL, NULL, 0}, {"overwrite", "boolean", NULL, NULL, NULL, 0},
- {NULL, NULL, NULL, NULL, NULL, 0}};
+ {"prefix_search", "boolean", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_assert_subconfigs[] = {
{"commit_timestamp", "string", NULL,
@@ -258,12 +257,9 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_create_lsm_subconfigs[] = {
{"merge_max", "int", NULL, "min=2,max=100", NULL, 0},
{"merge_min", "int", NULL, "max=100", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
-static const WT_CONFIG_CHECK confchk_WT_SESSION_create_tiered_subconfigs[] = {
- {"chunk_size", "int", NULL, "min=1M", NULL, 0}, {"tiers", "list", NULL, NULL, NULL, 0},
- {NULL, NULL, NULL, NULL, NULL, 0}};
-
static const WT_CONFIG_CHECK confchk_WT_SESSION_create_tiered_storage_subconfigs[] = {
{"auth_token", "string", NULL, NULL, NULL, 0}, {"bucket", "string", NULL, NULL, NULL, 0},
+ {"bucket_prefix", "string", NULL, NULL, NULL, 0},
{"local_retention", "int", NULL, "min=0,max=10000", NULL, 0},
{"name", "string", NULL, NULL, NULL, 0},
{"object_target_size", "int", NULL, "min=100K,max=10TB", NULL, 0},
@@ -308,9 +304,8 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_create[] = {
{"split_deepen_min_child", "int", NULL, NULL, NULL, 0},
{"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
{"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
- {"tiered", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_subconfigs, 2},
{"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
- 5},
+ 6},
{"type", "string", NULL, NULL, NULL, 0},
{"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
{"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
@@ -362,9 +357,9 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = {
{"incremental", "category", NULL, NULL, confchk_WT_SESSION_open_cursor_incremental_subconfigs, 7},
{"next_random", "boolean", NULL, NULL, NULL, 0},
{"next_random_sample_size", "string", NULL, NULL, NULL, 0},
- {"overwrite", "boolean", NULL, NULL, NULL, 0}, {"raw", "boolean", NULL, NULL, NULL, 0},
- {"read_once", "boolean", NULL, NULL, NULL, 0}, {"readonly", "boolean", NULL, NULL, NULL, 0},
- {"skip_sort_check", "boolean", NULL, NULL, NULL, 0},
+ {"overwrite", "boolean", NULL, NULL, NULL, 0}, {"prefix_search", "boolean", NULL, NULL, NULL, 0},
+ {"raw", "boolean", NULL, NULL, NULL, 0}, {"read_once", "boolean", NULL, NULL, NULL, 0},
+ {"readonly", "boolean", NULL, NULL, NULL, 0}, {"skip_sort_check", "boolean", NULL, NULL, NULL, 0},
{"statistics", "list", NULL,
"choices=[\"all\",\"cache_walk\",\"fast\",\"clear\","
"\"size\",\"tree_walk\"]",
@@ -460,7 +455,7 @@ static const WT_CONFIG_CHECK confchk_file_config[] = {
{"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
{"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
{"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
- 5},
+ 6},
{"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
{"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
{"write_timestamp_usage", "string", NULL,
@@ -508,7 +503,7 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = {
{"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
{"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
{"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
- 5},
+ 6},
{"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
{"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
{"version", "string", NULL, NULL, NULL, 0},
@@ -572,9 +567,58 @@ static const WT_CONFIG_CHECK confchk_lsm_meta[] = {
{"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
{"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
{"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
- 5},
+ 6},
+ {"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
+ {"write_timestamp_usage", "string", NULL,
+ "choices=[\"always\",\"key_consistent\",\"mixed_mode\","
+ "\"never\",\"none\",\"ordered\"]",
+ NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
+
+static const WT_CONFIG_CHECK confchk_object_meta[] = {
+ {"access_pattern_hint", "string", NULL, "choices=[\"none\",\"random\",\"sequential\"]", NULL, 0},
+ {"allocation_size", "int", NULL, "min=512B,max=128MB", NULL, 0},
+ {"app_metadata", "string", NULL, NULL, NULL, 0},
+ {"assert", "category", NULL, NULL, confchk_assert_subconfigs, 4},
+ {"block_allocation", "string", NULL, "choices=[\"best\",\"first\",\"log-structured\"]", NULL, 0},
+ {"block_compressor", "string", NULL, NULL, NULL, 0},
+ {"cache_resident", "boolean", NULL, NULL, NULL, 0}, {"checkpoint", "string", NULL, NULL, NULL, 0},
+ {"checkpoint_backup_info", "string", NULL, NULL, NULL, 0},
+ {"checkpoint_lsn", "string", NULL, NULL, NULL, 0},
+ {"checksum", "string", NULL, "choices=[\"on\",\"off\",\"uncompressed\"]", NULL, 0},
+ {"collator", "string", NULL, NULL, NULL, 0}, {"columns", "list", NULL, NULL, NULL, 0},
+ {"dictionary", "int", NULL, "min=0", NULL, 0},
+ {"encryption", "category", NULL, NULL, confchk_WT_SESSION_create_encryption_subconfigs, 2},
+ {"format", "string", NULL, "choices=[\"btree\"]", NULL, 0},
+ {"huffman_key", "string", NULL, NULL, NULL, 0}, {"huffman_value", "string", NULL, NULL, NULL, 0},
+ {"id", "string", NULL, NULL, NULL, 0},
+ {"ignore_in_memory_cache_size", "boolean", NULL, NULL, NULL, 0},
+ {"internal_item_max", "int", NULL, "min=0", NULL, 0},
+ {"internal_key_max", "int", NULL, "min=0", NULL, 0},
+ {"internal_key_truncate", "boolean", NULL, NULL, NULL, 0},
+ {"internal_page_max", "int", NULL, "min=512B,max=512MB", NULL, 0},
+ {"key_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {"key_gap", "int", NULL, "min=0", NULL, 0}, {"leaf_item_max", "int", NULL, "min=0", NULL, 0},
+ {"leaf_key_max", "int", NULL, "min=0", NULL, 0},
+ {"leaf_page_max", "int", NULL, "min=512B,max=512MB", NULL, 0},
+ {"leaf_value_max", "int", NULL, "min=0", NULL, 0},
+ {"log", "category", NULL, NULL, confchk_WT_SESSION_create_log_subconfigs, 1},
+ {"memory_page_image_max", "int", NULL, "min=0", NULL, 0},
+ {"memory_page_max", "int", NULL, "min=512B,max=10TB", NULL, 0},
+ {"os_cache_dirty_max", "int", NULL, "min=0", NULL, 0},
+ {"os_cache_max", "int", NULL, "min=0", NULL, 0},
+ {"prefix_compression", "boolean", NULL, NULL, NULL, 0},
+ {"prefix_compression_min", "int", NULL, "min=0", NULL, 0},
+ {"readonly", "boolean", NULL, NULL, NULL, 0},
+ {"split_deepen_min_child", "int", NULL, NULL, NULL, 0},
+ {"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
+ {"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
+ {"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
+ 6},
{"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
{"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
+ {"version", "string", NULL, NULL, NULL, 0},
{"write_timestamp_usage", "string", NULL,
"choices=[\"always\",\"key_consistent\",\"mixed_mode\","
"\"never\",\"none\",\"ordered\"]",
@@ -595,10 +639,64 @@ static const WT_CONFIG_CHECK confchk_table_meta[] = {
NULL, 0},
{NULL, NULL, NULL, NULL, NULL, 0}};
+static const WT_CONFIG_CHECK confchk_tier_meta[] = {
+ {"access_pattern_hint", "string", NULL, "choices=[\"none\",\"random\",\"sequential\"]", NULL, 0},
+ {"allocation_size", "int", NULL, "min=512B,max=128MB", NULL, 0},
+ {"app_metadata", "string", NULL, NULL, NULL, 0},
+ {"assert", "category", NULL, NULL, confchk_assert_subconfigs, 4},
+ {"block_allocation", "string", NULL, "choices=[\"best\",\"first\",\"log-structured\"]", NULL, 0},
+ {"block_compressor", "string", NULL, NULL, NULL, 0}, {"bucket", "string", NULL, NULL, NULL, 0},
+ {"bucket_prefix", "string", NULL, NULL, NULL, 0},
+ {"cache_resident", "boolean", NULL, NULL, NULL, 0}, {"checkpoint", "string", NULL, NULL, NULL, 0},
+ {"checkpoint_backup_info", "string", NULL, NULL, NULL, 0},
+ {"checkpoint_lsn", "string", NULL, NULL, NULL, 0},
+ {"checksum", "string", NULL, "choices=[\"on\",\"off\",\"uncompressed\"]", NULL, 0},
+ {"collator", "string", NULL, NULL, NULL, 0}, {"columns", "list", NULL, NULL, NULL, 0},
+ {"dictionary", "int", NULL, "min=0", NULL, 0},
+ {"encryption", "category", NULL, NULL, confchk_WT_SESSION_create_encryption_subconfigs, 2},
+ {"format", "string", NULL, "choices=[\"btree\"]", NULL, 0},
+ {"huffman_key", "string", NULL, NULL, NULL, 0}, {"huffman_value", "string", NULL, NULL, NULL, 0},
+ {"id", "string", NULL, NULL, NULL, 0},
+ {"ignore_in_memory_cache_size", "boolean", NULL, NULL, NULL, 0},
+ {"internal_item_max", "int", NULL, "min=0", NULL, 0},
+ {"internal_key_max", "int", NULL, "min=0", NULL, 0},
+ {"internal_key_truncate", "boolean", NULL, NULL, NULL, 0},
+ {"internal_page_max", "int", NULL, "min=512B,max=512MB", NULL, 0},
+ {"key_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {"key_gap", "int", NULL, "min=0", NULL, 0}, {"leaf_item_max", "int", NULL, "min=0", NULL, 0},
+ {"leaf_key_max", "int", NULL, "min=0", NULL, 0},
+ {"leaf_page_max", "int", NULL, "min=512B,max=512MB", NULL, 0},
+ {"leaf_value_max", "int", NULL, "min=0", NULL, 0},
+ {"log", "category", NULL, NULL, confchk_WT_SESSION_create_log_subconfigs, 1},
+ {"memory_page_image_max", "int", NULL, "min=0", NULL, 0},
+ {"memory_page_max", "int", NULL, "min=512B,max=10TB", NULL, 0},
+ {"os_cache_dirty_max", "int", NULL, "min=0", NULL, 0},
+ {"os_cache_max", "int", NULL, "min=0", NULL, 0},
+ {"prefix_compression", "boolean", NULL, NULL, NULL, 0},
+ {"prefix_compression_min", "int", NULL, "min=0", NULL, 0},
+ {"readonly", "boolean", NULL, NULL, NULL, 0},
+ {"split_deepen_min_child", "int", NULL, NULL, NULL, 0},
+ {"split_deepen_per_child", "int", NULL, NULL, NULL, 0},
+ {"split_pct", "int", NULL, "min=50,max=100", NULL, 0},
+ {"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
+ 6},
+ {"value_format", "format", __wt_struct_confchk, NULL, NULL, 0},
+ {"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
+ {"version", "string", NULL, NULL, NULL, 0},
+ {"write_timestamp_usage", "string", NULL,
+ "choices=[\"always\",\"key_consistent\",\"mixed_mode\","
+ "\"never\",\"none\",\"ordered\"]",
+ NULL, 0},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
+
static const WT_CONFIG_CHECK confchk_tiered_meta[] = {
{"app_metadata", "string", NULL, NULL, NULL, 0},
{"assert", "category", NULL, NULL, confchk_assert_subconfigs, 4},
- {"tiered", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_subconfigs, 2},
+ {"collator", "string", NULL, NULL, NULL, 0}, {"columns", "list", NULL, NULL, NULL, 0},
+ {"last", "string", NULL, NULL, NULL, 0},
+ {"tiered_storage", "category", NULL, NULL, confchk_WT_SESSION_create_tiered_storage_subconfigs,
+ 6},
+ {"tiers", "list", NULL, NULL, NULL, 0},
{"verbose", "list", NULL, "choices=[\"write_timestamp\"]", NULL, 0},
{"write_timestamp_usage", "string", NULL,
"choices=[\"always\",\"key_consistent\",\"mixed_mode\","
@@ -634,10 +732,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_statistics_log_subconfigs[]
{NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_tiered_storage_subconfigs[] = {
- {"auth_token", "string", NULL, NULL, NULL, 0}, {"auth_token", "string", NULL, NULL, NULL, 0},
- {"bucket", "string", NULL, NULL, NULL, 0}, {"cluster", "string", NULL, NULL, NULL, 0},
+ {"auth_token", "string", NULL, NULL, NULL, 0}, {"bucket", "string", NULL, NULL, NULL, 0},
+ {"bucket_prefix", "string", NULL, NULL, NULL, 0},
{"local_retention", "int", NULL, "min=0,max=10000", NULL, 0},
- {"member", "string", NULL, NULL, NULL, 0}, {"name", "string", NULL, NULL, NULL, 0},
+ {"name", "string", NULL, NULL, NULL, 0},
{"object_target_size", "int", NULL, "min=100K,max=10TB", NULL, 0},
{NULL, NULL, NULL, NULL, NULL, 0}};
@@ -696,7 +794,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
NULL, 0},
{"statistics_log", "category", NULL, NULL, confchk_wiredtiger_open_statistics_log_subconfigs, 6},
{"tiered_manager", "category", NULL, NULL, confchk_tiered_manager_subconfigs, 3},
- {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 8},
+ {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 6},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
"\"checkpoint_slow\",\"history_store_checkpoint_delay\","
@@ -717,8 +815,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
"\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
"\"reconcile\",\"recovery\",\"recovery_progress\",\"rts\","
"\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
- "\"version\",\"write\"]",
+ "\"thread_group\",\"tiered\",\"timestamp\",\"transaction\","
+ "\"verify\",\"version\",\"write\"]",
NULL, 0},
{"verify_metadata", "boolean", NULL, NULL, NULL, 0},
{"write_through", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0},
@@ -774,7 +872,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
NULL, 0},
{"statistics_log", "category", NULL, NULL, confchk_wiredtiger_open_statistics_log_subconfigs, 6},
{"tiered_manager", "category", NULL, NULL, confchk_tiered_manager_subconfigs, 3},
- {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 8},
+ {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 6},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
"\"checkpoint_slow\",\"history_store_checkpoint_delay\","
@@ -795,8 +893,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
"\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
"\"reconcile\",\"recovery\",\"recovery_progress\",\"rts\","
"\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
- "\"version\",\"write\"]",
+ "\"thread_group\",\"tiered\",\"timestamp\",\"transaction\","
+ "\"verify\",\"version\",\"write\"]",
NULL, 0},
{"verify_metadata", "boolean", NULL, NULL, NULL, 0}, {"version", "string", NULL, NULL, NULL, 0},
{"write_through", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0},
@@ -849,7 +947,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
NULL, 0},
{"statistics_log", "category", NULL, NULL, confchk_wiredtiger_open_statistics_log_subconfigs, 6},
{"tiered_manager", "category", NULL, NULL, confchk_tiered_manager_subconfigs, 3},
- {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 8},
+ {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 6},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
"\"checkpoint_slow\",\"history_store_checkpoint_delay\","
@@ -868,8 +966,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
"\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
"\"reconcile\",\"recovery\",\"recovery_progress\",\"rts\","
"\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
- "\"version\",\"write\"]",
+ "\"thread_group\",\"tiered\",\"timestamp\",\"transaction\","
+ "\"verify\",\"version\",\"write\"]",
NULL, 0},
{"verify_metadata", "boolean", NULL, NULL, NULL, 0}, {"version", "string", NULL, NULL, NULL, 0},
{"write_through", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0},
@@ -922,7 +1020,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
NULL, 0},
{"statistics_log", "category", NULL, NULL, confchk_wiredtiger_open_statistics_log_subconfigs, 6},
{"tiered_manager", "category", NULL, NULL, confchk_tiered_manager_subconfigs, 3},
- {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 8},
+ {"tiered_storage", "category", NULL, NULL, confchk_tiered_storage_subconfigs, 6},
{"timing_stress_for_test", "list", NULL,
"choices=[\"aggressive_sweep\",\"backup_rename\","
"\"checkpoint_slow\",\"history_store_checkpoint_delay\","
@@ -941,8 +1039,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
"\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
"\"reconcile\",\"recovery\",\"recovery_progress\",\"rts\","
"\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
- "\"version\",\"write\"]",
+ "\"thread_group\",\"tiered\",\"timestamp\",\"transaction\","
+ "\"verify\",\"version\",\"write\"]",
NULL, 0},
{"verify_metadata", "boolean", NULL, NULL, NULL, 0},
{"write_through", "list", NULL, "choices=[\"data\",\"log\"]", NULL, 0},
@@ -985,8 +1083,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"statistics=none,statistics_log=(json=false,on_close=false,"
"sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"tiered_manager=(threads_max=8,threads_min=1,wait=0),"
- "tiered_storage=(auth_token=,local_retention=300,"
- "object_target_size=10M),timing_stress_for_test=,verbose=[]",
+ "tiered_storage=(local_retention=300,object_target_size=10M),"
+ "timing_stress_for_test=,verbose=[]",
confchk_WT_CONNECTION_reconfigure, 29},
{"WT_CONNECTION.rollback_to_stable", "", NULL, 0}, {"WT_CONNECTION.set_file_system", "", NULL, 0},
{"WT_CONNECTION.set_timestamp",
@@ -994,7 +1092,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"oldest_timestamp=,stable_timestamp=",
confchk_WT_CONNECTION_set_timestamp, 5},
{"WT_CURSOR.close", "", NULL, 0},
- {"WT_CURSOR.reconfigure", "append=false,overwrite=true", confchk_WT_CURSOR_reconfigure, 2},
+ {"WT_CURSOR.reconfigure", "append=false,overwrite=true,prefix_search=false",
+ confchk_WT_CURSOR_reconfigure, 3},
{"WT_SESSION.alter",
"access_pattern_hint=none,app_metadata=,"
"assert=(commit_timestamp=none,durable_timestamp=none,"
@@ -1036,11 +1135,10 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0,"
"prefix_compression=false,prefix_compression_min=4,readonly=false"
",source=,split_deepen_min_child=0,split_deepen_per_child=0,"
- "split_pct=90,tiered=(chunk_size=1GB,tiers=),"
- "tiered_storage=(auth_token=,bucket=,local_retention=300,name=,"
- "object_target_size=10M),type=file,value_format=u,verbose=[],"
- "write_timestamp_usage=none",
- confchk_WT_SESSION_create, 50},
+ "split_pct=90,tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),type=file,"
+ "value_format=u,verbose=[],write_timestamp_usage=none",
+ confchk_WT_SESSION_create, 49},
{"WT_SESSION.drop",
"checkpoint_wait=true,force=false,lock_wait=true,"
"remove_files=true",
@@ -1058,9 +1156,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"debug=(release_evict=false),dump=,incremental=(consolidate=false"
",enabled=false,file=,force_stop=false,granularity=16MB,src_id=,"
"this_id=),next_random=false,next_random_sample_size=0,"
- "overwrite=true,raw=false,read_once=false,readonly=false,"
- "skip_sort_check=false,statistics=,target=",
- confchk_WT_SESSION_open_cursor, 16},
+ "overwrite=true,prefix_search=false,raw=false,read_once=false,"
+ "readonly=false,skip_sort_check=false,statistics=,target=",
+ confchk_WT_SESSION_open_cursor, 17},
{"WT_SESSION.prepare_transaction", "prepare_timestamp=", confchk_WT_SESSION_prepare_transaction,
1},
{"WT_SESSION.query_timestamp", "get=read", confchk_WT_SESSION_query_timestamp, 1},
@@ -1104,9 +1202,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false,"
"prefix_compression_min=4,readonly=false,split_deepen_min_child=0"
",split_deepen_per_child=0,split_pct=90,"
- "tiered_storage=(auth_token=,bucket=,local_retention=300,name=,"
- "object_target_size=10M),value_format=u,verbose=[],"
- "write_timestamp_usage=none",
+ "tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),value_format=u"
+ ",verbose=[],write_timestamp_usage=none",
confchk_file_config, 41},
{"file.meta",
"access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
@@ -1124,9 +1222,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false,"
"prefix_compression_min=4,readonly=false,split_deepen_min_child=0"
",split_deepen_per_child=0,split_pct=90,"
- "tiered_storage=(auth_token=,bucket=,local_retention=300,name=,"
- "object_target_size=10M),value_format=u,verbose=[],"
- "version=(major=0,minor=0),write_timestamp_usage=none",
+ "tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),value_format=u"
+ ",verbose=[],version=(major=0,minor=0),write_timestamp_usage=none",
confchk_file_meta, 46},
{"index.meta",
"app_metadata=,assert=(commit_timestamp=none,"
@@ -1144,7 +1242,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"name=),format=btree,huffman_key=,huffman_value=,"
"ignore_in_memory_cache_size=false,internal_item_max=0,"
"internal_key_max=0,internal_key_truncate=true,"
- "internal_page_max=4KB,key_format=u,key_gap=10,last=,"
+ "internal_page_max=4KB,key_format=u,key_gap=10,last=0,"
"leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
"leaf_value_max=0,log=(enabled=true),lsm=(auto_throttle=true,"
"bloom=true,bloom_bit_count=16,bloom_config=,bloom_hash_count=8,"
@@ -1155,22 +1253,63 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"os_cache_max=0,prefix_compression=false,prefix_compression_min=4"
",readonly=false,split_deepen_min_child=0,"
"split_deepen_per_child=0,split_pct=90,"
- "tiered_storage=(auth_token=,bucket=,local_retention=300,name=,"
- "object_target_size=10M),value_format=u,verbose=[],"
- "write_timestamp_usage=none",
+ "tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),value_format=u"
+ ",verbose=[],write_timestamp_usage=none",
confchk_lsm_meta, 45},
+ {"object.meta",
+ "access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
+ "assert=(commit_timestamp=none,durable_timestamp=none,"
+ "read_timestamp=none,write_timestamp=off),block_allocation=best,"
+ "block_compressor=,cache_resident=false,checkpoint=,"
+ "checkpoint_backup_info=,checkpoint_lsn=,checksum=uncompressed,"
+ "collator=,columns=,dictionary=0,encryption=(keyid=,name=),"
+ "format=btree,huffman_key=,huffman_value=,id=,"
+ "ignore_in_memory_cache_size=false,internal_item_max=0,"
+ "internal_key_max=0,internal_key_truncate=true,"
+ "internal_page_max=4KB,key_format=u,key_gap=10,leaf_item_max=0,"
+ "leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=0,"
+ "log=(enabled=true),memory_page_image_max=0,memory_page_max=5MB,"
+ "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false,"
+ "prefix_compression_min=4,readonly=false,split_deepen_min_child=0"
+ ",split_deepen_per_child=0,split_pct=90,"
+ "tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),value_format=u"
+ ",verbose=[],version=(major=0,minor=0),write_timestamp_usage=none",
+ confchk_object_meta, 46},
{"table.meta",
"app_metadata=,assert=(commit_timestamp=none,"
"durable_timestamp=none,read_timestamp=none,write_timestamp=off),"
"colgroups=,collator=,columns=,key_format=u,value_format=u,"
"verbose=[],write_timestamp_usage=none",
confchk_table_meta, 9},
+ {"tier.meta",
+ "access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
+ "assert=(commit_timestamp=none,durable_timestamp=none,"
+ "read_timestamp=none,write_timestamp=off),block_allocation=best,"
+ "block_compressor=,bucket=,bucket_prefix=,cache_resident=false,"
+ "checkpoint=,checkpoint_backup_info=,checkpoint_lsn=,"
+ "checksum=uncompressed,collator=,columns=,dictionary=0,"
+ "encryption=(keyid=,name=),format=btree,huffman_key=,"
+ "huffman_value=,id=,ignore_in_memory_cache_size=false,"
+ "internal_item_max=0,internal_key_max=0,"
+ "internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
+ "key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
+ "leaf_value_max=0,log=(enabled=true),memory_page_image_max=0,"
+ "memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0,"
+ "prefix_compression=false,prefix_compression_min=4,readonly=false"
+ ",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
+ "tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),value_format=u"
+ ",verbose=[],version=(major=0,minor=0),write_timestamp_usage=none",
+ confchk_tier_meta, 48},
{"tiered.meta",
"app_metadata=,assert=(commit_timestamp=none,"
"durable_timestamp=none,read_timestamp=none,write_timestamp=off),"
- "tiered=(chunk_size=1GB,tiers=),verbose=[],"
- "write_timestamp_usage=none",
- confchk_tiered_meta, 5},
+ "collator=,columns=,last=0,tiered_storage=(auth_token=,bucket=,"
+ "bucket_prefix=,local_retention=300,name=,object_target_size=10M)"
+ ",tiers=,verbose=[],write_timestamp_usage=none",
+ confchk_tiered_meta, 9},
{"wiredtiger_open",
"buffer_alignment=-1,builtin_extension_config=,cache_cursors=true"
",cache_max_wait_ms=0,cache_overhead=8,cache_size=100MB,"
@@ -1200,8 +1339,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"reserve=0,size=500MB),statistics=none,statistics_log=(json=false"
",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\""
",wait=0),tiered_manager=(threads_max=8,threads_min=1,wait=0),"
- "tiered_storage=(auth_token=,auth_token=,bucket=,cluster=,"
- "local_retention=300,member=,name=,object_target_size=10M),"
+ "tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),"
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),use_environment=true,use_environment_priv=false,"
"verbose=[],verify_metadata=false,write_through=",
@@ -1235,8 +1374,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"reserve=0,size=500MB),statistics=none,statistics_log=(json=false"
",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\""
",wait=0),tiered_manager=(threads_max=8,threads_min=1,wait=0),"
- "tiered_storage=(auth_token=,auth_token=,bucket=,cluster=,"
- "local_retention=300,member=,name=,object_target_size=10M),"
+ "tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),"
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),use_environment=true,use_environment_priv=false,"
"verbose=[],verify_metadata=false,version=(major=0,minor=0),"
@@ -1270,8 +1409,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"statistics=none,statistics_log=(json=false,on_close=false,"
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"tiered_manager=(threads_max=8,threads_min=1,wait=0),"
- "tiered_storage=(auth_token=,auth_token=,bucket=,cluster=,"
- "local_retention=300,member=,name=,object_target_size=10M),"
+ "tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),"
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),verbose=[],verify_metadata=false,version=(major=0,"
"minor=0),write_through=",
@@ -1304,8 +1443,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {{"WT_CONNECTION.add_collator",
"statistics=none,statistics_log=(json=false,on_close=false,"
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"tiered_manager=(threads_max=8,threads_min=1,wait=0),"
- "tiered_storage=(auth_token=,auth_token=,bucket=,cluster=,"
- "local_retention=300,member=,name=,object_target_size=10M),"
+ "tiered_storage=(auth_token=,bucket=,bucket_prefix=,"
+ "local_retention=300,name=,object_target_size=10M),"
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),verbose=[],verify_metadata=false,write_through=",
confchk_wiredtiger_open_usercfg, 51},
diff --git a/src/third_party/wiredtiger/src/config/test_config.c b/src/third_party/wiredtiger/src/config/test_config.c
index 1b42bbee4d1..bb46c2a1f24 100644
--- a/src/third_party/wiredtiger/src/config/test_config.c
+++ b/src/third_party/wiredtiger/src/config/test_config.c
@@ -7,19 +7,23 @@ static const WT_CONFIG_CHECK confchk_stat_cache_size_subconfigs[] = {
{NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_runtime_monitor_subconfigs[] = {
- {"rate_per_second", "int", NULL, "min=1,max=1000", NULL, 0},
+ {"enabled", "boolean", NULL, NULL, NULL, 0},
+ {"interval", "string", NULL, "choices=[\"s\",\"m\",\"h\"]", NULL, 0},
+ {"op_count", "int", NULL, "min=1,max=10000", NULL, 0},
{"stat_cache_size", "category", NULL, NULL, confchk_stat_cache_size_subconfigs, 2},
{NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_timestamp_manager_subconfigs[] = {
{"enabled", "boolean", NULL, NULL, NULL, 0},
+ {"interval", "string", NULL, "choices=[\"s\",\"m\",\"h\"]", NULL, 0},
{"oldest_lag", "int", NULL, "min=0,max=1000000", NULL, 0},
+ {"op_count", "int", NULL, "min=1,max=10000", NULL, 0},
{"stable_lag", "int", NULL, "min=0,max=1000000", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_insert_config_subconfigs[] = {
- {"key_format", "string", NULL, NULL, NULL, 0},
+ {"interval", "string", NULL, "choices=[\"s\",\"m\",\"h\"]", NULL, 0},
{"key_size", "int", NULL, "min=0,max=10000", NULL, 0},
- {"value_format", "string", NULL, NULL, NULL, 0},
+ {"op_count", "int", NULL, "min=1,max=10000", NULL, 0},
{"value_size", "int", NULL, "min=0,max=1000000000", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_ops_per_transaction_subconfigs[] = {
@@ -27,50 +31,76 @@ static const WT_CONFIG_CHECK confchk_ops_per_transaction_subconfigs[] = {
{NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_update_config_subconfigs[] = {
- {"key_format", "string", NULL, NULL, NULL, 0},
+ {"interval", "string", NULL, "choices=[\"s\",\"m\",\"h\"]", NULL, 0},
{"key_size", "int", NULL, "min=0,max=10000", NULL, 0},
- {"value_format", "string", NULL, NULL, NULL, 0},
+ {"op_count", "int", NULL, "min=1,max=10000", NULL, 0},
{"value_size", "int", NULL, "min=0,max=1000000000", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_workload_generator_subconfigs[] = {
{"collection_count", "int", NULL, "min=0,max=200000", NULL, 0},
+ {"enabled", "boolean", NULL, NULL, NULL, 0},
{"insert_config", "category", NULL, NULL, confchk_insert_config_subconfigs, 4},
{"insert_threads", "int", NULL, "min=0,max=20", NULL, 0},
+ {"interval", "string", NULL, "choices=[\"s\",\"m\",\"h\"]", NULL, 0},
+ {"interval", "string", NULL, "choices=[\"s\",\"m\",\"h\"]", NULL, 0},
{"key_count", "int", NULL, "min=0,max=1000000", NULL, 0},
- {"key_format", "string", NULL, NULL, NULL, 0},
{"key_size", "int", NULL, "min=0,max=10000", NULL, 0},
+ {"op_count", "int", NULL, "min=1,max=10000", NULL, 0},
+ {"op_count", "int", NULL, "min=1,max=10000", NULL, 0},
{"ops_per_transaction", "category", NULL, NULL, confchk_ops_per_transaction_subconfigs, 2},
{"read_threads", "int", NULL, "min=0,max=100", NULL, 0},
{"update_config", "category", NULL, NULL, confchk_update_config_subconfigs, 4},
{"update_threads", "int", NULL, "min=0,max=20", NULL, 0},
- {"value_format", "string", NULL, NULL, NULL, 0},
{"value_size", "int", NULL, "min=0,max=1000000000", NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_CHECK confchk_workload_tracking_subconfigs[] = {
{"enabled", "boolean", NULL, NULL, NULL, 0}, {NULL, NULL, NULL, NULL, NULL, 0}};
+static const WT_CONFIG_CHECK confchk_example_test[] = {
+ {"cache_size_mb", "int", NULL, "min=0,max=100000000000", NULL, 0},
+ {"duration_seconds", "int", NULL, "min=0,max=1000000", NULL, 0},
+ {"enable_logging", "boolean", NULL, NULL, NULL, 0},
+ {"runtime_monitor", "category", NULL, NULL, confchk_runtime_monitor_subconfigs, 4},
+ {"timestamp_manager", "category", NULL, NULL, confchk_timestamp_manager_subconfigs, 5},
+ {"workload_generator", "category", NULL, NULL, confchk_workload_generator_subconfigs, 15},
+ {"workload_tracking", "category", NULL, NULL, confchk_workload_tracking_subconfigs, 1},
+ {NULL, NULL, NULL, NULL, NULL, 0}};
+
static const WT_CONFIG_CHECK confchk_poc_test[] = {
{"cache_size_mb", "int", NULL, "min=0,max=100000000000", NULL, 0},
{"duration_seconds", "int", NULL, "min=0,max=1000000", NULL, 0},
{"enable_logging", "boolean", NULL, NULL, NULL, 0},
- {"runtime_monitor", "category", NULL, NULL, confchk_runtime_monitor_subconfigs, 2},
- {"timestamp_manager", "category", NULL, NULL, confchk_timestamp_manager_subconfigs, 3},
- {"workload_generator", "category", NULL, NULL, confchk_workload_generator_subconfigs, 12},
+ {"runtime_monitor", "category", NULL, NULL, confchk_runtime_monitor_subconfigs, 4},
+ {"timestamp_manager", "category", NULL, NULL, confchk_timestamp_manager_subconfigs, 5},
+ {"workload_generator", "category", NULL, NULL, confchk_workload_generator_subconfigs, 15},
{"workload_tracking", "category", NULL, NULL, confchk_workload_tracking_subconfigs, 1},
{NULL, NULL, NULL, NULL, NULL, 0}};
static const WT_CONFIG_ENTRY config_entries[] = {
+ {"example_test",
+ "cache_size_mb=0,duration_seconds=0,enable_logging=true,"
+ "runtime_monitor=(enabled=false,interval=s,op_count=1,"
+ "stat_cache_size=(enabled=false,limit=)),"
+ "timestamp_manager=(enabled=false,interval=s,oldest_lag=0,"
+ "op_count=1,stable_lag=0),workload_generator=(collection_count=1,"
+ "enabled=false,insert_config=(interval=s,key_size=0,op_count=1,"
+ "value_size=0),insert_threads=0,interval=s,interval=s,key_count=0"
+ ",key_size=0,op_count=1,op_count=1,ops_per_transaction=(max=1,"
+ "min=),read_threads=0,update_config=(interval=s,key_size=0,"
+ "op_count=1,value_size=0),update_threads=0,value_size=0),"
+ "workload_tracking=(enabled=false)",
+ confchk_example_test, 7},
{"poc_test",
"cache_size_mb=0,duration_seconds=0,enable_logging=true,"
- "runtime_monitor=(rate_per_second=1,"
+ "runtime_monitor=(enabled=false,interval=s,op_count=1,"
"stat_cache_size=(enabled=false,limit=)),"
- "timestamp_manager=(enabled=false,oldest_lag=0,stable_lag=0),"
- "workload_generator=(collection_count=1,"
- "insert_config=(key_format=i,key_size=0,value_format=S,"
- "value_size=0),insert_threads=0,key_count=0,key_format=i,"
- "key_size=0,ops_per_transaction=(max=1,min=),read_threads=0,"
- "update_config=(key_format=i,key_size=0,value_format=S,"
- "value_size=0),update_threads=0,value_format=S,value_size=0),"
+ "timestamp_manager=(enabled=false,interval=s,oldest_lag=0,"
+ "op_count=1,stable_lag=0),workload_generator=(collection_count=1,"
+ "enabled=false,insert_config=(interval=s,key_size=0,op_count=1,"
+ "value_size=0),insert_threads=0,interval=s,interval=s,key_count=0"
+ ",key_size=0,op_count=1,op_count=1,ops_per_transaction=(max=1,"
+ "min=),read_threads=0,update_config=(interval=s,key_size=0,"
+ "op_count=1,value_size=0),update_threads=0,value_size=0),"
"workload_tracking=(enabled=false)",
confchk_poc_test, 7},
{NULL, NULL, NULL, 0}};
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index 17e40ef84e6..47a28e016f2 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -664,116 +664,6 @@ __wt_conn_remove_extractor(WT_SESSION_IMPL *session)
}
/*
- * __tiered_confchk --
- * Check for a valid tiered storage source.
- */
-static int
-__tiered_confchk(
- WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cname, WT_NAMED_STORAGE_SOURCE **nstoragep)
-{
- WT_CONNECTION_IMPL *conn;
- WT_NAMED_STORAGE_SOURCE *nstorage;
-
- *nstoragep = NULL;
-
- if (cname->len == 0 || WT_STRING_MATCH("none", cname->str, cname->len))
- return (0);
-
- conn = S2C(session);
- TAILQ_FOREACH (nstorage, &conn->storagesrcqh, q)
- if (WT_STRING_MATCH(nstorage->name, cname->str, cname->len)) {
- *nstoragep = nstorage;
- return (0);
- }
- WT_RET_MSG(session, EINVAL, "unknown storage source '%.*s'", (int)cname->len, cname->str);
-}
-
-/*
- * __wt_tiered_bucket_config --
- * Given a configuration, configure the bucket storage.
- */
-int
-__wt_tiered_bucket_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_CONFIG_ITEM *bucket,
- WT_BUCKET_STORAGE **bstoragep)
-{
- WT_BUCKET_STORAGE *bstorage, *new;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_NAMED_STORAGE_SOURCE *nstorage;
-#if 0
- WT_STORAGE_SOURCE *custom, *storage;
-#else
- WT_STORAGE_SOURCE *storage;
-#endif
- uint64_t hash_bucket, hash;
-
- *bstoragep = NULL;
-
- bstorage = new = NULL;
- conn = S2C(session);
-
- __wt_spin_lock(session, &conn->storage_lock);
-
- WT_ERR(__tiered_confchk(session, cval, &nstorage));
- if (nstorage == NULL) {
- if (bucket->len != 0)
- WT_ERR_MSG(
- session, EINVAL, "tiered_storage.bucket requires tiered_storage.name to be set");
- goto out;
- }
-
- /*
- * Check if tiered storage is set on the connection. If someone wants tiered storage on a table,
- * it needs to be configured on the database as well.
- */
- if (conn->bstorage == NULL && bstoragep != &conn->bstorage)
- WT_ERR_MSG(
- session, EINVAL, "table tiered storage requires connection tiered storage to be set");
- hash = __wt_hash_city64(bucket->str, bucket->len);
- hash_bucket = hash & (conn->hash_size - 1);
- TAILQ_FOREACH (bstorage, &nstorage->buckethashqh[hash_bucket], q)
- if (WT_STRING_MATCH(bstorage->bucket, bucket->str, bucket->len))
- goto out;
-
- WT_ERR(__wt_calloc_one(session, &new));
- WT_ERR(__wt_strndup(session, bucket->str, bucket->len, &new->bucket));
- storage = nstorage->storage_source;
-#if 0
- if (storage->customize != NULL) {
- custom = NULL;
- WT_ERR(storage->customize(storage, &session->iface, cfg_arg, &custom));
- if (custom != NULL) {
- bstorage->owned = 1;
- storage = custom;
- }
- }
-#endif
- new->storage_source = storage;
- if (bstorage != NULL) {
- new->object_size = bstorage->object_size;
- new->retain_secs = bstorage->retain_secs;
- WT_ERR(__wt_strdup(session, bstorage->auth_token, &new->auth_token));
- }
- TAILQ_INSERT_HEAD(&nstorage->bucketqh, new, q);
- TAILQ_INSERT_HEAD(&nstorage->buckethashqh[hash_bucket], new, hashq);
- F_SET(new, WT_BUCKET_FREE);
-
-out:
- __wt_spin_unlock(session, &conn->storage_lock);
- *bstoragep = new;
- return (0);
-
-err:
- if (bstorage != NULL) {
- __wt_free(session, new->auth_token);
- __wt_free(session, new->bucket);
- __wt_free(session, new);
- }
- __wt_spin_unlock(session, &conn->storage_lock);
- return (ret);
-}
-
-/*
* __conn_add_storage_source --
* WT_CONNECTION->add_storage_source method.
*/
@@ -864,10 +754,6 @@ __wt_conn_remove_storage_source(WT_SESSION_IMPL *session)
while ((bstorage = TAILQ_FIRST(&nstorage->bucketqh)) != NULL) {
/* Remove from the connection's list, free memory. */
TAILQ_REMOVE(&nstorage->bucketqh, bstorage, q);
- storage = bstorage->storage_source;
- WT_ASSERT(session, storage != NULL);
- if (bstorage->owned && storage->terminate != NULL)
- WT_TRET(storage->terminate(storage, (WT_SESSION *)session));
__wt_free(session, bstorage->auth_token);
__wt_free(session, bstorage->bucket);
__wt_free(session, bstorage);
@@ -888,6 +774,25 @@ __wt_conn_remove_storage_source(WT_SESSION_IMPL *session)
}
/*
+ * __conn_ext_file_system_get --
+ * WT_EXTENSION.file_system_get method. Get file system in use. On success the connection's
+ * file system is stored in *file_system and 0 is returned; if the connection has no file system
+ * set, WT_NOTFOUND is returned and *file_system is left unmodified.
+ */
+static int
+__conn_ext_file_system_get(
+ WT_EXTENSION_API *wt_api, WT_SESSION *session, WT_FILE_SYSTEM **file_system)
+{
+ WT_FILE_SYSTEM *fs;
+
+ /* The session argument is not needed: the connection is reached through wt_api. */
+ WT_UNUSED(session);
+
+ fs = ((WT_CONNECTION_IMPL *)wt_api->conn)->file_system;
+ if (fs == NULL)
+ return (WT_NOTFOUND);
+ *file_system = fs;
+ return (0);
+}
+
+/*
* __conn_get_extension_api --
* WT_CONNECTION.get_extension_api method.
*/
@@ -911,6 +816,7 @@ __conn_get_extension_api(WT_CONNECTION *wt_conn)
conn->extension_api.config_get_string = __wt_ext_config_get_string;
conn->extension_api.config_parser_open = __wt_ext_config_parser_open;
conn->extension_api.config_parser_open_arg = __wt_ext_config_parser_open_arg;
+ conn->extension_api.file_system_get = __conn_ext_file_system_get;
conn->extension_api.metadata_insert = __wt_ext_metadata_insert;
conn->extension_api.metadata_remove = __wt_ext_metadata_remove;
conn->extension_api.metadata_search = __wt_ext_metadata_search;
@@ -2898,6 +2804,12 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, const char *c
WT_ERR(__conn_load_extensions(session, cfg, false));
/*
+ * Do some early initialization for tiered storage, as this may affect our choice of file system
+ * for some operations.
+ */
+ WT_ERR(__wt_tiered_conn_config(session, cfg, false));
+
+ /*
* The metadata/log encryptor is configured after extensions, since
* extensions may load encryptors. We have to do this before creating
* the metadata file.
@@ -2982,7 +2894,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, const char *c
* FIXME-WT-6682: temporarily disable history store verification.
*/
if (verify_meta) {
- WT_ERR(__wt_open_internal_session(conn, "verify hs", false, 0, &verify_session));
+ WT_ERR(__wt_open_internal_session(conn, "verify hs", false, 0, 0, &verify_session));
ret = __wt_hs_verify(verify_session);
WT_TRET(__wt_session_close_internal(verify_session));
WT_ERR(ret);
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c
index 2634be7105e..53c4d4b1cc1 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache.c
@@ -251,7 +251,7 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
WT_RET(__wt_spin_init(session, &cache->evict_queue_lock, "cache eviction queue"));
WT_RET(__wt_spin_init(session, &cache->evict_walk_lock, "cache walk"));
if ((ret = __wt_open_internal_session(
- conn, "evict pass", false, WT_SESSION_NO_DATA_HANDLES, &cache->walk_session)) != 0)
+ conn, "evict pass", false, WT_SESSION_NO_DATA_HANDLES, 0, &cache->walk_session)) != 0)
WT_RET_MSG(NULL, ret, "Failed to create session for eviction walks");
/* Allocate the LRU eviction queue. */
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
index 149d2eac2d6..d7a317fd09c 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
@@ -241,7 +241,7 @@ __wt_conn_cache_pool_open(WT_SESSION_IMPL *session)
*/
session_flags = WT_SESSION_NO_DATA_HANDLES;
if ((ret = __wt_open_internal_session(
- conn, "cache-pool", false, session_flags, &cache->cp_session)) != 0)
+ conn, "cache-pool", false, session_flags, 0, &cache->cp_session)) != 0)
WT_RET_MSG(NULL, ret, "Failed to create session for cache pool");
/*
diff --git a/src/third_party/wiredtiger/src/conn/conn_capacity.c b/src/third_party/wiredtiger/src/conn/conn_capacity.c
index 3c453a79ede..a12ccec9147 100644
--- a/src/third_party/wiredtiger/src/conn/conn_capacity.c
+++ b/src/third_party/wiredtiger/src/conn/conn_capacity.c
@@ -134,7 +134,8 @@ __capacity_server_start(WT_CONNECTION_IMPL *conn)
/*
* The capacity server gets its own session.
*/
- WT_RET(__wt_open_internal_session(conn, "capacity-server", false, 0, &conn->capacity_session));
+ WT_RET(
+ __wt_open_internal_session(conn, "capacity-server", false, 0, 0, &conn->capacity_session));
session = conn->capacity_session;
WT_RET(__wt_cond_alloc(session, "capacity server", &conn->capacity_cond));
diff --git a/src/third_party/wiredtiger/src/conn/conn_ckpt.c b/src/third_party/wiredtiger/src/conn/conn_ckpt.c
index 8a1f599a18d..7ac53585134 100644
--- a/src/third_party/wiredtiger/src/conn/conn_ckpt.c
+++ b/src/third_party/wiredtiger/src/conn/conn_ckpt.c
@@ -144,7 +144,7 @@ __ckpt_server_start(WT_CONNECTION_IMPL *conn)
*/
session_flags = WT_SESSION_CAN_WAIT;
WT_RET(__wt_open_internal_session(
- conn, "checkpoint-server", true, session_flags, &conn->ckpt_session));
+ conn, "checkpoint-server", true, session_flags, 0, &conn->ckpt_session));
session = conn->ckpt_session;
WT_RET(__wt_cond_alloc(session, "checkpoint server", &conn->ckpt_cond));
diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
index 73180a119f5..0c39475b207 100644
--- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c
+++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
@@ -102,6 +102,9 @@ __conn_dhandle_config_set(WT_SESSION_IMPL *session)
case WT_DHANDLE_TYPE_TIERED:
WT_ERR(__wt_strdup(session, WT_CONFIG_BASE(session, tiered_meta), &dhandle->cfg[0]));
break;
+ case WT_DHANDLE_TYPE_TIERED_TREE:
+ WT_ERR(__wt_strdup(session, WT_CONFIG_BASE(session, tier_meta), &dhandle->cfg[0]));
+ break;
}
dhandle->cfg[1] = metaconf;
dhandle->meta_base = base;
@@ -133,6 +136,9 @@ __conn_dhandle_destroy(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle)
case WT_DHANDLE_TYPE_TIERED:
ret = __wt_tiered_close(session, (WT_TIERED *)dhandle);
break;
+ case WT_DHANDLE_TYPE_TIERED_TREE:
+ ret = __wt_tiered_tree_close(session, (WT_TIERED_TREE *)dhandle);
+ break;
}
__wt_rwlock_destroy(session, &dhandle->rwlock);
@@ -157,6 +163,7 @@ __wt_conn_dhandle_alloc(WT_SESSION_IMPL *session, const char *uri, const char *c
WT_DECL_RET;
WT_TABLE *table;
WT_TIERED *tiered;
+ WT_TIERED_TREE *tiered_tree;
uint64_t bucket;
/*
@@ -172,6 +179,10 @@ __wt_conn_dhandle_alloc(WT_SESSION_IMPL *session, const char *uri, const char *c
WT_RET(__wt_calloc_one(session, &table));
dhandle = (WT_DATA_HANDLE *)table;
dhandle->type = WT_DHANDLE_TYPE_TABLE;
+ } else if (WT_PREFIX_MATCH(uri, "tier:")) {
+ WT_RET(__wt_calloc_one(session, &tiered_tree));
+ dhandle = (WT_DATA_HANDLE *)tiered_tree;
+ dhandle->type = WT_DHANDLE_TYPE_TIERED_TREE;
} else if (WT_PREFIX_MATCH(uri, "tiered:")) {
WT_RET(__wt_calloc_one(session, &tiered));
dhandle = (WT_DATA_HANDLE *)tiered;
@@ -234,7 +245,7 @@ __wt_conn_dhandle_find(WT_SESSION_IMPL *session, const char *uri, const char *ch
conn = S2C(session);
/* We must be holding the handle list lock at a higher level. */
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST));
bucket = __wt_hash_city64(uri, strlen(uri)) & (conn->dh_hash_size - 1);
if (checkpoint == NULL) {
@@ -301,9 +312,9 @@ __wt_conn_dhandle_close(WT_SESSION_IMPL *session, bool final, bool mark_dead)
* schema lock we might deadlock with a thread that has the schema lock and wants a handle lock.
*/
no_schema_lock = false;
- if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) {
+ if (!FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA)) {
no_schema_lock = true;
- F_SET(session, WT_SESSION_NO_SCHEMA_LOCK);
+ FLD_SET(session->lock_flags, WT_SESSION_NO_SCHEMA_LOCK);
}
/*
@@ -378,6 +389,9 @@ __wt_conn_dhandle_close(WT_SESSION_IMPL *session, bool final, bool mark_dead)
case WT_DHANDLE_TYPE_TIERED:
WT_TRET(__wt_tiered_close(session, (WT_TIERED *)dhandle));
break;
+ case WT_DHANDLE_TYPE_TIERED_TREE:
+ WT_TRET(__wt_tiered_tree_close(session, (WT_TIERED_TREE *)dhandle));
+ break;
}
/*
@@ -415,7 +429,7 @@ err:
__wt_spin_unlock(session, &dhandle->close_lock);
if (no_schema_lock)
- F_CLR(session, WT_SESSION_NO_SCHEMA_LOCK);
+ FLD_CLR(session->lock_flags, WT_SESSION_NO_SCHEMA_LOCK);
if (is_btree)
__wt_evict_file_exclusive_off(session);
@@ -536,6 +550,9 @@ __wt_conn_dhandle_open(WT_SESSION_IMPL *session, const char *cfg[], uint32_t fla
case WT_DHANDLE_TYPE_TIERED:
WT_ERR(__wt_tiered_open(session, cfg));
break;
+ case WT_DHANDLE_TYPE_TIERED_TREE:
+ WT_ERR(__wt_tiered_tree_open(session, cfg));
+ break;
}
/*
@@ -756,7 +773,7 @@ __wt_conn_dhandle_close_all(WT_SESSION_IMPL *session, const char *uri, bool remo
conn = S2C(session);
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
WT_ASSERT(session, session->dhandle == NULL);
/*
@@ -795,7 +812,7 @@ __conn_dhandle_remove(WT_SESSION_IMPL *session, bool final)
dhandle = session->dhandle;
bucket = dhandle->name_hash & (conn->dh_hash_size - 1);
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
WT_ASSERT(session, dhandle != conn->cache->walk_tree);
/* Check if the handle was reacquired by a session while we waited. */
@@ -833,7 +850,7 @@ __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, bool final, bool mark
* Kludge: interrupt the eviction server in case it is holding the handle list lock.
*/
set_pass_intr = false;
- if (!F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) {
+ if (!FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST)) {
set_pass_intr = true;
(void)__wt_atomic_addv32(&S2C(session)->cache->pass_intr, 1);
}
diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c
index d913b251050..532975fc571 100644
--- a/src/third_party/wiredtiger/src/conn/conn_log.c
+++ b/src/third_party/wiredtiger/src/conn/conn_log.c
@@ -54,7 +54,7 @@ __logmgr_force_archive(WT_SESSION_IMPL *session, uint32_t lognum)
log = conn->log;
sleep_usecs = yield_cnt = 0;
- WT_RET(__wt_open_internal_session(conn, "compatibility-reconfig", true, 0, &tmp_session));
+ WT_RET(__wt_open_internal_session(conn, "compatibility-reconfig", true, 0, 0, &tmp_session));
while (log->first_lsn.l.file < lognum) {
/*
* Force a checkpoint to be written in the new log file and force the archiving of all
@@ -1043,7 +1043,7 @@ __wt_logmgr_open(WT_SESSION_IMPL *session)
*/
session_flags = WT_SESSION_NO_DATA_HANDLES;
WT_RET(__wt_open_internal_session(
- conn, "log-close-server", false, session_flags, &conn->log_file_session));
+ conn, "log-close-server", false, session_flags, 0, &conn->log_file_session));
WT_RET(__wt_cond_alloc(conn->log_file_session, "log close server", &conn->log_file_cond));
/*
@@ -1058,7 +1058,7 @@ __wt_logmgr_open(WT_SESSION_IMPL *session)
* runs.
*/
WT_RET(__wt_open_internal_session(
- conn, "log-wrlsn-server", false, session_flags, &conn->log_wrlsn_session));
+ conn, "log-wrlsn-server", false, session_flags, 0, &conn->log_wrlsn_session));
WT_RET(__wt_cond_auto_alloc(
conn->log_wrlsn_session, "log write lsn server", 10000, WT_MILLION, &conn->log_wrlsn_cond));
WT_RET(__wt_thread_create(
@@ -1076,8 +1076,8 @@ __wt_logmgr_open(WT_SESSION_IMPL *session)
__wt_cond_signal(session, conn->log_cond);
} else {
/* The log server gets its own session. */
- WT_RET(
- __wt_open_internal_session(conn, "log-server", false, session_flags, &conn->log_session));
+ WT_RET(__wt_open_internal_session(
+ conn, "log-server", false, session_flags, 0, &conn->log_session));
WT_RET(__wt_cond_auto_alloc(
conn->log_session, "log server", 50000, WT_MILLION, &conn->log_cond));
diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c
index 0fa1a53b629..20467b4228b 100644
--- a/src/third_party/wiredtiger/src/conn/conn_open.c
+++ b/src/third_party/wiredtiger/src/conn/conn_open.c
@@ -28,7 +28,7 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[])
* Open the default session. We open this before starting service threads because those may
* allocate and use session resources that need to get cleaned up on close.
*/
- WT_RET(__wt_open_internal_session(conn, "connection", false, 0, &session));
+ WT_RET(__wt_open_internal_session(conn, "connection", false, 0, 0, &session));
/*
* The connection's default session is originally a static structure, swap that out for a more
diff --git a/src/third_party/wiredtiger/src/conn/conn_stat.c b/src/third_party/wiredtiger/src/conn/conn_stat.c
index 64e67b1acf1..a0473f24d78 100644
--- a/src/third_party/wiredtiger/src/conn/conn_stat.c
+++ b/src/third_party/wiredtiger/src/conn/conn_stat.c
@@ -617,7 +617,7 @@ __statlog_start(WT_CONNECTION_IMPL *conn)
FLD_SET(conn->server_flags, WT_CONN_SERVER_STATISTICS);
/* The statistics log server gets its own session. */
- WT_RET(__wt_open_internal_session(conn, "statlog-server", true, 0, &conn->stat_session));
+ WT_RET(__wt_open_internal_session(conn, "statlog-server", true, 0, 0, &conn->stat_session));
session = conn->stat_session;
WT_RET(__wt_cond_alloc(session, "statistics log server", &conn->stat_cond));
diff --git a/src/third_party/wiredtiger/src/conn/conn_sweep.c b/src/third_party/wiredtiger/src/conn/conn_sweep.c
index 7abd47d626e..90f050e161d 100644
--- a/src/third_party/wiredtiger/src/conn/conn_sweep.c
+++ b/src/third_party/wiredtiger/src/conn/conn_sweep.c
@@ -388,8 +388,8 @@ __wt_sweep_create(WT_SESSION_IMPL *session)
* manager. Sweep should not block due to the cache being full.
*/
session_flags = WT_SESSION_CAN_WAIT | WT_SESSION_IGNORE_CACHE_SIZE;
- WT_RET(
- __wt_open_internal_session(conn, "sweep-server", true, session_flags, &conn->sweep_session));
+ WT_RET(__wt_open_internal_session(
+ conn, "sweep-server", true, session_flags, 0, &conn->sweep_session));
session = conn->sweep_session;
WT_RET(__wt_cond_alloc(session, "handle sweep server", &conn->sweep_cond));
diff --git a/src/third_party/wiredtiger/src/conn/conn_tiered.c b/src/third_party/wiredtiger/src/conn/conn_tiered.c
index 2c0d95542ce..4d8a2ab5958 100644
--- a/src/third_party/wiredtiger/src/conn/conn_tiered.c
+++ b/src/third_party/wiredtiger/src/conn/conn_tiered.c
@@ -23,18 +23,46 @@
* __flush_tier_once --
* Perform one iteration of tiered storage maintenance.
*/
-static void
+static int
__flush_tier_once(WT_SESSION_IMPL *session, bool force)
{
- WT_UNUSED(session);
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ const char *key, *value;
+
WT_UNUSED(force);
+ __wt_verbose(session, WT_VERB_TIERED, "%s", "FLUSH_TIER_ONCE: Called");
/*
* - See if there is any "merging" work to do to prepare and create an object that is
* suitable for placing onto tiered storage.
* - Do the work to create said objects.
* - Move the objects.
*/
- return;
+ cursor = NULL;
+ WT_RET(__wt_metadata_cursor(session, &cursor));
+ /*
+ * Walk every metadata entry and act on the ones whose key starts with "tiered:".
+ * NOTE(review): the loop ends on any non-zero return from next and the function then returns 0,
+ * and the get_key/get_value results are not checked, so a cursor failure is not reported to
+ * the caller -- confirm that is acceptable.
+ */
+ while (cursor->next(cursor) == 0) {
+ cursor->get_key(cursor, &key);
+ cursor->get_value(cursor, &value);
+ /* For now just switch tiers which just does metadata manipulation. */
+ if (WT_PREFIX_MATCH(key, "tiered:")) {
+ __wt_verbose(session, WT_VERB_TIERED, "FLUSH_TIER_ONCE: %s %s", key, value);
+ WT_ERR(__wt_session_get_dhandle(session, key, NULL, NULL, WT_DHANDLE_EXCLUSIVE));
+ /*
+ * When we call wt_tiered_switch the session->dhandle points to the tiered: entry and
+ * the arg is the config string that is currently in the metadata.
+ */
+ WT_ERR(__wt_tiered_switch(session, value));
+ WT_ERR(__wt_session_release_dhandle(session));
+ }
+ }
+ WT_ERR(__wt_metadata_cursor_release(session, &cursor));
+
+ return (0);
+
+err:
+ /*
+ * NOTE(review): every WT_ERR above lands here, including a failed get_dhandle, a failed
+ * release_dhandle and a failed cursor release, so the two calls below also run on paths where
+ * the handle was not acquired or was already released -- confirm that is safe.
+ */
+ WT_TRET(__wt_session_release_dhandle(session));
+ WT_TRET(__wt_metadata_cursor_release(session, &cursor));
+ return (ret);
}
/*
@@ -92,6 +120,24 @@ err:
}
/*
+ * __tier_storage_copy --
+ * Perform one iteration of copying newly flushed objects to the shared storage. Currently a
+ * placeholder: no copying is done yet, the body only makes the call described below.
+ */
+static int
+__tier_storage_copy(WT_SESSION_IMPL *session)
+{
+ /*
+ * Intended work (not implemented yet): walk the work queue and copy file:<name> to shared
+ * storage object:<name>. Walk a tiered table's tiers array and copy it to any tier that allows
+ * WT_TIERS_OP_FLUSH.
+ */
+ /*
+ * XXX: We don't want to call this here. It is only a placeholder until we have real content,
+ * presumably so this function has a call that can fail and the compiler stays quiet (the
+ * function returns an int status, not a pointer).
+ */
+ WT_RET(__tier_storage_remove_local(session, NULL, 0));
+ return (0);
+}
+
+/*
* __tier_storage_remove --
* Perform one iteration of tiered storage local tier removal.
*/
@@ -117,11 +163,12 @@ int
__wt_flush_tier(WT_SESSION_IMPL *session, const char *config)
{
WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
const char *cfg[3];
bool force;
WT_STAT_CONN_INCR(session, flush_tier);
- if (FLD_ISSET(S2C(session)->server_flags, WT_CONN_SERVER_TIERED))
+ if (FLD_ISSET(S2C(session)->server_flags, WT_CONN_SERVER_TIERED_MGR))
WT_RET_MSG(
session, EINVAL, "Cannot call flush_tier when storage manager thread is configured");
@@ -131,8 +178,8 @@ __wt_flush_tier(WT_SESSION_IMPL *session, const char *config)
WT_RET(__wt_config_gets(session, cfg, "force", &cval));
force = cval.val != 0;
- __flush_tier_once(session, force);
- return (0);
+ WT_WITH_SCHEMA_LOCK(session, ret = __flush_tier_once(session, force));
+ return (ret);
}
/*
@@ -147,7 +194,7 @@ __tiered_manager_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp)
WT_TIERED_MANAGER *mgr;
conn = S2C(session);
- mgr = &conn->tiered_manager;
+ mgr = &conn->tiered_mgr;
/* Only start the server if wait time is non-zero */
WT_RET(__wt_config_gets(session, cfg, "tiered_manager.wait", &cval));
@@ -171,92 +218,79 @@ __tiered_manager_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp)
}
/*
- * __wt_tiered_common_config --
- * Parse configuration options common to connection and btrees.
+ * __tiered_server_run_chk --
+ * Check to decide if the tiered storage server should continue running.
*/
-int
-__wt_tiered_common_config(WT_SESSION_IMPL *session, const char **cfg, WT_BUCKET_STORAGE *bstorage)
+static bool
+__tiered_server_run_chk(WT_SESSION_IMPL *session)
{
- WT_CONFIG_ITEM cval;
-
- WT_RET(__wt_config_gets(session, cfg, "tiered_storage.local_retention", &cval));
- bstorage->retain_secs = (uint64_t)cval.val;
-
- WT_RET(__wt_config_gets(session, cfg, "tiered_storage.object_target_size", &cval));
- bstorage->object_size = (uint64_t)cval.val;
-
- WT_RET(__wt_config_gets(session, cfg, "tiered_storage.auth_token", &cval));
- /*
- * This call is purposely the last configuration processed so we don't need memory management
- * code and an error label to free it. Note this if any code is added after this line.
- */
- WT_RET(__wt_strndup(session, cval.str, cval.len, &bstorage->auth_token));
- return (0);
+ return (FLD_ISSET(S2C(session)->server_flags, WT_CONN_SERVER_TIERED));
}
/*
- * __tiered_config --
- * Parse and setup the storage server options.
+ * __tiered_server --
+ * The tiered storage server thread.
*/
-static int
-__tiered_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp, bool reconfig)
+static WT_THREAD_RET
+__tiered_server(void *arg)
{
- WT_CONFIG_ITEM bucket, cval;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ WT_ITEM path, tmp;
+ WT_SESSION_IMPL *session;
+ session = arg;
conn = S2C(session);
- if (!reconfig) {
- WT_RET(__wt_config_gets(session, cfg, "tiered_storage.name", &cval));
- WT_RET(__wt_config_gets(session, cfg, "tiered_storage.bucket", &bucket));
- WT_RET(__wt_tiered_bucket_config(session, &cval, &bucket, &conn->bstorage));
- }
- /* If the connection is not set up for tiered storage there is nothing more to do. */
- if (conn->bstorage == NULL)
- return (0);
+ WT_CLEAR(path);
+ WT_CLEAR(tmp);
- WT_ASSERT(session, conn->bstorage != NULL);
- WT_RET(__wt_tiered_common_config(session, cfg, conn->bstorage));
- WT_STAT_CONN_SET(session, tiered_object_size, conn->bstorage->object_size);
- WT_STAT_CONN_SET(session, tiered_retention, conn->bstorage->retain_secs);
+ for (;;) {
+ /* Wait until the next event. */
+ __wt_cond_wait(session, conn->tiered_cond, WT_MINUTE, __tiered_server_run_chk);
- /* The strings for unique identification are connection level not per bucket. */
- WT_RET(__wt_config_gets(session, cfg, "tiered_storage.cluster", &cval));
- WT_ERR(__wt_strndup(session, cval.str, cval.len, &conn->tiered_cluster));
- WT_ERR(__wt_config_gets(session, cfg, "tiered_storage.member", &cval));
- WT_ERR(__wt_strndup(session, cval.str, cval.len, &conn->tiered_member));
+ /* Check if we're quitting or being reconfigured. */
+ if (!__tiered_server_run_chk(session))
+ break;
- return (__tiered_manager_config(session, cfg, runp));
+ /*
+ * Here is where we do work. Work we expect to do:
+ * - Copy any files that need moving from a flush tier call.
+ * - Remove any cached objects that are aged out.
+ */
+ WT_ERR(__tier_storage_copy(session));
+ WT_ERR(__tier_storage_remove(session, false));
+ }
+
+ if (0) {
err:
- __wt_free(session, conn->bstorage->auth_token);
- __wt_free(session, conn->bstorage->bucket);
- __wt_free(session, conn->bstorage);
- __wt_free(session, conn->tiered_cluster);
- __wt_free(session, conn->tiered_member);
- return (ret);
+ WT_IGNORE_RET(__wt_panic(session, ret, "storage server error"));
+ }
+ __wt_buf_free(session, &path);
+ __wt_buf_free(session, &tmp);
+ return (WT_THREAD_RET_VALUE);
}
/*
- * __tiered_server_run_chk --
- * Check to decide if the tiered storage server should continue running.
+ * __tiered_mgr_run_chk --
+ * Check to decide if the tiered storage manager should continue running.
*/
static bool
-__tiered_server_run_chk(WT_SESSION_IMPL *session)
+__tiered_mgr_run_chk(WT_SESSION_IMPL *session)
{
WT_CONNECTION_IMPL *conn;
conn = S2C(session);
- return ((FLD_ISSET(conn->server_flags, WT_CONN_SERVER_TIERED)) &&
- !F_ISSET(&conn->tiered_manager, WT_TIERED_MANAGER_SHUTDOWN));
+ return ((FLD_ISSET(conn->server_flags, WT_CONN_SERVER_TIERED_MGR)) &&
+ !F_ISSET(&conn->tiered_mgr, WT_TIERED_MANAGER_SHUTDOWN));
}
/*
- * __tiered_server --
- * The tiered storage server thread.
+ * __tiered_mgr_server --
+ * The tiered storage manager thread.
*/
static WT_THREAD_RET
-__tiered_server(void *arg)
+__tiered_mgr_server(void *arg)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
@@ -266,22 +300,21 @@ __tiered_server(void *arg)
session = arg;
conn = S2C(session);
- mgr = &conn->tiered_manager;
+ mgr = &conn->tiered_mgr;
WT_CLEAR(path);
WT_CLEAR(tmp);
for (;;) {
/* Wait until the next event. */
- __wt_cond_wait(session, conn->tiered_cond, mgr->wait_usecs, __tiered_server_run_chk);
+ __wt_cond_wait(session, conn->tiered_mgr_cond, mgr->wait_usecs, __tiered_mgr_run_chk);
/* Check if we're quitting or being reconfigured. */
- if (!__tiered_server_run_chk(session))
+ if (!__tiered_mgr_run_chk(session))
break;
/*
* Here is where we do work. Work we expect to do:
- *
*/
__flush_tier_once(session, false);
WT_ERR(__tier_storage_remove(session, false));
@@ -295,10 +328,31 @@ err:
__wt_buf_free(session, &tmp);
return (WT_THREAD_RET_VALUE);
}
+/*
+ * __tiered_mgr_start --
+ * Start the tiered manager flush thread. Sets the WT_CONN_SERVER_TIERED_MGR server flag, opens
+ * the manager's internal session, allocates its condition variable and creates the thread.
+ * Nothing is undone here on failure; the caller, __wt_tiered_storage_create, calls
+ * __wt_tiered_storage_destroy from its error path.
+ */
+static int
+__tiered_mgr_start(WT_CONNECTION_IMPL *conn)
+{
+ WT_SESSION_IMPL *session;
+
+ /* Set the flag before the thread exists: __tiered_mgr_run_chk tests it. */
+ FLD_SET(conn->server_flags, WT_CONN_SERVER_TIERED_MGR);
+ WT_RET(__wt_open_internal_session(
+ conn, "storage-mgr-server", true, 0, 0, &conn->tiered_mgr_session));
+ session = conn->tiered_mgr_session;
+
+ /*
+ * NOTE(review): the condition is named "storage server", the same name
+ * __wt_tiered_storage_create gives conn->tiered_cond -- confirm whether a distinct name was
+ * intended.
+ */
+ WT_RET(__wt_cond_alloc(session, "storage server", &conn->tiered_mgr_cond));
+
+ /* Start the thread. */
+ WT_RET(__wt_thread_create(session, &conn->tiered_mgr_tid, __tiered_mgr_server, session));
+ conn->tiered_mgr_tid_set = true;
+ return (0);
+}
/*
* __wt_tiered_storage_create --
- * Start the tiered storage server thread.
+ * Start the tiered storage subsystem.
*/
int
__wt_tiered_storage_create(WT_SESSION_IMPL *session, const char *cfg[], bool reconfig)
@@ -312,14 +366,14 @@ __wt_tiered_storage_create(WT_SESSION_IMPL *session, const char *cfg[], bool rec
/* Destroy any existing thread since we could be a reconfigure. */
WT_RET(__wt_tiered_storage_destroy(session));
- WT_RET(__tiered_config(session, cfg, &start, reconfig));
- if (!start)
- return (0);
+ if (reconfig)
+ WT_RET(__wt_tiered_conn_config(session, cfg, reconfig));
+ WT_RET(__tiered_manager_config(session, cfg, &start));
- /* Set first, the thread might run before we finish up. */
+ /* Start the internal thread. */
FLD_SET(conn->server_flags, WT_CONN_SERVER_TIERED);
- WT_ERR(__wt_open_internal_session(conn, "storage-server", true, 0, &conn->tiered_session));
+ WT_ERR(__wt_open_internal_session(conn, "storage-server", true, 0, 0, &conn->tiered_session));
session = conn->tiered_session;
WT_ERR(__wt_cond_alloc(session, "storage server", &conn->tiered_cond));
@@ -328,6 +382,10 @@ __wt_tiered_storage_create(WT_SESSION_IMPL *session, const char *cfg[], bool rec
WT_ERR(__wt_thread_create(session, &conn->tiered_tid, __tiered_server, session));
conn->tiered_tid_set = true;
+ /* After starting non-configurable threads, start the tiered manager if needed. */
+ if (start)
+ WT_ERR(__tiered_mgr_start(conn));
+
if (0) {
err:
WT_TRET(__wt_tiered_storage_destroy(session));
@@ -346,23 +404,32 @@ __wt_tiered_storage_destroy(WT_SESSION_IMPL *session)
WT_DECL_RET;
conn = S2C(session);
- __wt_free(session, conn->tiered_cluster);
- __wt_free(session, conn->tiered_member);
- /* Stop the server thread. */
- FLD_CLR(conn->server_flags, WT_CONN_SERVER_TIERED);
+ /* Stop the internal server thread. */
+ FLD_CLR(conn->server_flags, WT_CONN_SERVER_TIERED | WT_CONN_SERVER_TIERED_MGR);
if (conn->tiered_tid_set) {
__wt_cond_signal(session, conn->tiered_cond);
WT_TRET(__wt_thread_join(session, &conn->tiered_tid));
conn->tiered_tid_set = false;
}
__wt_cond_destroy(session, &conn->tiered_cond);
-
- /* Close the server thread's session. */
if (conn->tiered_session != NULL) {
WT_TRET(__wt_session_close_internal(conn->tiered_session));
conn->tiered_session = NULL;
}
+ /* Stop the storage manager thread. */
+ if (conn->tiered_mgr_tid_set) {
+ __wt_cond_signal(session, conn->tiered_mgr_cond);
+ WT_TRET(__wt_thread_join(session, &conn->tiered_mgr_tid));
+ conn->tiered_mgr_tid_set = false;
+ }
+ __wt_cond_destroy(session, &conn->tiered_mgr_cond);
+
+ if (conn->tiered_mgr_session != NULL) {
+ WT_TRET(__wt_session_close_internal(conn->tiered_mgr_session));
+ conn->tiered_mgr_session = NULL;
+ }
+
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c
index 28ffa72c6c8..2d7be4f4af7 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_backup.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c
@@ -335,10 +335,12 @@ __backup_add_id(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval)
blk = NULL;
for (i = 0; i < WT_BLKINCR_MAX; ++i) {
blk = &conn->incr_backups[i];
- __wt_verbose(session, WT_VERB_BACKUP, "blk[%u] flags 0x%" PRIx64, i, blk->flags);
/* If it isn't already in use, we can use it. */
- if (!F_ISSET(blk, WT_BLKINCR_INUSE))
+ if (!F_ISSET(blk, WT_BLKINCR_INUSE)) {
+ __wt_verbose(session, WT_VERB_BACKUP, "Free blk[%u] entry", i);
break;
+ }
+ __wt_verbose(session, WT_VERB_BACKUP, "Entry blk[%u] has flags 0x%" PRIx64, i, blk->flags);
}
/*
* We didn't find an entry. This should not happen.
@@ -364,11 +366,12 @@ __backup_add_id(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval)
/*
* If we don't find any checkpoint, backup files need to be full copy.
*/
- __wt_verbose(session, WT_VERB_BACKUP, "ID %s: Did not find any metadata checkpoint for %s.",
- blk->id_str, WT_METAFILE_URI);
+ __wt_verbose(session, WT_VERB_BACKUP,
+ "Backup id %s: Did not find any metadata checkpoint for %s.", blk->id_str,
+ WT_METAFILE_URI);
F_SET(blk, WT_BLKINCR_FULL);
} else {
- __wt_verbose(session, WT_VERB_BACKUP, "Using backup slot %u for id %s", i, blk->id_str);
+ __wt_verbose(session, WT_VERB_BACKUP, "Backup id %s using backup slot %u", blk->id_str, i);
F_CLR(blk, WT_BLKINCR_FULL);
}
F_SET(blk, WT_BLKINCR_VALID);
@@ -402,11 +405,12 @@ __backup_find_id(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_BLKINCR **in
WT_RET_MSG(session, EINVAL, "Incremental backup structure already in use");
if (incrp != NULL)
*incrp = blk;
- __wt_verbose(session, WT_VERB_BACKUP, "Found backup slot %u for id %s", i, blk->id_str);
+ __wt_verbose(
+ session, WT_VERB_BACKUP, "Found src id %s at backup slot %u", blk->id_str, i);
return (0);
}
}
- __wt_verbose(session, WT_VERB_BACKUP, "Did not find %.*s", (int)cval->len, cval->str);
+ __wt_verbose(session, WT_VERB_BACKUP, "Search %.*s not found", (int)cval->len, cval->str);
return (WT_NOTFOUND);
}
@@ -474,6 +478,8 @@ __backup_config(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, const char *cfg[
if (conn->incr_granularity != 0)
WT_RET_MSG(session, EINVAL, "Cannot change the incremental backup granularity");
conn->incr_granularity = (uint64_t)cval.val;
+ __wt_verbose(session, WT_VERB_BACKUP, "Backup config set granularity value %" PRIu64,
+ conn->incr_granularity);
}
/* Granularity can only be set once at the beginning */
F_SET(conn, WT_CONN_INCR_BACKUP);
diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c b/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c
index 1a1db66520e..9b31214a0ee 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_backup_incr.c
@@ -70,6 +70,12 @@ __curbackup_incr_blkmod(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_CURSOR_BAC
cb->nbits = (uint64_t)b.val;
WT_ERR(__wt_config_subgets(session, &v, "offset", &b));
cb->offset = (uint64_t)b.val;
+
+ __wt_verbose(session, WT_VERB_BACKUP,
+ "Found modified incr block gran %" PRIu64 " nbits %" PRIu64 " offset %" PRIu64,
+ cb->granularity, cb->nbits, cb->offset);
+ __wt_verbose(session, WT_VERB_BACKUP, "Modified incr block config: \"%s\"", config);
+
/*
* The rename configuration string component was added later. So don't error if we don't
* find it in the string. If we don't have it, we're not doing a rename.
@@ -144,6 +150,8 @@ __curbackup_incr_next(WT_CURSOR *cursor)
* incremental cursor below and return WT_NOTFOUND.
*/
F_SET(cb, WT_CURBACKUP_INCR_INIT);
+ __wt_verbose(session, WT_VERB_BACKUP, "Set key WT_BACKUP_FILE %s size %" PRIuMAX,
+ cb->incr_file, (uintmax_t)size);
__wt_cursor_set_key(cursor, 0, size, WT_BACKUP_FILE);
} else {
if (!F_ISSET(cb, WT_CURBACKUP_INCR_INIT)) {
@@ -171,6 +179,8 @@ __curbackup_incr_next(WT_CURSOR *cursor)
if (F_ISSET(cb, WT_CURBACKUP_RENAME) ||
(F_ISSET(cb, WT_CURBACKUP_CKPT_FAKE) && F_ISSET(cb, WT_CURBACKUP_HAS_CB_INFO))) {
WT_ERR(__wt_fs_size(session, cb->incr_file, &size));
+ __wt_verbose(session, WT_VERB_BACKUP,
+ "Set key WT_BACKUP_FILE %s size %" PRIuMAX, cb->incr_file, (uintmax_t)size);
__wt_cursor_set_key(cursor, 0, size, WT_BACKUP_FILE);
goto done;
}
@@ -206,6 +216,9 @@ __curbackup_incr_next(WT_CURSOR *cursor)
WT_ERR(WT_NOTFOUND);
WT_ASSERT(session, cb->granularity != 0);
WT_ASSERT(session, total_len != 0);
+ __wt_verbose(session, WT_VERB_BACKUP,
+ "Set key WT_BACKUP_RANGE %s offset %" PRIu64 " length %" PRIu64, cb->incr_file,
+ cb->offset + cb->granularity * start_bitoff, total_len);
__wt_cursor_set_key(
cursor, cb->offset + cb->granularity * start_bitoff, total_len, WT_BACKUP_RANGE);
}
diff --git a/src/third_party/wiredtiger/src/cursor/cur_std.c b/src/third_party/wiredtiger/src/cursor/cur_std.c
index 1e5d2a5dec7..19a50939a7a 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_std.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_std.c
@@ -757,18 +757,35 @@ err:
}
/*
+ * __wt_cursor_get_hash --
+ * Get the URI hash value: copy the hash cached on the cursor being duplicated if one is given,
+ * otherwise hash the URI string. Exactly one of uri and to_dup is expected to be non-NULL.
+ */
+void
+__wt_cursor_get_hash(
+ WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *to_dup, uint64_t *hash_value)
+{
+ if (to_dup != NULL) {
+ WT_ASSERT(session, uri == NULL);
+ *hash_value = to_dup->uri_hash;
+ } else {
+ WT_ASSERT(session, uri != NULL);
+ *hash_value = __wt_hash_city64(uri, strlen(uri));
+ }
+}
+
+/*
* __wt_cursor_cache_get --
* Open a matching cursor from the cache.
*/
int
-__wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *to_dup,
- const char *cfg[], WT_CURSOR **cursorp)
+__wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, uint64_t hash_value,
+ WT_CURSOR *to_dup, const char *cfg[], WT_CURSOR **cursorp)
{
WT_CONFIG_ITEM cval;
WT_CURSOR *cursor;
WT_CURSOR_BTREE *cbt;
WT_DECL_RET;
- uint64_t bucket, hash_value;
+ uint64_t bucket;
uint32_t overwrite_flag;
bool have_config;
@@ -818,18 +835,8 @@ __wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *to_d
return (WT_NOTFOUND);
}
- /*
- * Caller guarantees that exactly one of the URI and the duplicate cursor is non-NULL.
- */
- if (to_dup != NULL) {
- WT_ASSERT(session, uri == NULL);
+ if (to_dup != NULL)
uri = to_dup->uri;
- hash_value = to_dup->uri_hash;
- } else {
- WT_ASSERT(session, uri != NULL);
- hash_value = __wt_hash_city64(uri, strlen(uri));
- }
-
/*
* Walk through all cursors, if there is a cached cursor that matches uri and configuration, use
* it.
@@ -848,7 +855,8 @@ __wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *to_d
* For these configuration values, there is no difference in the resulting cursor other
* than flag values, so fix them up according to the given configuration.
*/
- F_CLR(cursor, WT_CURSTD_APPEND | WT_CURSTD_RAW | WT_CURSTD_OVERWRITE);
+ F_CLR(cursor,
+ WT_CURSTD_APPEND | WT_CURSTD_PREFIX_SEARCH | WT_CURSTD_RAW | WT_CURSTD_OVERWRITE);
F_SET(cursor, overwrite_flag);
/*
* If this is a btree cursor, clear its read_once flag.
@@ -1052,6 +1060,22 @@ __wt_cursor_reconfigure(WT_CURSOR *cursor, const char *config)
} else
WT_ERR_NOTFOUND_OK(ret, false);
+ /* Set the prefix search near flag. */
+ if ((ret = __wt_config_getones(session, config, "prefix_key", &cval)) == 0) {
+ if (cval.val) {
+ /* Prefix search near configuration can only be used for row-store. */
+ if (WT_CURSOR_RECNO(cursor))
+ WT_ERR_MSG(
+ session, EINVAL, "cannot use prefix key search near for column store formats");
+ if (CUR2BT(cursor)->collator != NULL)
+ WT_ERR_MSG(
+ session, EINVAL, "cannot use prefix key search near with a custom collator");
+ F_SET(cursor, WT_CURSTD_PREFIX_SEARCH);
+ } else
+ F_CLR(cursor, WT_CURSTD_PREFIX_SEARCH);
+ } else
+ WT_ERR_NOTFOUND_OK(ret, false);
+
WT_ERR(__cursor_config_debug(cursor, cfg));
err:
@@ -1113,8 +1137,11 @@ __wt_cursor_init(
session = CUR2S(cursor);
- if (cursor->internal_uri == NULL)
+ if (cursor->internal_uri == NULL) {
+ /* Various cursor code assumes there is an internal URI, so there better be one to set. */
+ WT_ASSERT(session, uri != NULL);
WT_RET(__wt_strdup(session, uri, &cursor->internal_uri));
+ }
/*
* append The append flag is only relevant to column stores.
diff --git a/src/third_party/wiredtiger/src/docs/arch-transaction.dox b/src/third_party/wiredtiger/src/docs/arch-transaction.dox
index d15a3cbb8d5..bb26829f509 100644
--- a/src/third_party/wiredtiger/src/docs/arch-transaction.dox
+++ b/src/third_party/wiredtiger/src/docs/arch-transaction.dox
@@ -156,14 +156,6 @@ chain, it checks the version on the disk image, which is the version that was ch
to disk in the last reconciliation. If it is still invisible, WiredTiger will search the history
store to check if there is a version visible to the reader there.
-The repeated read guarantee under snapshot isolation may break in one case if the timestamps
-committed to the updates are out of order, e.g,
-
-`U@20 -> U@30 -> U@15`
-
-In the above example, reading with timestamp 15 doesn't guarantee to return the third update. In
-some cases, users may read the second update U@30 if it is moved to the history store.
-
@subsection Durability
WiredTiger transactions support commit level durability and checkpoint level durability. An
diff --git a/src/third_party/wiredtiger/src/docs/custom-storage-sources.dox b/src/third_party/wiredtiger/src/docs/custom-storage-sources.dox
index e1a0b10644e..ffa8ef11783 100644
--- a/src/third_party/wiredtiger/src/docs/custom-storage-sources.dox
+++ b/src/third_party/wiredtiger/src/docs/custom-storage-sources.dox
@@ -63,12 +63,10 @@ It must always be provided when WiredTiger is reopened (again, with the ::wiredt
@section storage_examples Storage source examples
-There are two kinds of example code with overlapping functionality.
-A simple, self contained storage source example is in @ex_ref{ex_storage_source.c}.
-This example includes a small demo storage source that is a no-op and
-simply returns. This example also shows how a storage source is configured
-within an application. The second set of examples are in \c ext/storage. These are
-storage source only (no application level code), showing how a storage source
-might be packaged in a loadable shared library.
+An example of a storage source exists in \c ext/storage_sources/local_store/local_store.c.
+This storage source emulates cloud storage by storing all objects on the local file system.
+This example does not include application level code to call it. By default, WiredTiger builds
+it as a loadable shared library, and it can be loaded during a ::wiredtiger_open call as with
+any other extension, and \c local_store can be specified for use with the tiered storage system.
*/
diff --git a/src/third_party/wiredtiger/src/docs/examples.dox b/src/third_party/wiredtiger/src/docs/examples.dox
index 26167ab0631..d5a5102be61 100644
--- a/src/third_party/wiredtiger/src/docs/examples.dox
+++ b/src/third_party/wiredtiger/src/docs/examples.dox
@@ -52,9 +52,6 @@ Shows how to create column-oriented data and access individual columns.
@example ex_stat.c
Shows how to access database and table statistics.
-@example ex_storage_source.c
-Shows how to extend WiredTiger with a custom storage source implementation.
-
@example ex_thread.c
Shows how to access a database with multiple threads.
diff --git a/src/third_party/wiredtiger/src/docs/spell.ok b/src/third_party/wiredtiger/src/docs/spell.ok
index 8a19d8a5d27..cb5f89ab81d 100644
--- a/src/third_party/wiredtiger/src/docs/spell.ok
+++ b/src/third_party/wiredtiger/src/docs/spell.ok
@@ -121,6 +121,8 @@ WiredTiger
WiredTiger's
WiredTigerCheckpoint
WiredTigerException
+WiredTigerHS
+WiredTigerLAS
WiredTigerLog
WiredTigerPanicException
WiredTigerPreplog
@@ -499,6 +501,7 @@ readonly
realclean
realloc
realloc'd
+rebalance
recno
recnoN
recnum
diff --git a/src/third_party/wiredtiger/src/docs/top/main.dox b/src/third_party/wiredtiger/src/docs/top/main.dox
index 1df794243b6..bcb293f04b8 100644
--- a/src/third_party/wiredtiger/src/docs/top/main.dox
+++ b/src/third_party/wiredtiger/src/docs/top/main.dox
@@ -6,12 +6,12 @@ WiredTiger is an high performance, scalable, production quality, NoSQL,
@section releases Releases
<table>
-@row{<b>WiredTiger 3.2.1</b> (current),
+@row{<b>WiredTiger 10.0.0</b> (current),
+ <a href="releases/wiredtiger-10.0.0.tar.bz2"><b>[Release package]</b></a>,
+ <a href="10.0.0/index.html"><b>[Documentation]</b></a>}
+@row{<b>WiredTiger 3.2.1</b> (previous),
<a href="releases/wiredtiger-3.2.1.tar.bz2"><b>[Release package]</b></a>,
<a href="3.2.1/index.html"><b>[Documentation]</b></a>}
-@row{<b>WiredTiger 3.1.0</b> (previous),
- <a href="releases/wiredtiger-3.1.0.tar.bz2"><b>[Release package]</b></a>,
- <a href="3.1.0/index.html"><b>[Documentation]</b></a>}
@row{<b>Development branch</b>,
<a href="https://github.com/wiredtiger/wiredtiger"><b>[Source code]</b></a>,
<a href="develop/index.html"><b>[Documentation]</b></a>}
diff --git a/src/third_party/wiredtiger/src/docs/upgrading.dox b/src/third_party/wiredtiger/src/docs/upgrading.dox
index 5b072c319fd..e92dde30464 100644
--- a/src/third_party/wiredtiger/src/docs/upgrading.dox
+++ b/src/third_party/wiredtiger/src/docs/upgrading.dox
@@ -1,14 +1,59 @@
/*! @page upgrading Upgrading WiredTiger applications
</dl><hr>
-@section version_322 Upgrading to Version 3.2.2
+@section version_1000 Upgrading to Version 10.0.0
<dl>
+<dt>LAS and HS files</dt>
+<dd>
+The WiredTigerLAS.wt file will no longer be generated by the cache overflow mechanism.
+Instead, a WiredTigerHS.wt file will be generated as a history store for updates.
+As with other files generated and maintained by the WiredTiger storage engine, no manual
+intervention should be performed on the history store file.
+</dd>
+
+<dt>Default transaction isolation level</dt>
+<dd>
+The default transaction isolation level has been changed from "read-committed" to "snapshot"
+in WiredTiger.
+</dd>
+
+<dt>Read committed/uncommitted isolation level</dt>
+<dd>
+If the user provides a read committed/uncommitted isolation, WiredTiger will perform only
+read operations under this isolation. Any write operations will get an error.
+</dd>
+
+<dt>Python 2 support</dt>
+<dd>
+The support for Python 2 has been dropped from WiredTiger.
+</dd>
+
<dt>Asynchronous API</dt>
<dd>
The asynchronous API has been removed from WiredTiger.
</dd>
+<dt>Huffman Encoding support for keys</dt>
+<dd>
+The Huffman Encoding support for keys has been removed from WiredTiger.
+</dd>
+
+<dt>Transaction support for custom data sources</dt>
+<dd>
+The transaction support for custom data sources has been removed from WiredTiger.
+</dd>
+
+<dt>WT_SESSION.rebalance API</dt>
+<dd>
+The WT_SESSION.rebalance API has been removed from WiredTiger.
+</dd>
+
+<dt>Java language API</dt>
+<dd>
+The Java language API has been removed from WiredTiger.
+</dd>
+
<dt>Named snapshots</dt>
<dd>
Named snapshot functionality has been removed from WiredTiger as timestamps offer a better solution
@@ -17,6 +62,12 @@ across sessions. The WT_SESSION.begin_transaction method's \c snapshot configura
WT_SESSION::snapshot method have been removed and are no longer available.
</dd>
+<dt>Btree version and Compatibility with older releases</dt>
+<dd>
+The Btree version WT_BTREE_MAJOR_VERSION_MAX has been bumped in this release. Databases created with
+this release version cannot be downgraded to older versions as the underlying file format has changed.
+</dd>
+
</dl><hr>
@section version_321 Upgrading to Version 3.2.1
<dl>
diff --git a/src/third_party/wiredtiger/src/docs/wtperf.dox b/src/third_party/wiredtiger/src/docs/wtperf.dox
index 809472b3e80..916755ade32 100644
--- a/src/third_party/wiredtiger/src/docs/wtperf.dox
+++ b/src/third_party/wiredtiger/src/docs/wtperf.dox
@@ -128,6 +128,8 @@ configuration options:
DO NOT EDIT: THIS PART OF THE FILE IS GENERATED BY dist/s_docs.
\endif
+@par backup_interval (unsigned int, default=0)
+backup the database every interval seconds during the workload phase, 0 to disable
@par checkpoint_interval (unsigned int, default=120)
checkpoint every interval seconds during the workload phase.
@par checkpoint_stress_rate (unsigned int, default=0)
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 2269a925d3d..189952f0f42 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -292,11 +292,11 @@ __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread)
* set the flag on both sessions because we may call clear_walk when we are walking with the
* walk session, locked.
*/
- F_SET(session, WT_SESSION_LOCKED_PASS);
- F_SET(cache->walk_session, WT_SESSION_LOCKED_PASS);
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_PASS);
+ FLD_SET(cache->walk_session->lock_flags, WT_SESSION_LOCKED_PASS);
ret = __evict_server(session, &did_work);
- F_CLR(cache->walk_session, WT_SESSION_LOCKED_PASS);
- F_CLR(session, WT_SESSION_LOCKED_PASS);
+ FLD_CLR(cache->walk_session->lock_flags, WT_SESSION_LOCKED_PASS);
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_PASS);
was_intr = cache->pass_intr != 0;
__wt_spin_unlock(session, &cache->evict_pass_lock);
WT_ERR(ret);
@@ -733,11 +733,11 @@ __evict_pass(WT_SESSION_IMPL *session)
* race conditions that other threads can enter into the flow of evict server when there
* is already another server is running.
*/
- F_CLR(session, WT_SESSION_LOCKED_PASS);
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_PASS);
__wt_spin_unlock(session, &cache->evict_pass_lock);
ret = __evict_lru_pages(session, true);
__wt_spin_lock(session, &cache->evict_pass_lock);
- F_SET(session, WT_SESSION_LOCKED_PASS);
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_PASS);
WT_RET(ret);
}
@@ -809,7 +809,7 @@ __evict_clear_walk(WT_SESSION_IMPL *session)
btree = S2BT(session);
cache = S2C(session)->cache;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_PASS));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_PASS));
if (session->dhandle == cache->walk_tree)
cache->walk_tree = NULL;
@@ -1741,9 +1741,10 @@ __evict_walk_tree(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue, u_int max_ent
*/
if (!WT_IS_HS(btree->dhandle) && __wt_cache_hs_dirty(session)) {
/* If target pages are less than 10, keep it like that. */
- target_pages = target_pages < 10 ? target_pages : target_pages / 10;
- WT_STAT_CONN_INCR(session, cache_eviction_target_page_reduced);
- WT_STAT_DATA_INCR(session, cache_eviction_target_page_reduced);
+ if (target_pages >= 10) {
+ target_pages = target_pages / 10;
+ WT_STAT_CONN_DATA_INCR(session, cache_eviction_target_page_reduced);
+ }
}
/* If we don't want any pages from this tree, move on. */
@@ -2375,7 +2376,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, bool readonly, d
* rolled back. Ignore if in recovery, those transactions can't be rolled back.
*/
if (!F_ISSET(conn, WT_CONN_RECOVERING) && __wt_cache_stuck(session)) {
- ret = __wt_txn_is_blocking(session, false);
+ ret = __wt_txn_is_blocking(session);
if (ret == WT_ROLLBACK) {
--cache->evict_aggressive_score;
WT_STAT_CONN_INCR(session, txn_fail_cache);
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 5d6954cb594..26b38dc5996 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -576,6 +576,20 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags, bool
return (0);
/*
+ * If we are trying to evict a dirty page that does not belong to the history store (HS) and
+ * a checkpoint is processing the HS file, then avoid evicting the dirty non-HS page for now if
+ * the cache is already dominated by dirty HS content.
+ *
+ * Evicting a non-HS dirty page can generate even more HS content. As we can not evict HS pages
+ * while checkpoint is operating on the HS file, we can end up in a situation where we exceed
+ * the cache size limits.
+ */
+ if (conn->txn_global.checkpoint_running_hs && !WT_IS_HS(btree->dhandle) &&
+ __wt_cache_hs_dirty(session) && __wt_cache_full(session)) {
+ WT_STAT_CONN_INCR(session, cache_eviction_blocked_checkpoint_hs);
+ return (__wt_set_return(session, EBUSY));
+ }
+ /*
* If reconciliation is disabled for this thread (e.g., during an eviction that writes to the
* history store), give up.
*/
diff --git a/src/third_party/wiredtiger/src/history/hs_conn.c b/src/third_party/wiredtiger/src/history/hs_conn.c
index 6163d0042c7..25c4d4f695e 100644
--- a/src/third_party/wiredtiger/src/history/hs_conn.c
+++ b/src/third_party/wiredtiger/src/history/hs_conn.c
@@ -15,8 +15,7 @@
static int
__hs_start_internal_session(WT_SESSION_IMPL *session, WT_SESSION_IMPL **int_sessionp)
{
- WT_ASSERT(session, !F_ISSET(session, WT_CONN_HS_OPEN));
- return (__wt_open_internal_session(S2C(session), "hs_access", true, 0, int_sessionp));
+ return (__wt_open_internal_session(S2C(session), "hs_access", true, 0, 0, int_sessionp));
}
/*
diff --git a/src/third_party/wiredtiger/src/history/hs_cursor.c b/src/third_party/wiredtiger/src/history/hs_cursor.c
index 31da7b2cc9b..1799b068e7e 100644
--- a/src/third_party/wiredtiger/src/history/hs_cursor.c
+++ b/src/third_party/wiredtiger/src/history/hs_cursor.c
@@ -113,9 +113,9 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key,
WT_DECL_ITEM(orig_hs_value_buf);
WT_DECL_RET;
WT_ITEM hs_key, recno_key;
- WT_MODIFY_VECTOR modifies;
WT_TXN_SHARED *txn_shared;
WT_UPDATE *mod_upd;
+ WT_UPDATE_VECTOR modifies;
wt_timestamp_t durable_timestamp, durable_timestamp_tmp;
wt_timestamp_t hs_stop_durable_ts, hs_stop_durable_ts_tmp, read_timestamp;
uint64_t upd_type_full;
@@ -126,7 +126,7 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key,
mod_upd = NULL;
orig_hs_value_buf = NULL;
WT_CLEAR(hs_key);
- __wt_modify_vector_init(session, &modifies);
+ __wt_update_vector_init(session, &modifies);
txn_shared = WT_SESSION_TXN_SHARED(session);
upd_found = false;
@@ -201,7 +201,7 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key,
while (upd_type == WT_UPDATE_MODIFY) {
WT_ERR(__wt_upd_alloc(session, hs_value, upd_type, &mod_upd, NULL));
- WT_ERR(__wt_modify_vector_push(&modifies, mod_upd));
+ WT_ERR(__wt_update_vector_push(&modifies, mod_upd));
mod_upd = NULL;
/*
@@ -230,7 +230,7 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, uint32_t btree_id, WT_ITEM *key,
}
WT_ASSERT(session, upd_type == WT_UPDATE_STANDARD);
while (modifies.size > 0) {
- __wt_modify_vector_pop(&modifies, &mod_upd);
+ __wt_update_vector_pop(&modifies, &mod_upd);
WT_ERR(__wt_modify_apply_item(session, value_format, hs_value, mod_upd->data));
__wt_free_update_list(session, &mod_upd);
}
@@ -258,10 +258,10 @@ err:
__wt_free_update_list(session, &mod_upd);
while (modifies.size > 0) {
- __wt_modify_vector_pop(&modifies, &mod_upd);
+ __wt_update_vector_pop(&modifies, &mod_upd);
__wt_free_update_list(session, &mod_upd);
}
- __wt_modify_vector_free(&modifies);
+ __wt_update_vector_free(&modifies);
if (ret == 0) {
if (upd_found)
diff --git a/src/third_party/wiredtiger/src/history/hs_rec.c b/src/third_party/wiredtiger/src/history/hs_rec.c
index 56573d374bd..0e7e2424c57 100644
--- a/src/third_party/wiredtiger/src/history/hs_rec.c
+++ b/src/third_party/wiredtiger/src/history/hs_rec.c
@@ -8,10 +8,8 @@
#include "wt_internal.h"
-static int __hs_delete_key_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
- uint32_t btree_id, const WT_ITEM *key, bool reinsert);
-static int __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
- WT_BTREE *btree, const WT_ITEM *key, wt_timestamp_t ts, uint64_t *hs_counter);
+static int __hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
+ uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, uint64_t *hs_counter);
/*
* __hs_verbose_cache_stats --
@@ -89,6 +87,16 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree,
counter = 0;
/*
+ * We might be entering this code from application thread's context. We should make sure that we
+ * are not using snapshot associated with application session to perform visibility checks on
+ * history store records. Note that the history store cursor performs visibility checks based on
+ * snapshot if none of WT_CURSTD_HS_READ_ALL or WT_CURSTD_HS_READ_COMMITTED flags are set.
+ */
+ WT_ASSERT(session,
+ F_ISSET(session, WT_SESSION_INTERNAL) ||
+ F_ISSET(cursor, WT_CURSTD_HS_READ_ALL | WT_CURSTD_HS_READ_COMMITTED));
+
+ /*
* Keep track if the caller had set WT_CURSTD_HS_READ_ALL flag on the history store cursor. We
* want to preserve the flags set by the caller when we exit from this function. Also, we want
* to explicitly set the flag WT_CURSTD_HS_READ_ALL only for the search_near operations on the
@@ -142,13 +150,14 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree,
&upd_type_full_diag, existing_val));
WT_ERR(__wt_compare(session, NULL, existing_val, hs_value, &cmp));
/*
- * We shouldn't be inserting the same value again for the key unless coming from a
- * different transaction. If the updates are from the same transaction, the start
- * timestamp for each update should be different.
+ * The same value should not be inserted again unless (1) the previous entry is already
+ * deleted (i.e. its stop timestamp is globally visible), (2) it is from a different
+ * transaction, or (3) it has a different timestamp if from the same transaction.
*/
if (cmp == 0)
WT_ASSERT(session,
- tw->start_txn == WT_TXN_NONE ||
+ __wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw) ||
+ tw->start_txn == WT_TXN_NONE ||
tw->start_txn != hs_cbt->upd_value->tw.start_txn ||
tw->start_ts != hs_cbt->upd_value->tw.start_ts);
counter = hs_counter + 1;
@@ -160,30 +169,25 @@ __hs_insert_record(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BTREE *btree,
}
/*
- * If we're inserting a non-zero timestamp, look ahead for any higher timestamps. If we find
- * updates, we should remove them and reinsert them at the current timestamp.
+ * Look ahead for any higher timestamps. If we find updates, we should remove them and reinsert
+ * them at the current timestamp. If there were no keys equal to or less than our target key, we
+ * would have received WT_NOTFOUND. In that case we need to search again with a higher
+ * timestamp.
*/
- if (tw->start_ts != WT_TS_NONE) {
- /*
- * If there were no keys equal to or less than our target key, we would have received
- * WT_NOTFOUND. In that case we need to search again with a higher timestamp as the cursor
- * would not be positioned correctly.
- */
- if (ret == 0)
- WT_ERR_NOTFOUND_OK(cursor->next(cursor), true);
- else {
- F_SET(cursor, WT_CURSTD_HS_READ_ALL);
+ if (ret == 0)
+ WT_ERR_NOTFOUND_OK(cursor->next(cursor), true);
+ else {
+ F_SET(cursor, WT_CURSTD_HS_READ_ALL);
- cursor->set_key(cursor, 3, btree->id, key, tw->start_ts + 1);
- WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_after(session, cursor), true);
+ cursor->set_key(cursor, 3, btree->id, key, tw->start_ts + 1);
+ WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_after(session, cursor), true);
- if (!hs_read_all_flag)
- F_CLR(cursor, WT_CURSTD_HS_READ_ALL);
- }
- if (ret == 0)
- WT_ERR(__hs_fixup_out_of_order_from_pos(
- session, cursor, btree, key, tw->start_ts, &counter));
+ if (!hs_read_all_flag)
+ F_CLR(cursor, WT_CURSTD_HS_READ_ALL);
}
+ if (ret == 0)
+ WT_ERR(__hs_delete_reinsert_from_pos(
+ session, cursor, btree->id, key, tw->start_ts + 1, true, &counter));
#ifdef HAVE_DIAGNOSTIC
/*
@@ -222,21 +226,21 @@ err:
* Get the next update and its full value.
*/
static inline int
-__hs_next_upd_full_value(WT_SESSION_IMPL *session, WT_MODIFY_VECTOR *modifies,
+__hs_next_upd_full_value(WT_SESSION_IMPL *session, WT_UPDATE_VECTOR *updates,
WT_ITEM *older_full_value, WT_ITEM *full_value, WT_UPDATE **updp)
{
WT_UPDATE *upd;
*updp = NULL;
- __wt_modify_vector_pop(modifies, &upd);
+ __wt_update_vector_pop(updates, &upd);
if (upd->type == WT_UPDATE_TOMBSTONE) {
- if (modifies->size == 0) {
+ if (updates->size == 0) {
WT_ASSERT(session, older_full_value == NULL);
*updp = upd;
return (0);
}
- __wt_modify_vector_pop(modifies, &upd);
+ __wt_update_vector_pop(updates, &upd);
WT_ASSERT(session, upd->type == WT_UPDATE_STANDARD);
full_value->data = upd->data;
full_value->size = upd->size;
@@ -273,19 +277,18 @@ __wt_hs_insert_updates(
/* If the limit is exceeded, we will insert a full update to the history store */
#define MAX_REVERSE_MODIFY_NUM 16
WT_MODIFY entries[MAX_REVERSE_MODIFY_NUM];
- WT_MODIFY_VECTOR modifies;
+ WT_UPDATE_VECTOR updates;
+ WT_UPDATE_VECTOR out_of_order_ts_updates;
WT_SAVE_UPD *list;
- WT_UPDATE *first_globally_visible_upd, *first_non_ts_upd;
+ WT_UPDATE *first_globally_visible_upd, *fix_ts_upd, *min_ts_upd, *out_of_order_ts_upd;
WT_UPDATE *non_aborted_upd, *oldest_upd, *prev_upd, *tombstone, *upd;
WT_TIME_WINDOW tw;
wt_off_t hs_size;
- wt_timestamp_t min_insert_ts;
uint64_t insert_cnt, max_hs_size;
uint32_t i;
uint8_t *p;
int nentries;
- char ts_string[3][WT_TS_INT_STRING_SIZE];
- bool enable_reverse_modify, hs_inserted, squashed, ts_updates_in_hs;
+ bool enable_reverse_modify, hs_inserted, squashed;
*cache_write_hs = false;
btree = S2BT(session);
@@ -296,7 +299,17 @@ __wt_hs_insert_updates(
WT_RET(__wt_curhs_open(session, NULL, &hs_cursor));
F_SET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
- __wt_modify_vector_init(session, &modifies);
+ __wt_update_vector_init(session, &updates);
+ /*
+ * We use another stack to store the out-of-order timestamp updates (including updates without a
+ * timestamp). We walk the update chain from the newest to the oldest. Once an out-of-order
+ * timestamp update is detected, and it has a lower timestamp than the head of the stack, it is
+ * pushed to the stack. When we are inserting updates to the history store, we compare the
+ * update's timestamp with the head of the stack. If it is larger than the out-of-order
+ * timestamp, we fix the timestamp by inserting with the out-of-order timestamp. If the update
+ * we are inserting is the head of the stack, we pop it from the stack.
+ */
+ __wt_update_vector_init(session, &out_of_order_ts_updates);
if (!btree->hs_entries)
btree->hs_entries = true;
@@ -349,12 +362,8 @@ __wt_hs_insert_updates(
WT_ERR(__wt_illegal_value(session, page->type));
}
- first_globally_visible_upd = first_non_ts_upd = NULL;
- ts_updates_in_hs = false;
+ first_globally_visible_upd = min_ts_upd = out_of_order_ts_upd = NULL;
enable_reverse_modify = true;
- min_insert_ts = WT_TS_MAX;
-
- __wt_modify_vector_clear(&modifies);
/*
* The algorithm assumes the oldest update on the update chain in memory is either a full
@@ -390,32 +399,21 @@ __wt_hs_insert_updates(
non_aborted_upd = upd;
- /* If we've seen a smaller timestamp before, use that instead. */
- if (min_insert_ts < upd->start_ts) {
+ /* Detect out of order timestamp update. */
+ if (min_ts_upd != NULL && min_ts_upd->start_ts < upd->start_ts &&
+ out_of_order_ts_upd != min_ts_upd) {
/*
- * Resolved prepared updates will lose their durable timestamp here. This is a
- * wrinkle in our handling of out-of-order updates.
+ * Always insert full update to the history store if we detect out of order
+ * timestamp update.
*/
- if (upd->start_ts != upd->durable_ts) {
- WT_ASSERT(session, min_insert_ts < upd->durable_ts);
- WT_STAT_CONN_DATA_INCR(session, cache_hs_order_lose_durable_timestamp);
- }
- __wt_verbose(session, WT_VERB_TIMESTAMP,
- "fixing out-of-order updates during insertion; start_ts=%s, durable_start_ts=%s, "
- "min_insert_ts=%s",
- __wt_timestamp_to_string(upd->start_ts, ts_string[0]),
- __wt_timestamp_to_string(upd->durable_ts, ts_string[1]),
- __wt_timestamp_to_string(min_insert_ts, ts_string[2]));
- upd->start_ts = upd->durable_ts = min_insert_ts;
- WT_STAT_CONN_DATA_INCR(session, cache_hs_order_fixup_insert);
- } else if (upd->start_ts != WT_TS_NONE)
- /*
- * Don't reset to WT_TS_NONE as we don't want to clear the timestamps for updates
- * older than the update without timestamp.
- */
- min_insert_ts = upd->start_ts;
+ enable_reverse_modify = false;
+ WT_ERR(__wt_update_vector_push(&out_of_order_ts_updates, min_ts_upd));
+ out_of_order_ts_upd = min_ts_upd;
+ } else if (upd->prepare_state != WT_PREPARE_INPROGRESS &&
+ (min_ts_upd == NULL || upd->start_ts < min_ts_upd->start_ts))
+ min_ts_upd = upd;
- WT_ERR(__wt_modify_vector_push(&modifies, upd));
+ WT_ERR(__wt_update_vector_push(&updates, upd));
/* Track the first update that is globally visible. */
if (first_globally_visible_upd == NULL && __wt_txn_upd_visible_all(session, upd))
@@ -433,19 +431,6 @@ __wt_hs_insert_updates(
prev_upd->start_ts == upd->start_ts)
enable_reverse_modify = false;
- /* Always insert full update to the history store if the timestamps are not in order. */
- if (prev_upd != NULL && prev_upd->start_ts < upd->start_ts)
- enable_reverse_modify = false;
-
- /* Find the first update without timestamp. */
- if (first_non_ts_upd == NULL && upd->start_ts == WT_TS_NONE)
- first_non_ts_upd = upd;
- else if (first_non_ts_upd != NULL && upd->start_ts != WT_TS_NONE) {
- F_SET(upd, WT_UPDATE_BEHIND_MIXED_MODE);
- if (F_ISSET(upd, WT_UPDATE_HS))
- ts_updates_in_hs = true;
- }
-
/*
* No need to continue if we see the first self contained value after the first globally
* visible value.
@@ -464,54 +449,66 @@ __wt_hs_insert_updates(
prev_upd = upd = NULL;
/* Construct the oldest full update. */
- WT_ASSERT(session, modifies.size > 0);
+ WT_ASSERT(session, updates.size > 0);
- __wt_modify_vector_peek(&modifies, &oldest_upd);
+ __wt_update_vector_peek(&updates, &oldest_upd);
WT_ASSERT(session,
oldest_upd->type == WT_UPDATE_STANDARD || oldest_upd->type == WT_UPDATE_TOMBSTONE);
/*
- * Clear the history store here if the oldest update is a tombstone and it is the first
- * update without timestamp on the update chain because we don't have the cursor placed at
- * the correct place to delete the history store records when inserting the first update and
- * it may be skipped if there is nothing to insert to the history store.
+ * Fix the history store record here if the oldest update is a tombstone because we don't
+ * have the cursor placed at the correct place to fix the history store records when
+ * inserting the first update and it may be skipped if there is nothing to insert to the
+ * history store.
*/
- if (oldest_upd->type == WT_UPDATE_TOMBSTONE && oldest_upd == first_non_ts_upd &&
- !F_ISSET(first_non_ts_upd, WT_UPDATE_CLEARED_HS)) {
- /* We can only delete history store entries that have timestamps. */
- WT_ERR(__wt_hs_delete_key_from_ts(session, hs_cursor, btree->id, key, 1, true));
- WT_STAT_CONN_INCR(session, cache_hs_key_truncate_non_ts);
- WT_STAT_DATA_INCR(session, cache_hs_key_truncate_non_ts);
- F_SET(first_non_ts_upd, WT_UPDATE_CLEARED_HS);
- } else if (first_non_ts_upd != NULL && !F_ISSET(first_non_ts_upd, WT_UPDATE_CLEARED_HS) &&
- (list->ins == NULL || ts_updates_in_hs)) {
- WT_ERR(__wt_hs_delete_key_from_ts(session, hs_cursor, btree->id, key, 1, true));
- WT_STAT_CONN_INCR(session, cache_hs_key_truncate_non_ts);
- WT_STAT_DATA_INCR(session, cache_hs_key_truncate_non_ts);
- F_SET(first_non_ts_upd, WT_UPDATE_CLEARED_HS);
+ if (oldest_upd->type == WT_UPDATE_TOMBSTONE) {
+ if (out_of_order_ts_upd != NULL && out_of_order_ts_upd->start_ts < oldest_upd->start_ts)
+ fix_ts_upd = out_of_order_ts_upd;
+ else
+ fix_ts_upd = oldest_upd;
+
+ if (!F_ISSET(fix_ts_upd, WT_UPDATE_FIXED_HS)) {
+ /* Delete and reinsert any update of the key with a higher timestamp.
+ */
+ WT_ERR(__wt_hs_delete_key_from_ts(
+ session, hs_cursor, btree->id, key, fix_ts_upd->start_ts + 1, true));
+ F_SET(fix_ts_upd, WT_UPDATE_FIXED_HS);
+ }
}
- WT_ERR(__hs_next_upd_full_value(session, &modifies, NULL, full_value, &upd));
+ WT_ERR(__hs_next_upd_full_value(session, &updates, NULL, full_value, &upd));
hs_inserted = squashed = false;
/*
* Flush the updates on stack. Stopping once we run out or we reach the onpage upd start
- * time point, we can squash modifies with the same start time point as the onpage upd away.
+ * time point, we can squash updates with the same start time point as the onpage update
+ * away.
*/
- for (; modifies.size > 0 &&
+ for (; updates.size > 0 &&
!(upd->txnid == list->onpage_upd->txnid &&
upd->start_ts == list->onpage_upd->start_ts);
tmp = full_value, full_value = prev_full_value, prev_full_value = tmp,
upd = prev_upd) {
WT_ASSERT(session, upd->type == WT_UPDATE_STANDARD || upd->type == WT_UPDATE_MODIFY);
- tw.durable_start_ts = upd->durable_ts;
- tw.start_ts = upd->start_ts;
- tw.start_txn = upd->txnid;
tombstone = NULL;
- __wt_modify_vector_peek(&modifies, &prev_upd);
+ __wt_update_vector_peek(&updates, &prev_upd);
+
+ if (out_of_order_ts_updates.size > 0) {
+ __wt_update_vector_peek(&out_of_order_ts_updates, &out_of_order_ts_upd);
+ } else
+ out_of_order_ts_upd = NULL;
+
+ if (out_of_order_ts_upd != NULL && out_of_order_ts_upd->start_ts < upd->start_ts) {
+ tw.durable_start_ts = out_of_order_ts_upd->durable_ts;
+ tw.start_ts = out_of_order_ts_upd->start_ts;
+ } else {
+ tw.durable_start_ts = upd->durable_ts;
+ tw.start_ts = upd->start_ts;
+ }
+ tw.start_txn = upd->txnid;
/*
* For any uncommitted prepared updates written to disk, the stop timestamp of the last
@@ -531,8 +528,30 @@ __wt_hs_insert_updates(
* timestamp is globally visible. i.e. durable timestamp of data store version.
*/
WT_ASSERT(session, prev_upd->start_ts <= prev_upd->durable_ts);
- tw.durable_stop_ts = prev_upd->durable_ts;
- tw.stop_ts = prev_upd->start_ts;
+
+ /*
+ * Pop from the out of order timestamp updates stack if the previous update or the
+ * current update is at the head of the stack. We need to check both cases because
+ * if there is a tombstone older than the out of order timestamp, we would not pop
+ * it because we skip the tombstone. Pop it when we are inserting it instead.
+ */
+ if (out_of_order_ts_upd != NULL &&
+ ((out_of_order_ts_upd->txnid == prev_upd->txnid &&
+ out_of_order_ts_upd->start_ts == prev_upd->start_ts) ||
+ (out_of_order_ts_upd->txnid == upd->txnid &&
+ out_of_order_ts_upd->start_ts == upd->start_ts))) {
+ __wt_update_vector_pop(&out_of_order_ts_updates, &out_of_order_ts_upd);
+ out_of_order_ts_upd = NULL;
+ }
+
+ if (out_of_order_ts_upd != NULL &&
+ out_of_order_ts_upd->start_ts < prev_upd->start_ts) {
+ tw.durable_stop_ts = out_of_order_ts_upd->durable_ts;
+ tw.stop_ts = out_of_order_ts_upd->start_ts;
+ } else {
+ tw.durable_stop_ts = prev_upd->durable_ts;
+ tw.stop_ts = prev_upd->start_ts;
+ }
tw.stop_txn = prev_upd->txnid;
if (prev_upd->type == WT_UPDATE_TOMBSTONE)
@@ -540,7 +559,7 @@ __wt_hs_insert_updates(
}
WT_ERR(
- __hs_next_upd_full_value(session, &modifies, full_value, prev_full_value, &prev_upd));
+ __hs_next_upd_full_value(session, &updates, full_value, prev_full_value, &prev_upd));
/* Squash the updates from the same transaction. */
if (upd->start_ts == prev_upd->start_ts && upd->txnid == prev_upd->txnid) {
@@ -557,34 +576,6 @@ __wt_hs_insert_updates(
continue;
}
- /*
- * When we see an update older than a mixed mode update we need to insert it with a zero
- * start and stop timestamp. This means it'll still exist but only use txnid visibility
- * rules. As such older readers should still be able to see it.
- */
- if (F_ISSET(upd, WT_UPDATE_BEHIND_MIXED_MODE)) {
- tw.start_ts = tw.durable_start_ts = WT_TS_NONE;
- tw.stop_ts = tw.durable_stop_ts = WT_TS_NONE;
- }
-
- /*
- * If the time points are out of order (which can happen if the application performs
- * updates with out-of-order timestamps), so this value can never be seen, don't bother
- * inserting it. However if it was made obsolete by a mixed mode operation we still want
- * to insert it, it will be flagged as such.
- *
- * FIXME-WT-6443: We should be able to replace this with an assertion.
- */
- if (!F_ISSET(upd, WT_UPDATE_BEHIND_MIXED_MODE) &&
- (tw.stop_ts < upd->start_ts ||
- (tw.stop_ts == upd->start_ts && tw.stop_txn <= upd->txnid))) {
- __wt_verbose(session, WT_VERB_TIMESTAMP,
- "Warning: fixing out-of-order timestamps %s earlier than previous update %s",
- __wt_timestamp_to_string(tw.stop_ts, ts_string[0]),
- __wt_timestamp_to_string(upd->start_ts, ts_string[1]));
- continue;
- }
-
/* We should never write a prepared update to the history store. */
WT_ASSERT(session,
upd->prepare_state != WT_PREPARE_INPROGRESS &&
@@ -646,8 +637,24 @@ __wt_hs_insert_updates(
}
}
- if (modifies.size > 0)
+ /* If we squash the onpage value, there may be one or more updates left in the stack. */
+ if (updates.size > 0)
WT_STAT_CONN_DATA_INCR(session, cache_hs_write_squash);
+
+ __wt_update_vector_clear(&updates);
+ /*
+ * In the case that the onpage value is an out of order timestamp update and the update
+ * older than it is a tombstone, it remains in the stack. Clean it up.
+ */
+ WT_ASSERT(session, out_of_order_ts_updates.size <= 1);
+#ifdef HAVE_DIAGNOSTIC
+ if (out_of_order_ts_updates.size == 1) {
+ __wt_update_vector_peek(&out_of_order_ts_updates, &upd);
+ WT_ASSERT(session,
+ upd->txnid == list->onpage_upd->txnid && upd->start_ts == list->onpage_upd->start_ts);
+ }
+#endif
+ __wt_update_vector_clear(&out_of_order_ts_updates);
}
WT_ERR(__wt_block_manager_named_size(session, WT_HS_FILE, &hs_size));
@@ -671,7 +678,8 @@ err:
/* modify_value is allocated in __wt_modify_pack. Free it if it is allocated. */
if (modify_value != NULL)
__wt_scr_free(session, &modify_value);
- __wt_modify_vector_free(&modifies);
+ __wt_update_vector_free(&updates);
+ __wt_update_vector_free(&out_of_order_ts_updates);
__wt_scr_free(session, &full_value);
__wt_scr_free(session, &prev_full_value);
@@ -681,44 +689,59 @@ err:
/*
* __wt_hs_delete_key_from_ts --
- * Delete history store content of a given key from a timestamp.
+ * Delete the history store entries of a given key from a timestamp onward and optionally
+ * reinsert them with timestamp ts-1.
*/
int
__wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id,
const WT_ITEM *key, wt_timestamp_t ts, bool reinsert)
{
WT_DECL_RET;
- bool hs_read_committed;
+ WT_ITEM hs_key;
+ wt_timestamp_t hs_ts;
+ uint64_t hs_counter;
+ uint32_t hs_btree_id;
+ bool hs_read_all_flag;
+
+ /*
+ * If we will delete all the updates of the key from the history store, we should not reinsert
+ * any update.
+ */
+ WT_ASSERT(session, ts > WT_TS_NONE || !reinsert);
- hs_read_committed = F_ISSET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
- if (!hs_read_committed)
- F_SET(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
+ hs_read_all_flag = F_ISSET(hs_cursor, WT_CURSTD_HS_READ_ALL);
hs_cursor->set_key(hs_cursor, 3, btree_id, key, ts);
+ F_SET(hs_cursor, WT_CURSTD_HS_READ_ALL);
WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_after(session, hs_cursor), true);
/* Empty history store is fine. */
if (ret == WT_NOTFOUND) {
ret = 0;
goto done;
+ } else {
+ WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_ts, &hs_counter));
+ ++hs_counter;
}
- WT_ERR(__hs_delete_key_from_pos(session, hs_cursor, btree_id, key, reinsert));
+ WT_ERR(
+ __hs_delete_reinsert_from_pos(session, hs_cursor, btree_id, key, ts, reinsert, &hs_counter));
done:
err:
- if (!hs_read_committed)
- F_CLR(hs_cursor, WT_CURSTD_HS_READ_COMMITTED);
+ if (!hs_read_all_flag)
+ F_CLR(hs_cursor, WT_CURSTD_HS_READ_ALL);
return (ret);
}
/*
- * __hs_fixup_out_of_order_from_pos --
- * Fixup existing out-of-order updates in the history store. This function works by looking
- * ahead of the current cursor position for entries for the same key, removing them and
- * reinserting them at the timestamp that is currently being inserted.
+ * __hs_delete_reinsert_from_pos --
+ * Delete updates in the history store whose start timestamp is larger than or equal to the
+ * specified timestamp and optionally reinsert them with timestamp ts-1. This function works
+ * by looking ahead of the current cursor position for entries for the same key and removing
+ * them.
*/
static int
-__hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_BTREE *btree,
- const WT_ITEM *key, wt_timestamp_t ts, uint64_t *counter)
+__hs_delete_reinsert_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id,
+ const WT_ITEM *key, wt_timestamp_t ts, bool reinsert, uint64_t *counter)
{
WT_CURSOR *hs_insert_cursor;
WT_CURSOR_BTREE *hs_cbt;
@@ -741,24 +764,21 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
#ifndef HAVE_DIAGNOSTIC
WT_UNUSED(key);
#endif
- /*
- * Position ourselves at the beginning of the key range that we may have to fixup. Prior to
- * getting here, we've positioned our cursor at the end of a key/timestamp range and then done a
- * "next". Normally that would leave us pointing at higher timestamps for the same key (if any)
- * but in the case where our insertion timestamp is the lowest for that key, our cursor may be
- * pointing at the previous key and can potentially race with additional key insertions. We need
- * to keep doing "next" until we've got a key greater than the one we attempted to position
- * ourselves with.
- */
+
+ /* If we will delete all the updates of the key from the history store, we should not reinsert
+ * any update. */
+ WT_ASSERT(session, ts > WT_TS_NONE || !reinsert);
+
for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
/* We shouldn't have crossed the btree and user key search space. */
WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_ts, &hs_counter));
- WT_ASSERT(session, hs_btree_id == btree->id);
+ WT_ASSERT(session, hs_btree_id == btree_id);
#ifdef HAVE_DIAGNOSTIC
WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
WT_ASSERT(session, cmp == 0);
#endif
- if (hs_ts > ts)
+ /* Found an entry with a timestamp larger than or equal to the specified timestamp. */
+ if (hs_ts >= ts)
break;
}
if (ret == WT_NOTFOUND)
@@ -766,7 +786,7 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
WT_ERR(ret);
/*
- * The goal of this fixup function is to move out-of-order content to maintain ordering in the
+ * The goal of this function is to move out-of-order content to maintain ordering in the
* history store. We do this by removing content with higher timestamps and reinserting it
* behind (from search's point of view) the newly inserted update. Even though these updates
* will all have the same timestamp, they cannot be discarded since older readers may need to
@@ -784,11 +804,24 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
* 2 foo 3 1 bbb
* 2 foo 3 2 ccc
* 2 foo 3 3 ddd
+ *
+ * Another example, if we're inserting an update at timestamp 0 with value ddd:
+ * btree key ts counter value
+ * 2 foo 5 0 aaa
+ * 2 foo 6 0 bbb
+ * 2 foo 7 0 ccc
+ *
+ * We want to end up with this:
+ * btree key ts counter value
+ * 2 foo 0 0 aaa
+ * 2 foo 0 1 bbb
+ * 2 foo 0 2 ccc
+ * 2 foo 0 3 ddd
*/
for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
/* We shouldn't have crossed the btree and user key search space. */
WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_ts, &hs_counter));
- WT_ASSERT(session, hs_btree_id == btree->id);
+ WT_ASSERT(session, hs_btree_id == btree_id);
#ifdef HAVE_DIAGNOSTIC
WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
WT_ASSERT(session, cmp == 0);
@@ -796,170 +829,70 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
/*
* If we got here, we've got out-of-order updates in the history store.
*
- * Our strategy to rectify this is to remove all records for the same key with a higher
- * timestamp than the one that we're inserting on and reinsert them at the same timestamp
- * that we're inserting with.
+ * Our strategy to rectify this is to remove all records for the same key with a timestamp
+ * higher than or equal to the specified timestamp and reinsert them at the smaller
+ * timestamp, which is the timestamp of the update we are about to insert to the history store.
*/
- WT_ASSERT(session, hs_ts > ts);
-
- /*
- * Don't incur the overhead of opening this new cursor unless we need it. In the regular
- * case, we'll never get here.
- */
- if (hs_insert_cursor == NULL)
- WT_ERR(__wt_curhs_open(session, NULL, &hs_insert_cursor));
-
- /*
- * If these history store records are resolved prepared updates, their durable timestamps
- * will be clobbered by our fix-up process. Keep track of how often this is happening.
- */
- if (hs_cbt->upd_value->tw.start_ts != hs_cbt->upd_value->tw.durable_start_ts ||
- hs_cbt->upd_value->tw.stop_ts != hs_cbt->upd_value->tw.durable_stop_ts)
- WT_STAT_CONN_DATA_INCR(session, cache_hs_order_lose_durable_timestamp);
-
- __wt_verbose(session, WT_VERB_TIMESTAMP,
- "fixing existing out-of-order updates by moving them; start_ts=%s, durable_start_ts=%s, "
- "stop_ts=%s, durable_stop_ts=%s, new_ts=%s",
- __wt_timestamp_to_string(hs_cbt->upd_value->tw.start_ts, ts_string[0]),
- __wt_timestamp_to_string(hs_cbt->upd_value->tw.durable_start_ts, ts_string[1]),
- __wt_timestamp_to_string(hs_cbt->upd_value->tw.stop_ts, ts_string[2]),
- __wt_timestamp_to_string(hs_cbt->upd_value->tw.durable_stop_ts, ts_string[3]),
- __wt_timestamp_to_string(ts, ts_string[4]));
-
- hs_insert_tw.start_ts = hs_insert_tw.durable_start_ts = ts;
- hs_insert_tw.start_txn = hs_cbt->upd_value->tw.start_txn;
-
- /*
- * We're going to be inserting something immediately after with the same timestamp. Either
- * another moved update OR the update itself that triggered the correction. In either case,
- * we should preserve the stop transaction id.
- */
- hs_insert_tw.stop_ts = hs_insert_tw.durable_stop_ts = ts;
- hs_insert_tw.stop_txn = hs_cbt->upd_value->tw.stop_txn;
-
- WT_ASSERT(session, hs_insert_tw.stop_txn >= hs_insert_tw.start_txn);
-
- /* Extract the underlying value for reinsertion. */
- WT_ERR(hs_cursor->get_value(
- hs_cursor, &tw.durable_stop_ts, &tw.durable_start_ts, &hs_upd_type, &hs_value));
-
- /* Insert the value back with different timestamps. */
- hs_insert_cursor->set_key(hs_insert_cursor, 4, btree->id, &hs_key, ts, *counter);
- hs_insert_cursor->set_value(hs_insert_cursor, &hs_insert_tw, hs_insert_tw.durable_stop_ts,
- hs_insert_tw.durable_start_ts, (uint64_t)hs_upd_type, &hs_value);
- WT_ERR(hs_insert_cursor->insert(hs_insert_cursor));
- ++(*counter);
+ WT_ASSERT(session, hs_ts >= ts);
- /* Delete the entry with higher timestamp. */
- WT_ERR(hs_cursor->remove(hs_cursor));
- WT_STAT_CONN_INCR(session, cache_hs_order_fixup_move);
- WT_STAT_DATA_INCR(session, cache_hs_order_fixup_move);
- }
- if (ret == WT_NOTFOUND)
- ret = 0;
-err:
- if (hs_insert_cursor != NULL)
- hs_insert_cursor->close(hs_insert_cursor);
- return (ret);
-}
-
-/*
- * __hs_delete_key_from_pos --
- * Delete an entire key's worth of data in the history store. If we chose to reinsert the values
- * the reinserted values will have 0 start and stop timestamps to ensure that they only use
- * txnid based visibility rules.
- */
-static int
-__hs_delete_key_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, uint32_t btree_id,
- const WT_ITEM *key, bool reinsert)
-{
- WT_CURSOR *hs_insert_cursor;
- WT_CURSOR_BTREE *hs_cbt;
- WT_DECL_RET;
- WT_ITEM hs_key, hs_value;
- WT_TIME_WINDOW hs_insert_tw;
- wt_timestamp_t durable_timestamp, hs_start_ts, hs_stop_durable_ts;
- uint64_t hs_counter, hs_insert_counter, hs_upd_type;
- uint32_t hs_btree_id;
-
- hs_cbt = __wt_curhs_get_cbt(hs_cursor);
- hs_insert_counter = 0;
- WT_CLEAR(hs_key);
- WT_CLEAR(hs_value);
-
- hs_insert_cursor = NULL;
- if (reinsert) {
- /*
- * Determine the starting value of our counter, i.e. highest counter value of the timestamp
- * range for timestamp 0. We'll be inserting at timestamp 0 and don't want to overwrite a
- * currently existing counter.
- *
- * The cursor will also be positioned at the start of the range that we wish to start
- * inserting.
- */
- WT_WITHOUT_DHANDLE(session, ret = __wt_curhs_open(session, NULL, &hs_insert_cursor));
- WT_ERR(ret);
- F_SET(hs_insert_cursor, WT_CURSTD_HS_READ_COMMITTED);
- hs_insert_cursor->set_key(hs_insert_cursor, 4, btree_id, key, WT_TS_NONE, UINT64_MAX);
- WT_ERR_NOTFOUND_OK(__wt_curhs_search_near_before(session, hs_insert_cursor), true);
-
- if (ret == WT_NOTFOUND) {
- hs_insert_counter = 0;
- ret = 0;
- } else {
- WT_ERR(hs_insert_cursor->get_key(
- hs_insert_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_insert_counter));
- WT_ASSERT(session, hs_start_ts == WT_TS_NONE);
+ if (reinsert) {
/*
- * Increment the history store counter that we'll be using to insert with to avoid
- * overwriting the record we just found.
+ * Don't incur the overhead of opening this new cursor unless we need it. In the regular
+ * case, we'll never get here.
*/
- hs_insert_counter++;
- }
- }
-
- /* Begin iterating over the range of entries we expect to replace. */
- for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
- WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter));
+ if (hs_insert_cursor == NULL)
+ WT_ERR(__wt_curhs_open(session, NULL, &hs_insert_cursor));
- if (reinsert) {
- WT_ERR(hs_cursor->get_value(
- hs_cursor, &hs_stop_durable_ts, &durable_timestamp, &hs_upd_type, &hs_value));
-
- /* Reinsert entry with zero timestamp. */
- hs_insert_tw.start_ts = hs_insert_tw.durable_start_ts = WT_TS_NONE;
+ /*
+ * If these history store records are resolved prepared updates, their durable
+ * timestamps will be clobbered by our fix-up process. Keep track of how often this is
+ * happening.
+ */
+ if (hs_cbt->upd_value->tw.start_ts != hs_cbt->upd_value->tw.durable_start_ts ||
+ hs_cbt->upd_value->tw.stop_ts != hs_cbt->upd_value->tw.durable_stop_ts)
+ WT_STAT_CONN_DATA_INCR(session, cache_hs_order_lose_durable_timestamp);
+
+ __wt_verbose(session, WT_VERB_TIMESTAMP,
+ "fixing existing out-of-order updates by moving them; start_ts=%s, "
+ "durable_start_ts=%s, "
+ "stop_ts=%s, durable_stop_ts=%s, new_ts=%s",
+ __wt_timestamp_to_string(hs_cbt->upd_value->tw.start_ts, ts_string[0]),
+ __wt_timestamp_to_string(hs_cbt->upd_value->tw.durable_start_ts, ts_string[1]),
+ __wt_timestamp_to_string(hs_cbt->upd_value->tw.stop_ts, ts_string[2]),
+ __wt_timestamp_to_string(hs_cbt->upd_value->tw.durable_stop_ts, ts_string[3]),
+ __wt_timestamp_to_string(ts, ts_string[4]));
+
+ hs_insert_tw.start_ts = hs_insert_tw.durable_start_ts = ts - 1;
hs_insert_tw.start_txn = hs_cbt->upd_value->tw.start_txn;
- hs_insert_tw.stop_ts = hs_insert_tw.durable_stop_ts = WT_TS_NONE;
+ /*
+ * We're going to insert something immediately after with the smaller timestamp: either
+ * another moved update OR the update itself that triggered the correction. In either
+ * case, we should preserve the stop transaction id.
+ */
+ hs_insert_tw.stop_ts = hs_insert_tw.durable_stop_ts = ts - 1;
hs_insert_tw.stop_txn = hs_cbt->upd_value->tw.stop_txn;
+ /* Extract the underlying value for reinsertion. */
+ WT_ERR(hs_cursor->get_value(
+ hs_cursor, &tw.durable_stop_ts, &tw.durable_start_ts, &hs_upd_type, &hs_value));
+
+ /* Insert the value back with different timestamps. */
hs_insert_cursor->set_key(
- hs_insert_cursor, 4, btree_id, key, WT_TS_NONE, hs_insert_counter);
- hs_insert_cursor->set_value(hs_insert_cursor, &hs_insert_tw, WT_TS_NONE, WT_TS_NONE,
- (uint64_t)hs_upd_type, &hs_value);
+ hs_insert_cursor, 4, btree_id, &hs_key, hs_insert_tw.start_ts, *counter);
+ hs_insert_cursor->set_value(hs_insert_cursor, &hs_insert_tw,
+ hs_insert_tw.durable_stop_ts, hs_insert_tw.durable_start_ts, (uint64_t)hs_upd_type,
+ &hs_value);
WT_ERR(hs_insert_cursor->insert(hs_insert_cursor));
- WT_STAT_CONN_INCR(session, cache_hs_insert);
- WT_STAT_DATA_INCR(session, cache_hs_insert);
-
- hs_insert_counter++;
+ ++(*counter);
+ WT_STAT_CONN_INCR(session, cache_hs_order_reinsert);
+ WT_STAT_DATA_INCR(session, cache_hs_order_reinsert);
}
- /*
- * Remove the key using history store cursor interface.
- *
- * If anything fails after this point and we're reinserting we need to panic as it will
- * leave our history store in an unexpected state with duplicate entries.
- */
- if ((ret = hs_cursor->remove(hs_cursor)) != 0) {
- if (reinsert)
- WT_ERR_PANIC(session, WT_PANIC,
- "Failed to insert tombstone, history store now "
- " contains duplicate values.");
- else
- WT_ERR(ret);
- }
- WT_STAT_CONN_INCR(session, cache_hs_key_truncate);
- WT_STAT_DATA_INCR(session, cache_hs_key_truncate);
+ /* Delete the out-of-order entry. */
+ WT_ERR(hs_cursor->remove(hs_cursor));
+ WT_STAT_CONN_INCR(session, cache_hs_order_remove);
+ WT_STAT_DATA_INCR(session, cache_hs_order_remove);
}
if (ret == WT_NOTFOUND)
ret = 0;
diff --git a/src/third_party/wiredtiger/src/include/block.h b/src/third_party/wiredtiger/src/include/block.h
index 3b0370f63dd..b8a982e1713 100644
--- a/src/third_party/wiredtiger/src/include/block.h
+++ b/src/third_party/wiredtiger/src/include/block.h
@@ -185,6 +185,7 @@ struct __wt_bm {
int (*compact_skip)(WT_BM *, WT_SESSION_IMPL *, bool *);
int (*compact_start)(WT_BM *, WT_SESSION_IMPL *);
int (*corrupt)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t);
+ int (*flush_tier)(WT_BM *, WT_SESSION_IMPL *, uint8_t **, size_t *);
int (*free)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t);
bool (*is_mapped)(WT_BM *, WT_SESSION_IMPL *);
int (*map_discard)(WT_BM *, WT_SESSION_IMPL *, void *, size_t);
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 79297af1743..24562280ac1 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -1111,14 +1111,13 @@ struct __wt_update {
volatile uint8_t prepare_state; /* prepare state */
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_UPDATE_BEHIND_MIXED_MODE 0x01u /* Update that older than a mixed mode update. */
-#define WT_UPDATE_CLEARED_HS 0x02u /* Update that cleared the history store. */
-#define WT_UPDATE_DS 0x04u /* Update has been written to the data store. */
-#define WT_UPDATE_HS 0x08u /* Update has been written to history store. */
-#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x10u /* Prepared update restored from data store. */
-#define WT_UPDATE_RESTORED_FAST_TRUNCATE 0x20u /* Fast truncate instantiation */
-#define WT_UPDATE_RESTORED_FROM_DS 0x40u /* Update restored from data store. */
-#define WT_UPDATE_RESTORED_FROM_HS 0x80u /* Update restored from history store. */
+#define WT_UPDATE_DS 0x01u /* Update has been written to the data store. */
+#define WT_UPDATE_FIXED_HS 0x02u /* Update that fixed the history store. */
+#define WT_UPDATE_HS 0x04u /* Update has been written to history store. */
+#define WT_UPDATE_PREPARE_RESTORED_FROM_DS 0x08u /* Prepared update restored from data store. */
+#define WT_UPDATE_RESTORED_FAST_TRUNCATE 0x10u /* Fast truncate instantiation */
+#define WT_UPDATE_RESTORED_FROM_DS 0x20u /* Update restored from data store. */
+#define WT_UPDATE_RESTORED_FROM_HS 0x40u /* Update restored from history store. */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint8_t flags;
@@ -1187,17 +1186,17 @@ struct __wt_update_value {
* avoid heap allocation, add a few additional slots to that array.
*/
#define WT_MAX_MODIFY_UPDATE 10
-#define WT_MODIFY_VECTOR_STACK_SIZE (WT_MAX_MODIFY_UPDATE + 10)
+#define WT_UPDATE_VECTOR_STACK_SIZE 20
/*
- * WT_MODIFY_VECTOR --
- * A resizable array for storing modify updates. The allocation strategy is similar to that of
+ * WT_UPDATE_VECTOR --
+ * A resizable array for storing updates. The allocation strategy is similar to that of
* llvm::SmallVector<T> where we keep space on the stack for the regular case but fall back to
* dynamic allocation as needed.
*/
-struct __wt_modify_vector {
+struct __wt_update_vector {
WT_SESSION_IMPL *session;
- WT_UPDATE *list[WT_MODIFY_VECTOR_STACK_SIZE];
+ WT_UPDATE *list[WT_UPDATE_VECTOR_STACK_SIZE];
WT_UPDATE **listp;
size_t allocated_bytes;
size_t size;
diff --git a/src/third_party/wiredtiger/src/include/btree_cmp_inline.h b/src/third_party/wiredtiger/src/include/btree_cmp_inline.h
index 18d8a8e5158..0c7eaf9fdb9 100644
--- a/src/third_party/wiredtiger/src/include/btree_cmp_inline.h
+++ b/src/third_party/wiredtiger/src/include/btree_cmp_inline.h
@@ -23,11 +23,12 @@
* __wt_lex_compare --
* Lexicographic comparison routine. Returns: < 0 if user_item is lexicographically < tree_item
* = 0 if user_item is lexicographically = tree_item > 0 if user_item is lexicographically >
- * tree_item We use the names "user" and "tree" so it's clear in the btree code which the
- * application is looking at when we call its comparison function.
+ * tree_item. We use the names "user" and "tree" so it's clear in the btree code which the
+ * application is looking at when we call its comparison function. If prefix is set, 0 is also
+ * returned when user_item and tree_item are equal over the length of the shorter item.
*/
static inline int
-__wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item)
+__wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item, bool prefix)
{
size_t len, usz, tsz;
const uint8_t *userp, *treep;
@@ -92,7 +93,7 @@ __wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item)
return (*userp < *treep ? -1 : 1);
/* Contents are equal up to the smallest length. */
- return ((usz == tsz) ? 0 : (usz < tsz) ? -1 : 1);
+ return ((usz == tsz || prefix) ? 0 : (usz < tsz) ? -1 : 1);
}
/*
@@ -104,13 +105,23 @@ __wt_compare(WT_SESSION_IMPL *session, WT_COLLATOR *collator, const WT_ITEM *use
const WT_ITEM *tree_item, int *cmpp)
{
if (collator == NULL) {
- *cmpp = __wt_lex_compare(user_item, tree_item);
+ *cmpp = __wt_lex_compare(user_item, tree_item, false);
return (0);
}
return (collator->compare(collator, &session->iface, user_item, tree_item, cmpp));
}
/*
+ * __wt_prefix_match --
+ * Check if the prefix item is equal to the leading bytes of the tree item.
+ */
+static inline int
+__wt_prefix_match(const WT_ITEM *prefix, const WT_ITEM *tree_item)
+{
+ return (__wt_lex_compare(prefix, tree_item, true));
+}
+
+/*
* __wt_lex_compare_skip --
* Lexicographic comparison routine, skipping leading bytes. Returns: < 0 if user_item is
* lexicographically < tree_item = 0 if user_item is lexicographically = tree_item > 0 if
diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h
index f2899eda401..61f1e9f6f1b 100644
--- a/src/third_party/wiredtiger/src/include/cache.h
+++ b/src/third_party/wiredtiger/src/include/cache.h
@@ -246,7 +246,7 @@ struct __wt_cache {
#define WT_WITH_PASS_LOCK(session, op) \
do { \
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_PASS)); \
+ WT_ASSERT(session, !FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_PASS)); \
WT_WITH_LOCK_WAIT(session, &cache->evict_pass_lock, WT_SESSION_LOCKED_PASS, op); \
} while (0)
diff --git a/src/third_party/wiredtiger/src/include/cache_inline.h b/src/third_party/wiredtiger/src/include/cache_inline.h
index 7bfa9dd70cd..866c19d6172 100644
--- a/src/third_party/wiredtiger/src/include/cache_inline.h
+++ b/src/third_party/wiredtiger/src/include/cache_inline.h
@@ -260,7 +260,8 @@ __wt_session_can_wait(WT_SESSION_IMPL *session)
* LSM sets the "ignore cache size" flag when holding the LSM tree lock, in that case, or when
* holding the schema lock, we don't want this thread to block for eviction.
*/
- return (!F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_LOCKED_SCHEMA));
+ return (!(F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE) ||
+ FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA)));
}
/*
@@ -498,9 +499,9 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool readonly, bo
* holding the handle list, schema or table locks (which can block checkpoints and eviction),
* don't block the thread for eviction.
*/
- if (F_ISSET(session,
- WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_LOCKED_SCHEMA |
- WT_SESSION_LOCKED_TABLE))
+ if (F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE) ||
+ FLD_ISSET(session->lock_flags,
+ WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_LOCKED_SCHEMA | WT_SESSION_LOCKED_TABLE))
return (0);
/* In memory configurations don't block when the cache is full. */
diff --git a/src/third_party/wiredtiger/src/include/config.h b/src/third_party/wiredtiger/src/include/config.h
index 9830801a01e..3b092857ed3 100644
--- a/src/third_party/wiredtiger/src/include/config.h
+++ b/src/third_party/wiredtiger/src/include/config.h
@@ -100,12 +100,14 @@ struct __wt_config_parser_impl {
#define WT_CONFIG_ENTRY_file_meta 46
#define WT_CONFIG_ENTRY_index_meta 47
#define WT_CONFIG_ENTRY_lsm_meta 48
-#define WT_CONFIG_ENTRY_table_meta 49
-#define WT_CONFIG_ENTRY_tiered_meta 50
-#define WT_CONFIG_ENTRY_wiredtiger_open 51
-#define WT_CONFIG_ENTRY_wiredtiger_open_all 52
-#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 53
-#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 54
+#define WT_CONFIG_ENTRY_object_meta 49
+#define WT_CONFIG_ENTRY_table_meta 50
+#define WT_CONFIG_ENTRY_tier_meta 51
+#define WT_CONFIG_ENTRY_tiered_meta 52
+#define WT_CONFIG_ENTRY_wiredtiger_open 53
+#define WT_CONFIG_ENTRY_wiredtiger_open_all 54
+#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 55
+#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 56
/*
* configuration section: END
* DO NOT EDIT: automatically built by dist/flags.py.
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index 61bbe022371..fc40f05e0d5 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -33,10 +33,11 @@ extern WT_PROCESS __wt_process;
/*
* WT_BUCKET_STORAGE --
- * A list entry for a storage source with a unique (name, bucket).
+ * A list entry for a storage source with a unique name (bucket, prefix).
*/
struct __wt_bucket_storage {
- const char *bucket; /* Bucket location */
+ const char *bucket; /* Bucket name */
+ const char *bucket_prefix; /* Bucket prefix */
int owned; /* Storage needs to be terminated */
uint64_t object_size; /* Tiered object size */
uint64_t retain_secs; /* Tiered period */
@@ -53,6 +54,15 @@ struct __wt_bucket_storage {
uint32_t flags;
};
+/* Run e with (s)->bucket_storage set to bsto (S2C(s)->bstorage if NULL), then restore it. */
+#define WT_WITH_BUCKET_STORAGE(bsto, s, e) \
+ do { \
+ WT_BUCKET_STORAGE *__saved_bstorage = (s)->bucket_storage; \
+ (s)->bucket_storage = ((bsto) == NULL ? S2C(s)->bstorage : (bsto)); \
+ e; \
+ (s)->bucket_storage = __saved_bstorage; \
+ } while (0)
+
/*
* WT_KEYED_ENCRYPTOR --
* A list entry for an encryptor with a unique (name, keyid).
@@ -156,22 +166,22 @@ struct __wt_name_flag {
* Macros to ensure the dhandle is inserted or removed from both the main queue and the hashed
* queue.
*/
-#define WT_CONN_DHANDLE_INSERT(conn, dhandle, bucket) \
- do { \
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \
- TAILQ_INSERT_HEAD(&(conn)->dhqh, dhandle, q); \
- TAILQ_INSERT_HEAD(&(conn)->dhhash[bucket], dhandle, hashq); \
- ++(conn)->dh_bucket_count[bucket]; \
- ++(conn)->dhandle_count; \
+#define WT_CONN_DHANDLE_INSERT(conn, dhandle, bucket) \
+ do { \
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \
+ TAILQ_INSERT_HEAD(&(conn)->dhqh, dhandle, q); \
+ TAILQ_INSERT_HEAD(&(conn)->dhhash[bucket], dhandle, hashq); \
+ ++(conn)->dh_bucket_count[bucket]; \
+ ++(conn)->dhandle_count; \
} while (0)
-#define WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket) \
- do { \
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \
- TAILQ_REMOVE(&(conn)->dhqh, dhandle, q); \
- TAILQ_REMOVE(&(conn)->dhhash[bucket], dhandle, hashq); \
- --(conn)->dh_bucket_count[bucket]; \
- --(conn)->dhandle_count; \
+#define WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket) \
+ do { \
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \
+ TAILQ_REMOVE(&(conn)->dhqh, dhandle, q); \
+ TAILQ_REMOVE(&(conn)->dhhash[bucket], dhandle, hashq); \
+ --(conn)->dh_bucket_count[bucket]; \
+ --(conn)->dhandle_count; \
} while (0)
/*
@@ -378,7 +388,8 @@ struct __wt_connection_impl {
WT_LSM_MANAGER lsm_manager; /* LSM worker thread information */
- WT_BUCKET_STORAGE *bstorage; /* Bucket storage for the connection */
+ WT_BUCKET_STORAGE *bstorage; /* Bucket storage for the connection */
+ WT_BUCKET_STORAGE bstorage_none; /* Bucket storage for "none" */
WT_KEYED_ENCRYPTOR *kencryptor; /* Encryptor for metadata and log */
@@ -406,11 +417,13 @@ struct __wt_connection_impl {
wt_thread_t tiered_tid; /* Tiered thread */
bool tiered_tid_set; /* Tiered thread set */
WT_CONDVAR *tiered_cond; /* Tiered wait mutex */
+ bool tiered_server_running; /* Internal tiered server operating */
- const char *tiered_cluster; /* Tiered storage cluster name */
- const char *tiered_member; /* Tiered storage member name */
- WT_TIERED_MANAGER tiered_manager; /* Tiered worker thread information */
- bool tiered_server_running; /* Internal tiered server operating */
+ WT_TIERED_MANAGER tiered_mgr; /* Tiered manager thread information */
+ WT_SESSION_IMPL *tiered_mgr_session; /* Tiered manager thread session */
+ wt_thread_t tiered_mgr_tid; /* Tiered manager thread */
+ bool tiered_mgr_tid_set; /* Tiered manager thread set */
+ WT_CONDVAR *tiered_mgr_cond; /* Tiered manager wait mutex */
uint32_t tiered_threads_max; /* Max tiered threads */
uint32_t tiered_threads_min; /* Min tiered threads */
@@ -613,6 +626,7 @@ struct __wt_connection_impl {
#define WT_CONN_SERVER_STATISTICS 0x10u
#define WT_CONN_SERVER_SWEEP 0x20u
#define WT_CONN_SERVER_TIERED 0x40u
+#define WT_CONN_SERVER_TIERED_MGR 0x80u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint32_t server_flags;
diff --git a/src/third_party/wiredtiger/src/include/dhandle.h b/src/third_party/wiredtiger/src/include/dhandle.h
index 51357bd03c6..967d0b08be4 100644
--- a/src/third_party/wiredtiger/src/include/dhandle.h
+++ b/src/third_party/wiredtiger/src/include/dhandle.h
@@ -42,17 +42,17 @@
#define WT_DHANDLE_RELEASE(dhandle) (void)__wt_atomic_sub32(&(dhandle)->session_ref, 1)
-#define WT_DHANDLE_NEXT(session, dhandle, head, field) \
- do { \
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); \
- if ((dhandle) == NULL) \
- (dhandle) = TAILQ_FIRST(head); \
- else { \
- WT_DHANDLE_RELEASE(dhandle); \
- (dhandle) = TAILQ_NEXT(dhandle, field); \
- } \
- if ((dhandle) != NULL) \
- WT_DHANDLE_ACQUIRE(dhandle); \
+#define WT_DHANDLE_NEXT(session, dhandle, head, field) \
+ do { \
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST)); \
+ if ((dhandle) == NULL) \
+ (dhandle) = TAILQ_FIRST(head); \
+ else { \
+ WT_DHANDLE_RELEASE(dhandle); \
+ (dhandle) = TAILQ_NEXT(dhandle, field); \
+ } \
+ if ((dhandle) != NULL) \
+ WT_DHANDLE_ACQUIRE(dhandle); \
} while (0)
/*
@@ -84,7 +84,12 @@ struct __wt_data_handle {
WT_DATA_SOURCE *dsrc; /* Data source for this handle */
void *handle; /* Generic handle */
- enum { WT_DHANDLE_TYPE_BTREE, WT_DHANDLE_TYPE_TABLE, WT_DHANDLE_TYPE_TIERED } type;
+ enum {
+ WT_DHANDLE_TYPE_BTREE,
+ WT_DHANDLE_TYPE_TABLE,
+ WT_DHANDLE_TYPE_TIERED,
+ WT_DHANDLE_TYPE_TIERED_TREE
+ } type;
bool compact_skip; /* If the handle failed to compact */
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index ef14f73ccfc..fb5c8e361ba 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -175,8 +175,6 @@ extern int __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mappe
extern int __wt_block_misplaced(WT_SESSION_IMPL *session, WT_BLOCK *block, const char *list,
wt_off_t offset, uint32_t size, bool live, const char *func, int line)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_block_newfile(WT_SESSION_IMPL *session, WT_BLOCK *block)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_off_free(WT_SESSION_IMPL *session, WT_BLOCK *block, uint32_t logid,
wt_off_t offset, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_off_remove_overlap(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el,
@@ -199,6 +197,12 @@ extern int __wt_block_salvage_valid(WT_SESSION_IMPL *session, WT_BLOCK *block, u
size_t addr_size, bool valid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_size_alloc(WT_SESSION_IMPL *session, WT_SIZE **szp)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_tiered_flush(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ uint8_t **flush_cookie, size_t *cookie_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_tiered_load(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_block_tiered_newfile(WT_SESSION_IMPL *session, WT_BLOCK *block)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region,
@@ -262,10 +266,14 @@ extern int __wt_btcur_modify(WT_CURSOR_BTREE *cbt, WT_MODIFY *entries, int nentr
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btcur_next_prefix(WT_CURSOR_BTREE *cbt, WT_ITEM *prefix, bool truncating)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_btcur_prev_prefix(WT_CURSOR_BTREE *cbt, WT_ITEM *prefix, bool truncating)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_remove(WT_CURSOR_BTREE *cbt, bool positioned)
@@ -518,8 +526,9 @@ extern int __wt_curmetadata_open(WT_SESSION_IMPL *session, const char *uri, WT_C
const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_cursor_cache(WT_CURSOR *cursor, WT_DATA_HANDLE *dhandle)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *to_dup,
- const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, uint64_t hash_value,
+ WT_CURSOR *to_dup, const char *cfg[], WT_CURSOR **cursorp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_cursor_cache_release(WT_SESSION_IMPL *session, WT_CURSOR *cursor, bool *released)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_cursor_cached(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -1112,8 +1121,6 @@ extern int __wt_modify_pack(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries,
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_modify_reconstruct_from_upd_list(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
WT_UPDATE *upd, WT_UPDATE_VALUE *upd_value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_modify_vector_push(WT_MODIFY_VECTOR *modifies, WT_UPDATE *upd)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...)
WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 2, 3)))
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -1132,8 +1139,8 @@ extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE
extern int __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name,
- bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+ bool open_metadata, uint32_t session_flags, uint32_t session_lock_flags,
+ WT_SESSION_IMPL **sessionp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler,
const char *config, bool open_metadata, WT_SESSION_IMPL **sessionp)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -1285,6 +1292,8 @@ extern int __wt_schema_get_table(WT_SESSION_IMPL *session, const char *name, siz
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_get_table_uri(WT_SESSION_IMPL *session, const char *uri, bool ok_incomplete,
uint32_t flags, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_get_tiered_uri(WT_SESSION_IMPL *session, const char *uri, uint32_t flags,
+ WT_TIERED **tieredp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_index_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname,
const char *config, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_internal_session(WT_SESSION_IMPL *session, WT_SESSION_IMPL **int_sessionp)
@@ -1310,6 +1319,8 @@ extern int __wt_schema_range_truncate(WT_SESSION_IMPL *session, WT_CURSOR *start
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_release_table(WT_SESSION_IMPL *session, WT_TABLE **tablep)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_release_tiered(WT_SESSION_IMPL *session, WT_TIERED **tieredp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_rename(WT_SESSION_IMPL *session, const char *uri, const char *newuri,
const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_session_release(WT_SESSION_IMPL *session, WT_SESSION_IMPL *int_session)
@@ -1442,19 +1453,28 @@ extern int __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *
extern int __wt_thread_group_resize(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group,
uint32_t new_min, uint32_t new_max, uint32_t flags)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_tiered_bucket_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval,
- WT_CONFIG_ITEM *bucket, WT_BUCKET_STORAGE **bstoragep)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tiered_bucket_config(WT_SESSION_IMPL *session, const char *cfg[],
+ WT_BUCKET_STORAGE **bstoragep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_tiered_close(WT_SESSION_IMPL *session, WT_TIERED *tiered)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_tiered_common_config(WT_SESSION_IMPL *session, const char **cfg,
- WT_BUCKET_STORAGE *bstorage) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tiered_conn_config(WT_SESSION_IMPL *session, const char **cfg, bool reconfig)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tiered_name(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle, uint64_t id,
+ uint32_t flags, const char **retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_tiered_open(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_tiered_storage_create(WT_SESSION_IMPL *session, const char *cfg[], bool reconfig)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_tiered_storage_destroy(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tiered_switch(WT_SESSION_IMPL *session, const char *config)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tiered_tree_close(WT_SESSION_IMPL *session, WT_TIERED_TREE *tiered_tree)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tiered_tree_create(WT_SESSION_IMPL *session, const char *uri, bool exclusive,
+ bool import, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tiered_tree_open(WT_SESSION_IMPL *session, const char *cfg[])
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_time_aggregate_validate(WT_SESSION_IMPL *session, WT_TIME_AGGREGATE *ta,
WT_TIME_AGGREGATE *parent, bool silent) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_time_value_validate(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw,
@@ -1506,7 +1526,7 @@ extern int __wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char **cfg)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_is_blocking(WT_SESSION_IMPL *session, bool conservative)
+extern int __wt_txn_is_blocking(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_log_commit(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -1554,6 +1574,8 @@ extern int __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force
extern int __wt_unexpected_object_type(
WT_SESSION_IMPL *session, const char *uri, const char *expect) WT_GCC_FUNC_DECL_ATTRIBUTE((cold))
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_update_vector_push(WT_UPDATE_VECTOR *updates, WT_UPDATE *upd)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_value_return(WT_CURSOR_BTREE *cbt, WT_UPDATE_VALUE *upd_value)
@@ -1660,6 +1682,8 @@ extern void __wt_conn_foc_discard(WT_SESSION_IMPL *session);
extern void __wt_conn_stat_init(WT_SESSION_IMPL *session);
extern void __wt_connection_destroy(WT_CONNECTION_IMPL *conn);
extern void __wt_cursor_close(WT_CURSOR *cursor);
+extern void __wt_cursor_get_hash(
+ WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *to_dup, uint64_t *hash_value);
extern void __wt_cursor_key_order_reset(WT_CURSOR_BTREE *cbt);
extern void __wt_cursor_reopen(WT_CURSOR *cursor, WT_DATA_HANDLE *dhandle);
extern void __wt_cursor_set_key(WT_CURSOR *cursor, ...);
@@ -1732,11 +1756,6 @@ extern void __wt_meta_track_discard(WT_SESSION_IMPL *session);
extern void __wt_meta_track_sub_on(WT_SESSION_IMPL *session);
extern void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase)
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
-extern void __wt_modify_vector_clear(WT_MODIFY_VECTOR *modifies);
-extern void __wt_modify_vector_free(WT_MODIFY_VECTOR *modifies);
-extern void __wt_modify_vector_init(WT_SESSION_IMPL *session, WT_MODIFY_VECTOR *modifies);
-extern void __wt_modify_vector_peek(WT_MODIFY_VECTOR *modifies, WT_UPDATE **updp);
-extern void __wt_modify_vector_pop(WT_MODIFY_VECTOR *modifies, WT_UPDATE **updp);
extern void __wt_optrack_flush_buffer(WT_SESSION_IMPL *s);
extern void __wt_optrack_record_funcid(
WT_SESSION_IMPL *session, const char *func, uint16_t *func_idp);
@@ -1802,6 +1821,11 @@ extern void __wt_txn_release_resources(WT_SESSION_IMPL *session);
extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session);
extern void __wt_txn_stats_update(WT_SESSION_IMPL *session);
extern void __wt_txn_truncate_end(WT_SESSION_IMPL *session);
+extern void __wt_update_vector_clear(WT_UPDATE_VECTOR *updates);
+extern void __wt_update_vector_free(WT_UPDATE_VECTOR *updates);
+extern void __wt_update_vector_init(WT_SESSION_IMPL *session, WT_UPDATE_VECTOR *updates);
+extern void __wt_update_vector_peek(WT_UPDATE_VECTOR *updates, WT_UPDATE **updp);
+extern void __wt_update_vector_pop(WT_UPDATE_VECTOR *updates, WT_UPDATE **updp);
extern void __wt_verbose_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t ts, const char *msg);
extern void __wt_verbose_worker(WT_SESSION_IMPL *session, const char *fmt, ...)
WT_GCC_FUNC_DECL_ATTRIBUTE((format(printf, 2, 3))) WT_GCC_FUNC_DECL_ATTRIBUTE((cold));
@@ -1813,6 +1837,8 @@ static inline WT_CELL *__wt_cell_leaf_value_parse(WT_PAGE *page, WT_CELL *cell)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline WT_CURSOR_BTREE *__wt_curhs_get_cbt(WT_CURSOR *cursor)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline WT_FILE_SYSTEM *__wt_fs_file_system(WT_SESSION_IMPL *session)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline WT_IKEY *__wt_ref_key_instantiated(WT_REF *ref)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline WT_VISIBLE_TYPE __wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd)
@@ -1986,7 +2012,7 @@ static inline int __wt_getline(WT_SESSION_IMPL *session, WT_FSTREAM *fstr, WT_IT
static inline int __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp, size_t new_ins_size,
u_int skipdepth, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-static inline int __wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item)
+static inline int __wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item, bool prefix)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_lex_compare_short(const WT_ITEM *user_item, const WT_ITEM *tree_item)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -2011,6 +2037,8 @@ static inline int __wt_page_swap_func(
const char *func, int line
#endif
) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+static inline int __wt_prefix_match(const WT_ITEM *prefix, const WT_ITEM *tree_item)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_read(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len,
void *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
static inline int __wt_rec_cell_build_val(WT_SESSION_IMPL *session, WT_RECONCILE *r,
diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h
index f562e5da383..4f23f98b463 100644
--- a/src/third_party/wiredtiger/src/include/log.h
+++ b/src/third_party/wiredtiger/src/include/log.h
@@ -216,7 +216,7 @@ struct __wt_logslot {
#define WT_WITH_SLOT_LOCK(session, log, op) \
do { \
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SLOT)); \
+ WT_ASSERT(session, !FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SLOT)); \
WT_WITH_LOCK_WAIT(session, &(log)->log_slot_lock, WT_SESSION_LOCKED_SLOT, op); \
} while (0)
diff --git a/src/third_party/wiredtiger/src/include/meta.h b/src/third_party/wiredtiger/src/include/meta.h
index 01f0c6de92a..22b0de65308 100644
--- a/src/third_party/wiredtiger/src/include/meta.h
+++ b/src/third_party/wiredtiger/src/include/meta.h
@@ -70,7 +70,7 @@
*/
#define WT_WITH_TURTLE_LOCK(session, op) \
do { \
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_TURTLE)); \
+ WT_ASSERT(session, !FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TURTLE)); \
WT_WITH_LOCK_WAIT(session, &S2C(session)->turtle_lock, WT_SESSION_LOCKED_TURTLE, op); \
} while (0)
diff --git a/src/third_party/wiredtiger/src/include/os_fs_inline.h b/src/third_party/wiredtiger/src/include/os_fs_inline.h
index 56d0bc2a5f3..2276f096312 100644
--- a/src/third_party/wiredtiger/src/include/os_fs_inline.h
+++ b/src/third_party/wiredtiger/src/include/os_fs_inline.h
@@ -7,6 +7,16 @@
*/
/*
+ * __wt_fs_file_system --
+ * Return the file system handle that the S2FS macro resolves for this session.
+ */
+static inline WT_FILE_SYSTEM *
+__wt_fs_file_system(WT_SESSION_IMPL *session)
+{
+ return (S2FS(session));
+}
+
+/*
* __wt_fs_directory_list --
* Return a list of files from a directory.
*/
@@ -27,7 +37,7 @@ __wt_fs_directory_list(
WT_RET(__wt_filename(session, dir, &path));
- file_system = S2C(session)->file_system;
+ file_system = __wt_fs_file_system(session);
wt_session = (WT_SESSION *)session;
ret = file_system->fs_directory_list(file_system, wt_session, path, prefix, dirlistp, countp);
@@ -56,7 +66,7 @@ __wt_fs_directory_list_single(
WT_RET(__wt_filename(session, dir, &path));
- file_system = S2C(session)->file_system;
+ file_system = __wt_fs_file_system(session);
wt_session = (WT_SESSION *)session;
ret = file_system->fs_directory_list_single(
file_system, wt_session, path, prefix, dirlistp, countp);
@@ -77,7 +87,7 @@ __wt_fs_directory_list_free(WT_SESSION_IMPL *session, char ***dirlistp, u_int co
WT_SESSION *wt_session;
if (*dirlistp != NULL) {
- file_system = S2C(session)->file_system;
+ file_system = __wt_fs_file_system(session);
wt_session = (WT_SESSION *)session;
ret = file_system->fs_directory_list_free(file_system, wt_session, *dirlistp, count);
}
@@ -102,7 +112,7 @@ __wt_fs_exist(WT_SESSION_IMPL *session, const char *name, bool *existp)
WT_RET(__wt_filename(session, name, &path));
- file_system = S2C(session)->file_system;
+ file_system = __wt_fs_file_system(session);
wt_session = (WT_SESSION *)session;
ret = file_system->fs_exist(file_system, wt_session, path, existp);
@@ -137,7 +147,7 @@ __wt_fs_remove(WT_SESSION_IMPL *session, const char *name, bool durable)
WT_RET(__wt_filename(session, name, &path));
- file_system = S2C(session)->file_system;
+ file_system = __wt_fs_file_system(session);
wt_session = (WT_SESSION *)session;
ret = file_system->fs_remove(file_system, wt_session, path, durable ? WT_FS_DURABLE : 0);
@@ -176,7 +186,7 @@ __wt_fs_rename(WT_SESSION_IMPL *session, const char *from, const char *to, bool
WT_ERR(__wt_filename(session, from, &from_path));
WT_ERR(__wt_filename(session, to, &to_path));
- file_system = S2C(session)->file_system;
+ file_system = __wt_fs_file_system(session);
wt_session = (WT_SESSION *)session;
ret = file_system->fs_rename(
file_system, wt_session, from_path, to_path, durable ? WT_FS_DURABLE : 0);
@@ -203,7 +213,7 @@ __wt_fs_size(WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep)
WT_RET(__wt_filename(session, name, &path));
- file_system = S2C(session)->file_system;
+ file_system = __wt_fs_file_system(session);
wt_session = (WT_SESSION *)session;
ret = file_system->fs_size(file_system, wt_session, path, sizep);
diff --git a/src/third_party/wiredtiger/src/include/schema.h b/src/third_party/wiredtiger/src/include/schema.h
index ebee4b3ca21..9d2487798b5 100644
--- a/src/third_party/wiredtiger/src/include/schema.h
+++ b/src/third_party/wiredtiger/src/include/schema.h
@@ -87,17 +87,17 @@ struct __wt_table {
* WT_WITH_LOCK_WAIT --
* Wait for a lock, perform an operation, drop the lock.
*/
-#define WT_WITH_LOCK_WAIT(session, lock, flag, op) \
- do { \
- if (F_ISSET(session, (flag))) { \
- op; \
- } else { \
- __wt_spin_lock_track(session, lock); \
- F_SET(session, (flag)); \
- op; \
- F_CLR(session, (flag)); \
- __wt_spin_unlock(session, lock); \
- } \
+#define WT_WITH_LOCK_WAIT(session, lock, flag, op) \
+ do { \
+ if (FLD_ISSET(session->lock_flags, (flag))) { \
+ op; \
+ } else { \
+ __wt_spin_lock_track(session, lock); \
+ FLD_SET(session->lock_flags, (flag)); \
+ op; \
+ FLD_CLR(session->lock_flags, (flag)); \
+ __wt_spin_unlock(session, lock); \
+ } \
} while (0)
/*
@@ -107,12 +107,12 @@ struct __wt_table {
#define WT_WITH_LOCK_NOWAIT(session, ret, lock, flag, op) \
do { \
(ret) = 0; \
- if (F_ISSET(session, (flag))) { \
+ if (FLD_ISSET(session->lock_flags, (flag))) { \
op; \
} else if (((ret) = __wt_spin_trylock_track(session, lock)) == 0) { \
- F_SET(session, (flag)); \
+ FLD_SET(session->lock_flags, (flag)); \
op; \
- F_CLR(session, (flag)); \
+ FLD_CLR(session->lock_flags, (flag)); \
__wt_spin_unlock(session, lock); \
} \
} while (0)
@@ -137,17 +137,17 @@ struct __wt_table {
* discard handles, and we only expect it to be held across short
* operations.
*/
-#define WT_WITH_HANDLE_LIST_READ_LOCK(session, op) \
- do { \
- if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) { \
- op; \
- } else { \
- __wt_readlock(session, &S2C(session)->dhandle_lock); \
- F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
- __wt_readunlock(session, &S2C(session)->dhandle_lock); \
- } \
+#define WT_WITH_HANDLE_LIST_READ_LOCK(session, op) \
+ do { \
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST)) { \
+ op; \
+ } else { \
+ __wt_readlock(session, &S2C(session)->dhandle_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ op; \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ __wt_readunlock(session, &S2C(session)->dhandle_lock); \
+ } \
} while (0)
/*
@@ -156,18 +156,19 @@ struct __wt_table {
* operation, drop the lock. The handle list lock is a read-write lock so
* the implementation is different to the other lock macros.
*/
-#define WT_WITH_HANDLE_LIST_WRITE_LOCK(session, op) \
- do { \
- if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)) { \
- op; \
- } else { \
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ)); \
- __wt_writelock(session, &S2C(session)->dhandle_lock); \
- F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
- __wt_writeunlock(session, &S2C(session)->dhandle_lock); \
- } \
+#define WT_WITH_HANDLE_LIST_WRITE_LOCK(session, op) \
+ do { \
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)) { \
+ op; \
+ } else { \
+ WT_ASSERT( \
+ session, !FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_READ)); \
+ __wt_writelock(session, &S2C(session)->dhandle_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
+ op; \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
+ __wt_writeunlock(session, &S2C(session)->dhandle_lock); \
+ } \
} while (0)
/*
@@ -186,8 +187,8 @@ struct __wt_table {
#define WT_WITH_SCHEMA_LOCK(session, op) \
do { \
WT_ASSERT(session, \
- F_ISSET(session, WT_SESSION_LOCKED_SCHEMA) || \
- !F_ISSET(session, \
+ FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA) || \
+ !FLD_ISSET(session->lock_flags, \
WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_NO_SCHEMA_LOCK | \
WT_SESSION_LOCKED_TABLE)); \
WT_WITH_LOCK_WAIT(session, &S2C(session)->schema_lock, WT_SESSION_LOCKED_SCHEMA, op); \
@@ -195,8 +196,8 @@ struct __wt_table {
#define WT_WITH_SCHEMA_LOCK_NOWAIT(session, ret, op) \
do { \
WT_ASSERT(session, \
- F_ISSET(session, WT_SESSION_LOCKED_SCHEMA) || \
- !F_ISSET(session, \
+ FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA) || \
+ !FLD_ISSET(session->lock_flags, \
WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_NO_SCHEMA_LOCK | \
WT_SESSION_LOCKED_TABLE)); \
WT_WITH_LOCK_NOWAIT( \
@@ -214,47 +215,49 @@ struct __wt_table {
* to discard handles, and we only expect it to be held across short
* operations.
*/
-#define WT_WITH_TABLE_READ_LOCK(session, op) \
- do { \
- if (F_ISSET(session, WT_SESSION_LOCKED_TABLE)) { \
- op; \
- } else { \
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); \
- __wt_readlock(session, &S2C(session)->table_lock); \
- F_SET(session, WT_SESSION_LOCKED_TABLE_READ); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_TABLE_READ); \
- __wt_readunlock(session, &S2C(session)->table_lock); \
- } \
- } while (0)
-
-#define WT_WITH_TABLE_WRITE_LOCK(session, op) \
+#define WT_WITH_TABLE_READ_LOCK(session, op) \
do { \
- if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE)) { \
op; \
} else { \
- WT_ASSERT(session, \
- !F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ | WT_SESSION_LOCKED_HANDLE_LIST)); \
- __wt_writelock(session, &S2C(session)->table_lock); \
- F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \
+ WT_ASSERT(session, !FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST)); \
+ __wt_readlock(session, &S2C(session)->table_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_TABLE_READ); \
op; \
- F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- __wt_writeunlock(session, &S2C(session)->table_lock); \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_TABLE_READ); \
+ __wt_readunlock(session, &S2C(session)->table_lock); \
} \
} while (0)
-#define WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret, op) \
- do { \
- WT_ASSERT(session, \
- F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE) || \
- !F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ | WT_SESSION_LOCKED_HANDLE_LIST)); \
- if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \
- op; \
- } else if (((ret) = __wt_try_writelock(session, &S2C(session)->table_lock)) == 0) { \
- F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- __wt_writeunlock(session, &S2C(session)->table_lock); \
- } \
+
+#define WT_WITH_TABLE_WRITE_LOCK(session, op) \
+ do { \
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE)) { \
+ op; \
+ } else { \
+ WT_ASSERT(session, \
+ !FLD_ISSET(session->lock_flags, \
+ WT_SESSION_LOCKED_TABLE_READ | WT_SESSION_LOCKED_HANDLE_LIST)); \
+ __wt_writelock(session, &S2C(session)->table_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE); \
+ op; \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE); \
+ __wt_writeunlock(session, &S2C(session)->table_lock); \
+ } \
+ } while (0)
+#define WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret, op) \
+ do { \
+ WT_ASSERT(session, \
+ FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE) || \
+ !FLD_ISSET( \
+ session->lock_flags, WT_SESSION_LOCKED_TABLE_READ | WT_SESSION_LOCKED_HANDLE_LIST)); \
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE)) { \
+ op; \
+ } else if (((ret) = __wt_try_writelock(session, &S2C(session)->table_lock)) == 0) { \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE); \
+ op; \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE); \
+ __wt_writeunlock(session, &S2C(session)->table_lock); \
+ } \
} while (0)
/*
@@ -263,47 +266,47 @@ struct __wt_table {
* there is no hot backup in progress. The skipp parameter can be used to
* check whether the operation got skipped or not.
*/
-#define WT_WITH_HOTBACKUP_READ_LOCK(session, op, skipp) \
- do { \
- WT_CONNECTION_IMPL *__conn = S2C(session); \
- if ((skipp) != (bool *)NULL) \
- *(bool *)(skipp) = true; \
- if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP)) { \
- if (__conn->hot_backup_start == 0) { \
- if ((skipp) != (bool *)NULL) \
- *(bool *)(skipp) = false; \
- op; \
- } \
- } else { \
- __wt_readlock(session, &__conn->hot_backup_lock); \
- F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
- if (__conn->hot_backup_start == 0) { \
- if ((skipp) != (bool *)NULL) \
- *(bool *)(skipp) = false; \
- op; \
- } \
- F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
- __wt_readunlock(session, &__conn->hot_backup_lock); \
- } \
+#define WT_WITH_HOTBACKUP_READ_LOCK(session, op, skipp) \
+ do { \
+ WT_CONNECTION_IMPL *__conn = S2C(session); \
+ if ((skipp) != (bool *)NULL) \
+ *(bool *)(skipp) = true; \
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP)) { \
+ if (__conn->hot_backup_start == 0) { \
+ if ((skipp) != (bool *)NULL) \
+ *(bool *)(skipp) = false; \
+ op; \
+ } \
+ } else { \
+ __wt_readlock(session, &__conn->hot_backup_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP_READ); \
+ if (__conn->hot_backup_start == 0) { \
+ if ((skipp) != (bool *)NULL) \
+ *(bool *)(skipp) = false; \
+ op; \
+ } \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP_READ); \
+ __wt_readunlock(session, &__conn->hot_backup_lock); \
+ } \
} while (0)
/*
* WT_WITH_HOTBACKUP_WRITE_LOCK --
* Acquire the hot backup write lock and perform an operation.
*/
-#define WT_WITH_HOTBACKUP_WRITE_LOCK(session, op) \
- do { \
- WT_CONNECTION_IMPL *__conn = S2C(session); \
- if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE)) { \
- op; \
- } else { \
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP_READ)); \
- __wt_writelock(session, &__conn->hot_backup_lock); \
- F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE); \
- __wt_writeunlock(session, &__conn->hot_backup_lock); \
- } \
+#define WT_WITH_HOTBACKUP_WRITE_LOCK(session, op) \
+ do { \
+ WT_CONNECTION_IMPL *__conn = S2C(session); \
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP_WRITE)) { \
+ op; \
+ } else { \
+ WT_ASSERT(session, !FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP_READ)); \
+ __wt_writelock(session, &__conn->hot_backup_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP_WRITE); \
+ op; \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP_WRITE); \
+ __wt_writeunlock(session, &__conn->hot_backup_lock); \
+ } \
} while (0)
/*
@@ -314,18 +317,18 @@ struct __wt_table {
* WT_WITH_HOTBACKUP_READ_LOCK which checks that there is no hot backup in
* progress.
*/
-#define WT_WITH_HOTBACKUP_READ_LOCK_UNCOND(session, op) \
- do { \
- WT_CONNECTION_IMPL *__conn = S2C(session); \
- if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP)) { \
- op; \
- } else { \
- __wt_readlock(session, &__conn->hot_backup_lock); \
- F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
- op; \
- F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
- __wt_readunlock(session, &__conn->hot_backup_lock); \
- } \
+#define WT_WITH_HOTBACKUP_READ_LOCK_UNCOND(session, op) \
+ do { \
+ WT_CONNECTION_IMPL *__conn = S2C(session); \
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP)) { \
+ op; \
+ } else { \
+ __wt_readlock(session, &__conn->hot_backup_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP_READ); \
+ op; \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_HOTBACKUP_READ); \
+ __wt_readunlock(session, &__conn->hot_backup_lock); \
+ } \
} while (0)
/*
@@ -333,64 +336,66 @@ struct __wt_table {
* Drop the handle, table and/or schema locks, perform an operation,
* re-acquire the lock(s).
*/
-#define WT_WITHOUT_LOCKS(session, op) \
- do { \
- WT_CONNECTION_IMPL *__conn = S2C(session); \
- bool __checkpoint_locked = F_ISSET(session, WT_SESSION_LOCKED_CHECKPOINT); \
- bool __handle_read_locked = F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
- bool __handle_write_locked = F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
- bool __table_read_locked = F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ); \
- bool __table_write_locked = F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- bool __schema_locked = F_ISSET(session, WT_SESSION_LOCKED_SCHEMA); \
- if (__handle_read_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
- __wt_readunlock(session, &__conn->dhandle_lock); \
- } \
- if (__handle_write_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
- __wt_writeunlock(session, &__conn->dhandle_lock); \
- } \
- if (__table_read_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_TABLE_READ); \
- __wt_readunlock(session, &__conn->table_lock); \
- } \
- if (__table_write_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- __wt_writeunlock(session, &__conn->table_lock); \
- } \
- if (__schema_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_SCHEMA); \
- __wt_spin_unlock(session, &__conn->schema_lock); \
- } \
- if (__checkpoint_locked) { \
- F_CLR(session, WT_SESSION_LOCKED_CHECKPOINT); \
- __wt_spin_unlock(session, &__conn->checkpoint_lock); \
- } \
- __wt_yield(); \
- op; \
- __wt_yield(); \
- if (__checkpoint_locked) { \
- __wt_spin_lock(session, &__conn->checkpoint_lock); \
- F_SET(session, WT_SESSION_LOCKED_CHECKPOINT); \
- } \
- if (__schema_locked) { \
- __wt_spin_lock(session, &__conn->schema_lock); \
- F_SET(session, WT_SESSION_LOCKED_SCHEMA); \
- } \
- if (__table_read_locked) { \
- __wt_readlock(session, &__conn->table_lock); \
- F_SET(session, WT_SESSION_LOCKED_TABLE_READ); \
- } \
- if (__table_write_locked) { \
- __wt_writelock(session, &__conn->table_lock); \
- F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \
- } \
- if (__handle_read_locked) { \
- __wt_readlock(session, &__conn->dhandle_lock); \
- F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
- } \
- if (__handle_write_locked) { \
- __wt_writelock(session, &__conn->dhandle_lock); \
- F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
- } \
+#define WT_WITHOUT_LOCKS(session, op) \
+ do { \
+ WT_CONNECTION_IMPL *__conn = S2C(session); \
+ bool __checkpoint_locked = FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_CHECKPOINT); \
+ bool __handle_read_locked = \
+ FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ bool __handle_write_locked = \
+ FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
+ bool __table_read_locked = FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_READ); \
+ bool __table_write_locked = FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE); \
+ bool __schema_locked = FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA); \
+ if (__handle_read_locked) { \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ __wt_readunlock(session, &__conn->dhandle_lock); \
+ } \
+ if (__handle_write_locked) { \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
+ __wt_writeunlock(session, &__conn->dhandle_lock); \
+ } \
+ if (__table_read_locked) { \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_TABLE_READ); \
+ __wt_readunlock(session, &__conn->table_lock); \
+ } \
+ if (__table_write_locked) { \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE); \
+ __wt_writeunlock(session, &__conn->table_lock); \
+ } \
+ if (__schema_locked) { \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_SCHEMA); \
+ __wt_spin_unlock(session, &__conn->schema_lock); \
+ } \
+ if (__checkpoint_locked) { \
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_CHECKPOINT); \
+ __wt_spin_unlock(session, &__conn->checkpoint_lock); \
+ } \
+ __wt_yield(); \
+ op; \
+ __wt_yield(); \
+ if (__checkpoint_locked) { \
+ __wt_spin_lock(session, &__conn->checkpoint_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_CHECKPOINT); \
+ } \
+ if (__schema_locked) { \
+ __wt_spin_lock(session, &__conn->schema_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA); \
+ } \
+ if (__table_read_locked) { \
+ __wt_readlock(session, &__conn->table_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_TABLE_READ); \
+ } \
+ if (__table_write_locked) { \
+ __wt_writelock(session, &__conn->table_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE); \
+ } \
+ if (__handle_read_locked) { \
+ __wt_readlock(session, &__conn->dhandle_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_READ); \
+ } \
+ if (__handle_write_locked) { \
+ __wt_writelock(session, &__conn->dhandle_lock); \
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \
+ } \
} while (0)
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index 9d783cede10..f7ec0464a29 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -37,6 +37,11 @@ struct __wt_hazard {
#define S2BT(session) ((WT_BTREE *)(session)->dhandle->handle)
#define S2BT_SAFE(session) ((session)->dhandle == NULL ? NULL : S2BT(session))
+/* Get the file system for a session */
+#define S2FS(session) \
+ ((session)->bucket_storage == NULL ? S2C(session)->file_system : \
+ (session)->bucket_storage->file_system)
+
typedef TAILQ_HEAD(__wt_cursor_list, __wt_cursor) WT_CURSOR_LIST;
/* Number of cursors cached to trigger cursor sweep. */
@@ -68,7 +73,8 @@ struct __wt_session_impl {
uint64_t operation_timeout_us; /* Maximum operation period before rollback */
u_int api_call_counter; /* Depth of api calls */
- WT_DATA_HANDLE *dhandle; /* Current data handle */
+ WT_DATA_HANDLE *dhandle; /* Current data handle */
+ WT_BUCKET_STORAGE *bucket_storage; /* Current bucket storage and file system */
/*
* Each session keeps a cache of data handles. The set of handles can grow quite large so we
@@ -166,38 +172,42 @@ struct __wt_session_impl {
#endif
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_SESSION_BACKUP_CURSOR 0x00000001u
-#define WT_SESSION_BACKUP_DUP 0x00000002u
-#define WT_SESSION_CACHE_CURSORS 0x00000004u
-#define WT_SESSION_CAN_WAIT 0x00000008u
-#define WT_SESSION_EVICTION 0x00000010u
-#define WT_SESSION_IGNORE_CACHE_SIZE 0x00000020u
-#define WT_SESSION_IMPORT 0x00000040u
-#define WT_SESSION_IMPORT_REPAIR 0x00000080u
-#define WT_SESSION_INSTANTIATE_PREPARE 0x00000100u
-#define WT_SESSION_INTERNAL 0x00000200u
-#define WT_SESSION_LOCKED_CHECKPOINT 0x00000400u
-#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000800u
-#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00001000u
-#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x00002000u
-#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x00004000u
-#define WT_SESSION_LOCKED_METADATA 0x00008000u
-#define WT_SESSION_LOCKED_PASS 0x00010000u
-#define WT_SESSION_LOCKED_SCHEMA 0x00020000u
-#define WT_SESSION_LOCKED_SLOT 0x00040000u
-#define WT_SESSION_LOCKED_TABLE_READ 0x00080000u
-#define WT_SESSION_LOCKED_TABLE_WRITE 0x00100000u
-#define WT_SESSION_LOCKED_TURTLE 0x00200000u
-#define WT_SESSION_LOGGING_INMEM 0x00400000u
-#define WT_SESSION_NO_DATA_HANDLES 0x00800000u
-#define WT_SESSION_NO_LOGGING 0x01000000u
-#define WT_SESSION_NO_RECONCILE 0x02000000u
-#define WT_SESSION_NO_SCHEMA_LOCK 0x04000000u
-#define WT_SESSION_QUIET_CORRUPT_FILE 0x08000000u
-#define WT_SESSION_READ_WONT_NEED 0x10000000u
-#define WT_SESSION_RESOLVING_TXN 0x20000000u
-#define WT_SESSION_ROLLBACK_TO_STABLE 0x40000000u
-#define WT_SESSION_SCHEMA_TXN 0x80000000u
+#define WT_SESSION_LOCKED_CHECKPOINT 0x0001u
+#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x0002u
+#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x0004u
+#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x0008u
+#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x0010u
+#define WT_SESSION_LOCKED_METADATA 0x0020u
+#define WT_SESSION_LOCKED_PASS 0x0040u
+#define WT_SESSION_LOCKED_SCHEMA 0x0080u
+#define WT_SESSION_LOCKED_SLOT 0x0100u
+#define WT_SESSION_LOCKED_TABLE_READ 0x0200u
+#define WT_SESSION_LOCKED_TABLE_WRITE 0x0400u
+#define WT_SESSION_LOCKED_TURTLE 0x0800u
+#define WT_SESSION_NO_SCHEMA_LOCK 0x1000u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t lock_flags;
+
+/* AUTOMATIC FLAG VALUE GENERATION START */
+#define WT_SESSION_BACKUP_CURSOR 0x00001u
+#define WT_SESSION_BACKUP_DUP 0x00002u
+#define WT_SESSION_CACHE_CURSORS 0x00004u
+#define WT_SESSION_CAN_WAIT 0x00008u
+#define WT_SESSION_EVICTION 0x00010u
+#define WT_SESSION_IGNORE_CACHE_SIZE 0x00020u
+#define WT_SESSION_IMPORT 0x00040u
+#define WT_SESSION_IMPORT_REPAIR 0x00080u
+#define WT_SESSION_INSTANTIATE_PREPARE 0x00100u
+#define WT_SESSION_INTERNAL 0x00200u
+#define WT_SESSION_LOGGING_INMEM 0x00400u
+#define WT_SESSION_NO_DATA_HANDLES 0x00800u
+#define WT_SESSION_NO_LOGGING 0x01000u
+#define WT_SESSION_NO_RECONCILE 0x02000u
+#define WT_SESSION_QUIET_CORRUPT_FILE 0x04000u
+#define WT_SESSION_READ_WONT_NEED 0x08000u
+#define WT_SESSION_RESOLVING_TXN 0x10000u
+#define WT_SESSION_ROLLBACK_TO_STABLE 0x20000u
+#define WT_SESSION_SCHEMA_TXN 0x40000u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint32_t flags;
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 7e9ff35c7c9..534d4a1cf40 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -379,7 +379,6 @@ struct __wt_connection_stats {
int64_t cache_eviction_force;
int64_t cache_eviction_force_fail;
int64_t cache_eviction_force_fail_time;
- int64_t cache_eviction_force_rollback;
int64_t cache_hazard_checks;
int64_t cache_hazard_walks;
int64_t cache_hazard_max;
@@ -635,6 +634,7 @@ struct __wt_connection_stats {
int64_t txn_rts_pages_visited;
int64_t txn_rts_tree_walk_skip_pages;
int64_t txn_rts_upd_aborted;
+ int64_t txn_sessions_walked;
int64_t txn_set_ts;
int64_t txn_set_ts_durable;
int64_t txn_set_ts_durable_upd;
@@ -644,6 +644,7 @@ struct __wt_connection_stats {
int64_t txn_set_ts_stable_upd;
int64_t txn_begin;
int64_t txn_checkpoint_running;
+ int64_t txn_checkpoint_running_hs;
int64_t txn_checkpoint_generation;
int64_t txn_hs_ckpt_duration;
int64_t txn_checkpoint_time_max;
@@ -687,6 +688,7 @@ struct __wt_connection_stats {
int64_t cache_bytes_read;
int64_t cache_bytes_write;
int64_t cache_eviction_checkpoint;
+ int64_t cache_eviction_blocked_checkpoint_hs;
int64_t cache_eviction_target_page_lt10;
int64_t cache_eviction_target_page_lt32;
int64_t cache_eviction_target_page_ge128;
@@ -705,8 +707,7 @@ struct __wt_connection_stats {
int64_t cache_hs_insert;
int64_t cache_hs_insert_restart;
int64_t cache_hs_order_lose_durable_timestamp;
- int64_t cache_hs_order_fixup_move;
- int64_t cache_hs_order_fixup_insert;
+ int64_t cache_hs_order_reinsert;
int64_t cache_hs_read;
int64_t cache_hs_read_miss;
int64_t cache_hs_read_squash;
@@ -714,7 +715,7 @@ struct __wt_connection_stats {
int64_t cache_hs_key_truncate_rts;
int64_t cache_hs_key_truncate;
int64_t cache_hs_key_truncate_onpage_removal;
- int64_t cache_hs_key_truncate_non_ts;
+ int64_t cache_hs_order_remove;
int64_t cache_hs_write_squash;
int64_t cache_inmem_splittable;
int64_t cache_inmem_split;
@@ -741,6 +742,7 @@ struct __wt_connection_stats {
int64_t cursor_next_skip_total;
int64_t cursor_prev_skip_total;
int64_t cursor_skip_hs_cur_position;
+ int64_t cursor_search_near_prefix_fast_paths;
int64_t cursor_next_hs_tombstone;
int64_t cursor_next_skip_ge_100;
int64_t cursor_next_skip_lt_100;
@@ -905,6 +907,7 @@ struct __wt_dsrc_stats {
int64_t cache_bytes_read;
int64_t cache_bytes_write;
int64_t cache_eviction_checkpoint;
+ int64_t cache_eviction_blocked_checkpoint_hs;
int64_t cache_eviction_target_page_lt10;
int64_t cache_eviction_target_page_lt32;
int64_t cache_eviction_target_page_ge128;
@@ -923,8 +926,7 @@ struct __wt_dsrc_stats {
int64_t cache_hs_insert;
int64_t cache_hs_insert_restart;
int64_t cache_hs_order_lose_durable_timestamp;
- int64_t cache_hs_order_fixup_move;
- int64_t cache_hs_order_fixup_insert;
+ int64_t cache_hs_order_reinsert;
int64_t cache_hs_read;
int64_t cache_hs_read_miss;
int64_t cache_hs_read_squash;
@@ -932,7 +934,7 @@ struct __wt_dsrc_stats {
int64_t cache_hs_key_truncate_rts;
int64_t cache_hs_key_truncate;
int64_t cache_hs_key_truncate_onpage_removal;
- int64_t cache_hs_key_truncate_non_ts;
+ int64_t cache_hs_order_remove;
int64_t cache_hs_write_squash;
int64_t cache_inmem_splittable;
int64_t cache_inmem_split;
@@ -959,6 +961,7 @@ struct __wt_dsrc_stats {
int64_t cursor_next_skip_total;
int64_t cursor_prev_skip_total;
int64_t cursor_skip_hs_cur_position;
+ int64_t cursor_search_near_prefix_fast_paths;
int64_t cursor_next_hs_tombstone;
int64_t cursor_next_skip_ge_100;
int64_t cursor_next_skip_lt_100;
diff --git a/src/third_party/wiredtiger/src/include/tiered.h b/src/third_party/wiredtiger/src/include/tiered.h
index 06a49c20a59..6cd9162e1c4 100644
--- a/src/third_party/wiredtiger/src/include/tiered.h
+++ b/src/third_party/wiredtiger/src/include/tiered.h
@@ -28,7 +28,7 @@ struct __wt_tiered_manager {
/*
* WT_CURSOR_TIERED --
- * An tiered cursor.
+ * A tiered cursor.
*/
struct __wt_cursor_tiered {
WT_CURSOR iface;
@@ -49,17 +49,111 @@ struct __wt_cursor_tiered {
};
/*
+ * Define the maximum number of tiers for convenience. We expect at most two initially. This can
+ * change if more are needed. It is easier to have the array statically allocated initially than
+ * to worry about memory management. For now, slots are also assigned by type: local files are in
+ * slot 0 and the top level of the shared tier is in slot 1.
+ */
+#define WT_TIERED_INDEX_INVALID (uint32_t) - 1
+#define WT_TIERED_INDEX_LOCAL 0
+#define WT_TIERED_INDEX_SHARED 1
+
+#define WT_TIERED_MAX_TIERS 4
+
+/* Object name types */
+/* AUTOMATIC FLAG VALUE GENERATION START */
+#define WT_TIERED_NAME_LOCAL 0x1u
+#define WT_TIERED_NAME_OBJECT 0x2u
+#define WT_TIERED_NAME_PREFIX 0x4u
+#define WT_TIERED_NAME_SHARED 0x8u
+/* AUTOMATIC FLAG VALUE GENERATION STOP */
+
+/*
+ * WT_TIERED_TIERS --
+ * Information we need to keep about each tier such as its data handle and name.
+ * We define operations that each tier can accept. The local tier should be able to accept
+ * reads and writes. The shared tier can do reads and flushes. Other ideas for future tiers
+ * may include a merge tier that is read only or an archival tier that is flush only.
+ */
+struct __wt_tiered_tiers {
+ WT_DATA_HANDLE *tier; /* Data handle for this tier */
+ const char *name; /* Tier's metadata name */
+/* AUTOMATIC FLAG VALUE GENERATION START */
+#define WT_TIERS_OP_FLUSH 0x1u
+#define WT_TIERS_OP_READ 0x2u
+#define WT_TIERS_OP_WRITE 0x4u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags; /* Flags including operations */
+};
+
+/*
* WT_TIERED --
- * Handle for a tiered data source.
+ * Handle for a tiered data source. This data structure is used as the basis for metadata
+ * as the top level definition of a tiered table. This structure tells us where to find the
+ * parts of the tree and in what order we should look at the tiers. Prior to the first call
+ * to flush_tier after the creation of this table, the only tier that exists is the local
+ * disk, represented by a file: URI. After that, a second set of tiers (or more) holds the
+ * tiered data. The non-local tier points to a tier: URI, which is described by a
+ * WT_TIERED_TREE data structure that encapsulates the current state of the individual
+ * objects.
*/
struct __wt_tiered {
WT_DATA_HANDLE iface;
- const char *name, *config, *filename;
+ const char *obj_config; /* Config to use for each object */
const char *key_format, *value_format;
- WT_DATA_HANDLE **tiers;
- u_int ntiers;
+ WT_BUCKET_STORAGE *bstorage;
+
+ WT_TIERED_TIERS tiers[WT_TIERED_MAX_TIERS]; /* Tiers array */
+
+ uint64_t current_id; /* Current object id number */
+ uint64_t next_id; /* Next object number */
WT_COLLATOR *collator; /* TODO: handle custom collation */
+ /* TODO: What about compression, encryption, etc? Do we need to worry about that here? */
+
+/* AUTOMATIC FLAG VALUE GENERATION START */
+#define WT_TIERED_FLAG_UNUSED 0x1u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
+};
+
+/*
+ * WT_TIERED_OBJECT --
+ * Definition of a tiered object. This is a single object in a tiered tree.
+ * This is the lowest level data structure and item that makes
+ * up a tiered table. This structure contains the information needed to construct the name of
+ * this object and how to access it.
+ */
+struct __wt_tiered_object {
+ const char *uri; /* Data source for this object */
+ WT_TIERED_TREE *tree; /* Pointer to tree this object is part of */
+ uint64_t count; /* Approximate count of records */
+ uint64_t size; /* Final size of object */
+ uint64_t switch_txn; /* Largest txn that can write to this object */
+ uint64_t switch_ts; /* Timestamp for switching */
+ uint32_t id; /* This object's id */
+ uint32_t generation; /* Do we need this?? */
+ uint32_t refcnt; /* Number of references */
+
+/* AUTOMATIC FLAG VALUE GENERATION START */
+#define WT_TIERED_OBJ_LOCAL 0x1u /* Local resident also */
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
+};
+
+/*
+ * WT_TIERED_TREE --
+ * Definition of the shared tiered portion of a tree.
+ */
+struct __wt_tiered_tree {
+ WT_DATA_HANDLE iface;
+ const char *name, *config;
+ const char *key_format, *value_format;
+
+/* AUTOMATIC FLAG VALUE GENERATION START */
+#define WT_TIERED_TREE_UNUSED 0x1u
+ /* AUTOMATIC FLAG VALUE GENERATION STOP */
+ uint32_t flags;
};
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 23ed483a3fe..7dbc17b9063 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -133,8 +133,6 @@ struct __wt_txn_global {
bool oldest_is_pinned;
bool stable_is_pinned;
- WT_SPINLOCK id_lock;
-
/* Protects the active transaction states. */
WT_RWLOCK rwlock;
@@ -151,6 +149,7 @@ struct __wt_txn_global {
* once checkpoint has finished reading a table, it won't revisit it.
*/
volatile bool checkpoint_running; /* Checkpoint running */
+ volatile bool checkpoint_running_hs; /* Checkpoint running and processing history store file */
volatile uint32_t checkpoint_id; /* Checkpoint's session ID */
WT_TXN_SHARED checkpoint_txn_shared; /* Checkpoint's txn shared state */
wt_timestamp_t checkpoint_timestamp; /* Checkpoint's timestamp */
diff --git a/src/third_party/wiredtiger/src/include/txn_inline.h b/src/third_party/wiredtiger/src/include/txn_inline.h
index 6c89b2024bf..0deaf77a532 100644
--- a/src/third_party/wiredtiger/src/include/txn_inline.h
+++ b/src/third_party/wiredtiger/src/include/txn_inline.h
@@ -631,7 +631,14 @@ __wt_txn_tw_stop_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
static inline bool
__wt_txn_tw_start_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
{
- return ((WT_TIME_WINDOW_HAS_STOP(tw) || !tw->prepare) &&
+ /*
+ * Check the prepared flag if there is no stop time point or the start and stop time points are
+ * from the same transaction.
+ */
+ return (((WT_TIME_WINDOW_HAS_STOP(tw) &&
+ (tw->start_txn != tw->stop_txn || tw->start_ts != tw->stop_ts ||
+ tw->durable_start_ts != tw->durable_stop_ts)) ||
+ !tw->prepare) &&
__wt_txn_visible(session, tw->start_txn, tw->start_ts));
}
@@ -642,7 +649,14 @@ __wt_txn_tw_start_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
static inline bool
__wt_txn_tw_start_visible_all(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
{
- return ((WT_TIME_WINDOW_HAS_STOP(tw) || !tw->prepare) &&
+ /*
+ * Check the prepared flag if there is no stop time point or the start and stop time points are
+ * from the same transaction.
+ */
+ return (((WT_TIME_WINDOW_HAS_STOP(tw) &&
+ (tw->start_txn != tw->stop_txn || tw->start_ts != tw->stop_ts ||
+ tw->durable_start_ts != tw->durable_stop_ts)) ||
+ !tw->prepare) &&
__wt_txn_visible_all(session, tw->start_txn, tw->durable_start_ts));
}
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index fcceedbe660..a4a1b584b35 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -80,7 +80,6 @@ struct __wt_modify; typedef struct __wt_modify WT_MODIFY;
struct __wt_session; typedef struct __wt_session WT_SESSION;
#if !defined(DOXYGEN)
struct __wt_storage_source; typedef struct __wt_storage_source WT_STORAGE_SOURCE;
-struct __wt_location_handle; typedef struct __wt_location_handle WT_LOCATION_HANDLE;
#endif
#if defined(SWIGJAVA)
@@ -716,9 +715,9 @@ struct __wt_cursor {
#define WT_CURSTD_META_INUSE 0x0040000u
#define WT_CURSTD_OPEN 0x0080000u
#define WT_CURSTD_OVERWRITE 0x0100000u
-#define WT_CURSTD_RAW 0x0200000u
-#define WT_CURSTD_RAW_SEARCH 0x0400000u
-#define WT_CURSTD_UPDATE_LOCAL 0x0800000u
+#define WT_CURSTD_PREFIX_SEARCH 0x0200000u
+#define WT_CURSTD_RAW 0x0400000u
+#define WT_CURSTD_RAW_SEARCH 0x0800000u
#define WT_CURSTD_VALUE_EXT 0x1000000u /* Value points out of tree. */
#define WT_CURSTD_VALUE_INT 0x2000000u /* Value points into tree. */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
@@ -1254,31 +1253,23 @@ struct __wt_session {
* size\, that is\, when a Btree page is split\, it will be split into smaller pages\, where
* each page is the specified percentage of the maximum Btree page size., an integer between
* 50 and 100; default \c 90.}
- * @config{tiered = (, options only relevant for tiered data sources., a set of related
- * configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;chunk_size, the
- * maximum size of the hot chunk of tiered tree. This limit is soft - it is possible for
- * chunks to be temporarily larger than this value., an integer greater than or equal to 1M;
- * default \c 1GB.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;tiers, list of data sources to combine
- * into a tiered storage structure., a list of strings; default empty.}
- * @config{ ),,}
* @config{tiered_storage = (, configure a storage source for this table., a set of related
* configuration options defined below.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;auth_token,
* authentication string identifier., a string; default empty.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;bucket, The bucket indicating the location for this
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;bucket, the bucket indicating the location for this
* table., a string; default empty.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;local_retention, time
- * in seconds to retain data on tiered storage on the local tier for faster read access., an
- * integer between 0 and 10000; default \c 300.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;name,
- * Permitted values are \c "none" or custom storage source name created with
- * WT_CONNECTION::add_storage_source. See @ref custom_storage_sources for more
- * information., a string; default \c none.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;
- * object_target_size, the approximate size of objects before creating them on the tiered
- * storage tier., an integer between 100K and 10TB; default \c 10M.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;bucket_prefix, the
+ * unique bucket prefix for this table., a string; default empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;local_retention, time in seconds to retain data on tiered
+ * storage on the local tier for faster read access., an integer between 0 and 10000;
+ * default \c 300.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;name, permitted values are \c "none" or
+ * custom storage source name created with WT_CONNECTION::add_storage_source. See @ref
+ * custom_storage_sources for more information., a string; default \c none.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;object_target_size, the approximate size of objects
+ * before creating them on the tiered storage tier., an integer between 100K and 10TB;
+ * default \c 10M.}
* @config{ ),,}
* @config{type, set the type of data source used to store a column group\, index or simple
* table. By default\, a \c "file:" URI is derived from the object name. The \c type
@@ -2238,14 +2229,12 @@ struct __wt_connection {
* @config{tiered_storage = (, enable tiered storage. Enabling tiered storage may use one
* session from the configured session_max., a set of related configuration options defined
* below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;auth_token, authentication token string., a
- * string; default empty.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;local_retention, time in seconds
- * to retain data on tiered storage on the local tier for faster read access., an integer
- * between 0 and 10000; default \c 300.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;object_target_size,
- * the approximate size of objects before creating them on the tiered storage tier., an
- * integer between 100K and 10TB; default \c 10M.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;local_retention, time in seconds to retain data
+ * on tiered storage on the local tier for faster read access., an integer between 0 and
+ * 10000; default \c 300.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;object_target_size, the
+ * approximate size of objects before creating them on the tiered storage tier., an integer
+ * between 100K and 10TB; default \c 10M.}
* @config{ ),,}
* @config{verbose, enable messages for various events. Options are given as a list\, such
* as <code>"verbose=[evictserver\,read]"</code>., a list\, with values chosen from the
@@ -2255,8 +2244,8 @@ struct __wt_connection {
* "handleops"\, \c "log"\, \c "history_store"\, \c "history_store_activity"\, \c "lsm"\, \c
* "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c "read"\, \c "reconcile"\,
* \c "recovery"\, \c "recovery_progress"\, \c "rts"\, \c "salvage"\, \c "shared_cache"\, \c
- * "split"\, \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c "transaction"\, \c
- * "verify"\, \c "version"\, \c "write"; default \c [].}
+ * "split"\, \c "temporary"\, \c "thread_group"\, \c "tiered"\, \c "timestamp"\, \c
+ * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default \c [].}
* @configend
* @errors
*/
@@ -3009,7 +2998,8 @@ struct __wt_connection {
* "history_store"\, \c "history_store_activity"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c
* "mutex"\, \c "overflow"\, \c "read"\, \c "reconcile"\, \c "recovery"\, \c "recovery_progress"\,
* \c "rts"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, \c "thread_group"\,
- * \c "timestamp"\, \c "transaction"\, \c "verify"\, \c "version"\, \c "write"; default \c [].}
+ * \c "tiered"\, \c "timestamp"\, \c "transaction"\, \c "verify"\, \c "version"\, \c "write";
+ * default \c [].}
* @config{verify_metadata, open connection and verify any WiredTiger metadata. This API allows
* verification and detection of corruption in WiredTiger metadata., a boolean flag; default \c
* false.}
@@ -4232,7 +4222,6 @@ struct __wt_extractor {
int (*terminate)(WT_EXTRACTOR *extractor, WT_SESSION *session);
};
-#if !defined(SWIG)
/*! WT_FILE_SYSTEM::open_file file types */
typedef enum {
WT_FS_OPEN_FILE_TYPE_CHECKPOINT,/*!< open a data file checkpoint */
@@ -4444,7 +4433,6 @@ struct __wt_file_system {
*/
int (*terminate)(WT_FILE_SYSTEM *file_system, WT_SESSION *session);
};
-#endif /* !defined(SWIG) */
/*! WT_FILE_HANDLE::fadvise flags: no longer need */
#define WT_FILE_HANDLE_DONTNEED 1
@@ -4735,28 +4723,6 @@ struct __wt_file_handle {
#if !defined(DOXYGEN)
/* This interface is not yet public. */
-/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_SS_OPEN_CREATE 0x1u
-#define WT_SS_OPEN_READONLY 0x2u
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
-
-/*!
- * A location handle, and its encoding is defined by each implementation
- * of the WT_STORAGE_SOURCE interface.
- */
-struct __wt_location_handle {
- /*!
- * Close a location handle, the handle will not be further accessed by
- * WiredTiger.
- *
- * @errors
- *
- * @param location_handle the WT_LOCATION_HANDLE
- * @param session the current WiredTiger session
- */
- int (*close)(WT_LOCATION_HANDLE *location_handle, WT_SESSION *session);
-};
-
/*!
* The interface implemented by applications to provide a storage source
* implementation. This documentation refers to "object" and "bucket"
@@ -4773,66 +4739,56 @@ struct __wt_location_handle {
*/
struct __wt_storage_source {
/*!
- * Return a location handle from a location string.
- * A location string may encode a bucket name, or the equivalent for this
- * storage source, authorization information for that bucket,
- * naming prefixes to be used for objects in that bucket, etc.
+ * Create a customized file system to access the storage source
+ * objects.
+ *
+ * The file system returned behaves as if objects in the specified buckets are
+ * files in the file system. In particular, the fs_open_file method requires
+ * its flags argument to include either WT_FS_OPEN_CREATE or WT_FS_OPEN_READONLY.
+ * Objects being created are not deemed to "exist" and be visible to
+ * WT_FILE_SYSTEM::fs_exist and other file system methods until the new handle has
+ * been closed. Objects once created are immutable. That is, only objects that
+ * do not already exist can be opened with the create flag, and objects that
+ * already exist can only be opened with the readonly flag. Only objects that
+ * exist can be transferred to the underlying shared object storage. This can
+ * happen at any time after an object is created, and can be forced to happen using
+ * WT_STORAGE_SOURCE::ss_flush.
+ *
+ * Additionally, file handles returned by the file system behave as file handles to a
+ * local file. For example, WT_FILE_HANDLE::fh_sync synchronizes writes to the
+ * local file, and does not imply any transferring of data to the shared object store.
+ *
+ * The directory argument to the WT_FILE_SYSTEM::fs_directory_list method is normally
+ * the empty string as the cloud equivalent (bucket) has already been given when
+ * customizing the file system. If specified, the directory path is interpreted
+ * as another prefix, which is removed from the results.
+ *
+ * Names used by the file system methods are generally flat. However, in some
+ * implementations of a file system returned by a storage source, "..", ".", "/"
+ * may have a particular meaning, as in a POSIX file system. We suggest that
+ * these constructs be avoided when a caller chooses file names within the returned
+ * file system; they may be rejected by the implementation. Within a bucket name,
+ * these characters may or may not be acceptable. That is implementation dependent.
+ * In the prefix, "/" is specifically allowed, as this may have performance or
+ * administrative benefits. That said, within a prefix, certain combinations
+ * involving "/" may be rejected, for example "/../".
*
* @errors
*
* @param storage_source the WT_STORAGE_SOURCE
* @param session the current WiredTiger session
- * @param location the location string
- * @param[out] location_handle the allocated handle
+ * @param bucket_name the name of the bucket. Use of '/' is implementation dependent.
+ * @param prefix a prefix for each file. If used, the prefix will be added to the
+ * name of each object created or otherwise accessed in the bucket. Also, only
+ * objects with this prefix will be visible, and the prefix will be removed when
+ * listed. Prefixes may contain '/' as a separator.
+ * @param auth_token the authorization identifier.
+ * @param config additional configuration, currently must be NULL.
+ * @param[out] file_system the customized file system returned
*/
- int (*ss_location_handle)(WT_STORAGE_SOURCE *storage_source,
- WT_SESSION *session, const char *location, WT_LOCATION_HANDLE **location_handle);
-
- /*!
- * Return a list of object names for the given location.
- *
- * @errors
- *
- * @param storage_source the WT_STORAGE_SOURCE
- * @param session the current WiredTiger session
- * @param location_handle the location to list
- * @param prefix if not NULL, only files with names matching the prefix
- * are returned
- * @param limit if not 0, limits the number of objects listed to this number.
- * @param[out] object_list the method returns an allocated array of
- * individually allocated strings, one for each object in the location.
- * @param[out] countp the number of entries returned
- */
- int (*ss_location_list)(WT_STORAGE_SOURCE *storage_source,
- WT_SESSION *session, WT_LOCATION_HANDLE *location_handle, const char *prefix,
- uint32_t limit, char ***object_list, uint32_t *countp);
-
- /*!
- * Free memory allocated by WT_STORAGE_SOURCE::location_list.
- *
- * @errors
- *
- * @param storage_source the WT_STORAGE_SOURCE
- * @param session the current WiredTiger session
- * @param object_list array returned by WT_STORAGE_SOURCE::location_list
- * @param count count returned by WT_STORAGE_SOURCE::location_list
- */
- int (*ss_location_list_free)(WT_STORAGE_SOURCE *storage_source,
- WT_SESSION *session, char **object_list, uint32_t count);
-
- /*!
- * Return if the named object exists in the location.
- *
- * @errors
- *
- * @param storage_source the WT_STORAGE_SOURCE
- * @param session the current WiredTiger session
- * @param location_handle the location to search
- * @param name the name of the object
- * @param[out] existp If the named storage source object exists
- */
- int (*ss_exist)(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, bool *existp);
+ int (*ss_customize_file_system)(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
+ const char *bucket_name, const char *prefix, const char *auth_token, const char *config,
+ WT_FILE_SYSTEM **file_system);
/*!
* Flush any existing objects that match the location and name from
@@ -4844,85 +4800,13 @@ struct __wt_storage_source {
*
* @param storage_source the WT_STORAGE_SOURCE
* @param session the current WiredTiger session
- * @param location_handle the location to flush (or NULL for all)
+ * @param file_system if NULL, all objects are considered; otherwise, only objects
+ * managed by the given file system are considered.
* @param name the name of the object to flush (or NULL for all)
* @param config additional configuration, currently must be NULL
*/
int (*ss_flush)(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, const char *config);
-
- /*!
- * Open a handle for a named storage source object.
- *
- * Objects created are not deemed to "exist" and be visible to other APIs
- * like WT_STORAGE_SOURCE::ss_exist until the new handle has been closed.
- * Objects once created are immutable. That is, only objects that do not already
- * exist can be opened with the create flag, and objects that already exist can
- * only be opened with the readonly flag.
- *
- * Only objects that exist can be transferred to and made visible in the underlying
- * shared object store. However, they don't need to be transferred immediately when
- * the created handle is closed. Transfers can be forced with WT_STORAGE_SOURCE::ss_flush.
- *
- * File handles returned behave as file handles to a local file. For example,
- * WT_FILE_HANDLE::fh_sync synchronizes writes to the local file, and does not
- * imply any transferring of data to the shared object store.
- *
- * The method should return ENOENT if the object is not being created and
- * does not exist.
- *
- * The method should return EACCES if the object cannot be opened given
- * permissions by the location.
- *
- * @errors
- *
- * @param storage_source the WT_STORAGE_SOURCE
- * @param session the current WiredTiger session
- * @param location_handle the location where the object will be stored.
- * @param name the name of the object within the location.
- * @param flags flags indicating how to open the object, exactly one of
- * ::WT_SS_OPEN_CREATE, ::WT_SS_OPEN_READONLY.
- * @param[out] file_handlep the handle to the newly opened object. Storage
- * source implementations must allocate memory for the handle and
- * the WT_FILE_HANDLE::name field, and fill in the WT_FILE_HANDLE::
- * fields. Applications wanting to associate private information
- * with the WT_FILE_HANDLE:: structure should declare and allocate
- * their own structure as a superset of a WT_FILE_HANDLE:: structure.
- */
- int (*ss_open_object)(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, uint32_t flags,
- WT_FILE_HANDLE **file_handlep);
-
- /*!
- * Remove a named storage source object
- *
- * This method is not required if storage source is configured readonly
- * and should be set to NULL when not required by the storage source implementation.
- *
- * @errors
- *
- * @param storage_source the WT_STORAGE_SOURCE
- * @param session the current WiredTiger session
- * @param location_handle the location containing the object
- * @param name the name of the storage source object
- * @param flags must be 0
- */
- int (*ss_remove)(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, uint32_t flags);
-
- /*!
- * Return the size of a named storage source object
- *
- * @errors
- *
- * @param storage_source the WT_STORAGE_SOURCE
- * @param session the current WiredTiger session
- * @param location_handle the location containing the object
- * @param name the name of the storage source object
- * @param[out] sizep the size of the storage source object
- */
- int (*ss_size)(WT_STORAGE_SOURCE *storage_source, WT_SESSION *session,
- WT_LOCATION_HANDLE *location_handle, const char *name, wt_off_t *sizep);
+ WT_FILE_SYSTEM *file_system, const char *name, const char *config);
/*!
* A callback performed when the storage source is closed and will no
@@ -5193,572 +5077,569 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1064
/*! cache: forced eviction - pages selected unable to be evicted time */
#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL_TIME 1065
-/*!
- * cache: forced eviction - session returned rollback error while force
- * evicting due to being oldest
- */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_ROLLBACK 1066
/*! cache: hazard pointer check calls */
-#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1067
+#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1066
/*! cache: hazard pointer check entries walked */
-#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1068
+#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1067
/*! cache: hazard pointer maximum array length */
-#define WT_STAT_CONN_CACHE_HAZARD_MAX 1069
+#define WT_STAT_CONN_CACHE_HAZARD_MAX 1068
/*! cache: history store score */
-#define WT_STAT_CONN_CACHE_HS_SCORE 1070
+#define WT_STAT_CONN_CACHE_HS_SCORE 1069
/*! cache: history store table max on-disk size */
-#define WT_STAT_CONN_CACHE_HS_ONDISK_MAX 1071
+#define WT_STAT_CONN_CACHE_HS_ONDISK_MAX 1070
/*! cache: history store table on-disk size */
-#define WT_STAT_CONN_CACHE_HS_ONDISK 1072
+#define WT_STAT_CONN_CACHE_HS_ONDISK 1071
/*! cache: internal pages queued for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL_PAGES_QUEUED 1073
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL_PAGES_QUEUED 1072
/*! cache: internal pages seen by eviction walk */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL_PAGES_SEEN 1074
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL_PAGES_SEEN 1073
/*! cache: internal pages seen by eviction walk that are already queued */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL_PAGES_ALREADY_QUEUED 1075
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL_PAGES_ALREADY_QUEUED 1074
/*! cache: maximum bytes configured */
-#define WT_STAT_CONN_CACHE_BYTES_MAX 1076
+#define WT_STAT_CONN_CACHE_BYTES_MAX 1075
/*! cache: maximum page size at eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1077
+#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1076
/*! cache: modified pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1078
+#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1077
/*! cache: operations timed out waiting for space in cache */
-#define WT_STAT_CONN_CACHE_TIMED_OUT_OPS 1079
+#define WT_STAT_CONN_CACHE_TIMED_OUT_OPS 1078
/*! cache: pages currently held in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_INUSE 1080
+#define WT_STAT_CONN_CACHE_PAGES_INUSE 1079
/*! cache: pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP 1081
+#define WT_STAT_CONN_CACHE_EVICTION_APP 1080
/*! cache: pages evicted in parallel with checkpoint */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_IN_PARALLEL_WITH_CHECKPOINT 1082
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_IN_PARALLEL_WITH_CHECKPOINT 1081
/*! cache: pages queued for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1083
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1082
/*! cache: pages queued for eviction post lru sorting */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_POST_LRU 1084
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_POST_LRU 1083
/*! cache: pages queued for urgent eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1085
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1084
/*! cache: pages queued for urgent eviction during walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1086
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1085
/*!
* cache: pages queued for urgent eviction from history store due to high
* dirty content
*/
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT_HS_DIRTY 1087
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT_HS_DIRTY 1086
/*! cache: pages seen by eviction walk that are already queued */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_ALREADY_QUEUED 1088
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_ALREADY_QUEUED 1087
/*! cache: pages selected for eviction unable to be evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1089
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1088
/*!
* cache: pages selected for eviction unable to be evicted as the parent
* page has overflow items
*/
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL_PARENT_HAS_OVERFLOW_ITEMS 1090
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL_PARENT_HAS_OVERFLOW_ITEMS 1089
/*!
* cache: pages selected for eviction unable to be evicted because of
* active children on an internal page
*/
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL_ACTIVE_CHILDREN_ON_AN_INTERNAL_PAGE 1091
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL_ACTIVE_CHILDREN_ON_AN_INTERNAL_PAGE 1090
/*!
* cache: pages selected for eviction unable to be evicted because of
* failure in reconciliation
*/
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL_IN_RECONCILIATION 1092
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL_IN_RECONCILIATION 1091
/*! cache: pages walked for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK 1093
+#define WT_STAT_CONN_CACHE_EVICTION_WALK 1092
/*! cache: percentage overhead */
-#define WT_STAT_CONN_CACHE_OVERHEAD 1094
+#define WT_STAT_CONN_CACHE_OVERHEAD 1093
/*! cache: tracked bytes belonging to internal pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1095
+#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1094
/*! cache: tracked bytes belonging to leaf pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_LEAF 1096
+#define WT_STAT_CONN_CACHE_BYTES_LEAF 1095
/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1097
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1096
/*! capacity: background fsync file handles considered */
-#define WT_STAT_CONN_FSYNC_ALL_FH_TOTAL 1098
+#define WT_STAT_CONN_FSYNC_ALL_FH_TOTAL 1097
/*! capacity: background fsync file handles synced */
-#define WT_STAT_CONN_FSYNC_ALL_FH 1099
+#define WT_STAT_CONN_FSYNC_ALL_FH 1098
/*! capacity: background fsync time (msecs) */
-#define WT_STAT_CONN_FSYNC_ALL_TIME 1100
+#define WT_STAT_CONN_FSYNC_ALL_TIME 1099
/*! capacity: bytes read */
-#define WT_STAT_CONN_CAPACITY_BYTES_READ 1101
+#define WT_STAT_CONN_CAPACITY_BYTES_READ 1100
/*! capacity: bytes written for checkpoint */
-#define WT_STAT_CONN_CAPACITY_BYTES_CKPT 1102
+#define WT_STAT_CONN_CAPACITY_BYTES_CKPT 1101
/*! capacity: bytes written for eviction */
-#define WT_STAT_CONN_CAPACITY_BYTES_EVICT 1103
+#define WT_STAT_CONN_CAPACITY_BYTES_EVICT 1102
/*! capacity: bytes written for log */
-#define WT_STAT_CONN_CAPACITY_BYTES_LOG 1104
+#define WT_STAT_CONN_CAPACITY_BYTES_LOG 1103
/*! capacity: bytes written total */
-#define WT_STAT_CONN_CAPACITY_BYTES_WRITTEN 1105
+#define WT_STAT_CONN_CAPACITY_BYTES_WRITTEN 1104
/*! capacity: threshold to call fsync */
-#define WT_STAT_CONN_CAPACITY_THRESHOLD 1106
+#define WT_STAT_CONN_CAPACITY_THRESHOLD 1105
/*! capacity: time waiting due to total capacity (usecs) */
-#define WT_STAT_CONN_CAPACITY_TIME_TOTAL 1107
+#define WT_STAT_CONN_CAPACITY_TIME_TOTAL 1106
/*! capacity: time waiting during checkpoint (usecs) */
-#define WT_STAT_CONN_CAPACITY_TIME_CKPT 1108
+#define WT_STAT_CONN_CAPACITY_TIME_CKPT 1107
/*! capacity: time waiting during eviction (usecs) */
-#define WT_STAT_CONN_CAPACITY_TIME_EVICT 1109
+#define WT_STAT_CONN_CAPACITY_TIME_EVICT 1108
/*! capacity: time waiting during logging (usecs) */
-#define WT_STAT_CONN_CAPACITY_TIME_LOG 1110
+#define WT_STAT_CONN_CAPACITY_TIME_LOG 1109
/*! capacity: time waiting during read (usecs) */
-#define WT_STAT_CONN_CAPACITY_TIME_READ 1111
+#define WT_STAT_CONN_CAPACITY_TIME_READ 1110
/*! connection: auto adjusting condition resets */
-#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1112
+#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1111
/*! connection: auto adjusting condition wait calls */
-#define WT_STAT_CONN_COND_AUTO_WAIT 1113
+#define WT_STAT_CONN_COND_AUTO_WAIT 1112
/*!
* connection: auto adjusting condition wait raced to update timeout and
* skipped updating
*/
-#define WT_STAT_CONN_COND_AUTO_WAIT_SKIPPED 1114
+#define WT_STAT_CONN_COND_AUTO_WAIT_SKIPPED 1113
/*! connection: detected system time went backwards */
-#define WT_STAT_CONN_TIME_TRAVEL 1115
+#define WT_STAT_CONN_TIME_TRAVEL 1114
/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1116
+#define WT_STAT_CONN_FILE_OPEN 1115
/*! connection: hash bucket array size for data handles */
-#define WT_STAT_CONN_BUCKETS_DH 1117
+#define WT_STAT_CONN_BUCKETS_DH 1116
/*! connection: hash bucket array size general */
-#define WT_STAT_CONN_BUCKETS 1118
+#define WT_STAT_CONN_BUCKETS 1117
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1119
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1118
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1120
+#define WT_STAT_CONN_MEMORY_FREE 1119
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1121
+#define WT_STAT_CONN_MEMORY_GROW 1120
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1122
+#define WT_STAT_CONN_COND_WAIT 1121
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1123
+#define WT_STAT_CONN_RWLOCK_READ 1122
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1124
+#define WT_STAT_CONN_RWLOCK_WRITE 1123
/*! connection: total fsync I/Os */
-#define WT_STAT_CONN_FSYNC_IO 1125
+#define WT_STAT_CONN_FSYNC_IO 1124
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1126
+#define WT_STAT_CONN_READ_IO 1125
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1127
+#define WT_STAT_CONN_WRITE_IO 1126
/*! cursor: cached cursor count */
-#define WT_STAT_CONN_CURSOR_CACHED_COUNT 1128
+#define WT_STAT_CONN_CURSOR_CACHED_COUNT 1127
/*! cursor: cursor bulk loaded cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT_BULK 1129
+#define WT_STAT_CONN_CURSOR_INSERT_BULK 1128
/*! cursor: cursor close calls that result in cache */
-#define WT_STAT_CONN_CURSOR_CACHE 1130
+#define WT_STAT_CONN_CURSOR_CACHE 1129
/*! cursor: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1131
+#define WT_STAT_CONN_CURSOR_CREATE 1130
/*! cursor: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1132
+#define WT_STAT_CONN_CURSOR_INSERT 1131
/*! cursor: cursor insert key and value bytes */
-#define WT_STAT_CONN_CURSOR_INSERT_BYTES 1133
+#define WT_STAT_CONN_CURSOR_INSERT_BYTES 1132
/*! cursor: cursor modify calls */
-#define WT_STAT_CONN_CURSOR_MODIFY 1134
+#define WT_STAT_CONN_CURSOR_MODIFY 1133
/*! cursor: cursor modify key and value bytes affected */
-#define WT_STAT_CONN_CURSOR_MODIFY_BYTES 1135
+#define WT_STAT_CONN_CURSOR_MODIFY_BYTES 1134
/*! cursor: cursor modify value bytes modified */
-#define WT_STAT_CONN_CURSOR_MODIFY_BYTES_TOUCH 1136
+#define WT_STAT_CONN_CURSOR_MODIFY_BYTES_TOUCH 1135
/*! cursor: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1137
+#define WT_STAT_CONN_CURSOR_NEXT 1136
/*! cursor: cursor operation restarted */
-#define WT_STAT_CONN_CURSOR_RESTART 1138
+#define WT_STAT_CONN_CURSOR_RESTART 1137
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1139
+#define WT_STAT_CONN_CURSOR_PREV 1138
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1140
+#define WT_STAT_CONN_CURSOR_REMOVE 1139
/*! cursor: cursor remove key bytes removed */
-#define WT_STAT_CONN_CURSOR_REMOVE_BYTES 1141
+#define WT_STAT_CONN_CURSOR_REMOVE_BYTES 1140
/*! cursor: cursor reserve calls */
-#define WT_STAT_CONN_CURSOR_RESERVE 1142
+#define WT_STAT_CONN_CURSOR_RESERVE 1141
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1143
+#define WT_STAT_CONN_CURSOR_RESET 1142
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1144
+#define WT_STAT_CONN_CURSOR_SEARCH 1143
/*! cursor: cursor search history store calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_HS 1145
+#define WT_STAT_CONN_CURSOR_SEARCH_HS 1144
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1146
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1145
/*! cursor: cursor sweep buckets */
-#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1147
+#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1146
/*! cursor: cursor sweep cursors closed */
-#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1148
+#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1147
/*! cursor: cursor sweep cursors examined */
-#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1149
+#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1148
/*! cursor: cursor sweeps */
-#define WT_STAT_CONN_CURSOR_SWEEP 1150
+#define WT_STAT_CONN_CURSOR_SWEEP 1149
/*! cursor: cursor truncate calls */
-#define WT_STAT_CONN_CURSOR_TRUNCATE 1151
+#define WT_STAT_CONN_CURSOR_TRUNCATE 1150
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1152
+#define WT_STAT_CONN_CURSOR_UPDATE 1151
/*! cursor: cursor update key and value bytes */
-#define WT_STAT_CONN_CURSOR_UPDATE_BYTES 1153
+#define WT_STAT_CONN_CURSOR_UPDATE_BYTES 1152
/*! cursor: cursor update value size change */
-#define WT_STAT_CONN_CURSOR_UPDATE_BYTES_CHANGED 1154
+#define WT_STAT_CONN_CURSOR_UPDATE_BYTES_CHANGED 1153
/*! cursor: cursors reused from cache */
-#define WT_STAT_CONN_CURSOR_REOPEN 1155
+#define WT_STAT_CONN_CURSOR_REOPEN 1154
/*! data-handle: connection data handle size */
-#define WT_STAT_CONN_DH_CONN_HANDLE_SIZE 1156
+#define WT_STAT_CONN_DH_CONN_HANDLE_SIZE 1155
/*! data-handle: connection data handles currently active */
-#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1157
+#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1156
/*! data-handle: connection sweep candidate became referenced */
-#define WT_STAT_CONN_DH_SWEEP_REF 1158
+#define WT_STAT_CONN_DH_SWEEP_REF 1157
/*! data-handle: connection sweep dhandles closed */
-#define WT_STAT_CONN_DH_SWEEP_CLOSE 1159
+#define WT_STAT_CONN_DH_SWEEP_CLOSE 1158
/*! data-handle: connection sweep dhandles removed from hash list */
-#define WT_STAT_CONN_DH_SWEEP_REMOVE 1160
+#define WT_STAT_CONN_DH_SWEEP_REMOVE 1159
/*! data-handle: connection sweep time-of-death sets */
-#define WT_STAT_CONN_DH_SWEEP_TOD 1161
+#define WT_STAT_CONN_DH_SWEEP_TOD 1160
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_SWEEPS 1162
+#define WT_STAT_CONN_DH_SWEEPS 1161
/*!
* data-handle: connection sweeps skipped due to checkpoint gathering
* handles
*/
-#define WT_STAT_CONN_DH_SWEEP_SKIP_CKPT 1163
+#define WT_STAT_CONN_DH_SWEEP_SKIP_CKPT 1162
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1164
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1163
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1165
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1164
/*! lock: checkpoint lock acquisitions */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1166
+#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1165
/*! lock: checkpoint lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1167
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1166
/*! lock: checkpoint lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1168
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1167
/*! lock: dhandle lock application thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1169
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1168
/*! lock: dhandle lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1170
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1169
/*! lock: dhandle read lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1171
+#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1170
/*! lock: dhandle write lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1172
+#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1171
/*!
* lock: durable timestamp queue lock application thread time waiting
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_APPLICATION 1173
+#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_APPLICATION 1172
/*!
* lock: durable timestamp queue lock internal thread time waiting
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_INTERNAL 1174
+#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WAIT_INTERNAL 1173
/*! lock: durable timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_READ_COUNT 1175
+#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_READ_COUNT 1174
/*! lock: durable timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WRITE_COUNT 1176
+#define WT_STAT_CONN_LOCK_DURABLE_TIMESTAMP_WRITE_COUNT 1175
/*! lock: metadata lock acquisitions */
-#define WT_STAT_CONN_LOCK_METADATA_COUNT 1177
+#define WT_STAT_CONN_LOCK_METADATA_COUNT 1176
/*! lock: metadata lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1178
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1177
/*! lock: metadata lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1179
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1178
/*!
* lock: read timestamp queue lock application thread time waiting
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1180
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1179
/*! lock: read timestamp queue lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1181
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1180
/*! lock: read timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1182
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1181
/*! lock: read timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1183
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1182
/*! lock: schema lock acquisitions */
-#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1184
+#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1183
/*! lock: schema lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1185
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1184
/*! lock: schema lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1186
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1185
/*!
* lock: table lock application thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1187
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1186
/*!
* lock: table lock internal thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1188
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1187
/*! lock: table read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1189
+#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1188
/*! lock: table write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1190
+#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1189
/*! lock: txn global lock application thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1191
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1190
/*! lock: txn global lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1192
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1191
/*! lock: txn global read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1193
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1192
/*! lock: txn global write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1194
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1193
/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1195
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1194
/*! log: force archive time sleeping (usecs) */
-#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1196
+#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1195
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1197
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1196
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1198
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1197
/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1199
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1198
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1200
+#define WT_STAT_CONN_LOG_FLUSH 1199
/*! log: log force write operations */
-#define WT_STAT_CONN_LOG_FORCE_WRITE 1201
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1200
/*! log: log force write operations skipped */
-#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1202
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1201
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1203
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1202
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1204
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1203
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1205
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1204
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1206
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1205
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1207
+#define WT_STAT_CONN_LOG_SCANS 1206
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1208
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1207
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1209
+#define WT_STAT_CONN_LOG_WRITE_LSN 1208
/*! log: log server thread write LSN walk skipped */
-#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1210
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1209
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1211
+#define WT_STAT_CONN_LOG_SYNC 1210
/*! log: log sync time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DURATION 1212
+#define WT_STAT_CONN_LOG_SYNC_DURATION 1211
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1213
+#define WT_STAT_CONN_LOG_SYNC_DIR 1212
/*! log: log sync_dir time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1214
+#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1213
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1215
+#define WT_STAT_CONN_LOG_WRITES 1214
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1216
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1215
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1217
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1216
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1218
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1217
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1219
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1218
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1220
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1219
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1221
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1220
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1222
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1221
/*! log: slot close lost race */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1223
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1222
/*! log: slot close unbuffered waits */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1224
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1223
/*! log: slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1225
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1224
/*! log: slot join atomic update races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1226
+#define WT_STAT_CONN_LOG_SLOT_RACES 1225
/*! log: slot join calls atomic updates raced */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1227
+#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1226
/*! log: slot join calls did not yield */
-#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1228
+#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1227
/*! log: slot join calls found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1229
+#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1228
/*! log: slot join calls slept */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1230
+#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1229
/*! log: slot join calls yielded */
-#define WT_STAT_CONN_LOG_SLOT_YIELD 1231
+#define WT_STAT_CONN_LOG_SLOT_YIELD 1230
/*! log: slot join found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1232
+#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1231
/*! log: slot joins yield time (usecs) */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1233
+#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1232
/*! log: slot transitions unable to find free slot */
-#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1234
+#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1233
/*! log: slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1235
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1234
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1236
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1235
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1237
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1236
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1238
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1237
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1239
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1238
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1240
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1239
/*! perf: file system read latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1241
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1240
/*! perf: file system read latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1242
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1241
/*! perf: file system read latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1243
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1242
/*! perf: file system read latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1244
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1243
/*! perf: file system read latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1245
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1244
/*! perf: file system read latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1246
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1245
/*! perf: file system write latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1247
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1246
/*! perf: file system write latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1248
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1247
/*! perf: file system write latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1249
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1248
/*! perf: file system write latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1250
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1249
/*! perf: file system write latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1251
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1250
/*! perf: file system write latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1252
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1251
/*! perf: operation read latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1253
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1252
/*! perf: operation read latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1254
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1253
/*! perf: operation read latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1255
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1254
/*! perf: operation read latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1256
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1255
/*! perf: operation read latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1257
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1256
/*! perf: operation write latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1258
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1257
/*! perf: operation write latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1259
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1258
/*! perf: operation write latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1260
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1259
/*! perf: operation write latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1261
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1260
/*! perf: operation write latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1262
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1261
/*! reconciliation: internal-page overflow keys */
-#define WT_STAT_CONN_REC_OVERFLOW_KEY_INTERNAL 1263
+#define WT_STAT_CONN_REC_OVERFLOW_KEY_INTERNAL 1262
/*! reconciliation: leaf-page overflow keys */
-#define WT_STAT_CONN_REC_OVERFLOW_KEY_LEAF 1264
+#define WT_STAT_CONN_REC_OVERFLOW_KEY_LEAF 1263
/*! reconciliation: maximum seconds spent in a reconciliation call */
-#define WT_STAT_CONN_REC_MAXIMUM_SECONDS 1265
+#define WT_STAT_CONN_REC_MAXIMUM_SECONDS 1264
/*!
* reconciliation: page reconciliation calls that resulted in values with
* prepared transaction metadata
*/
-#define WT_STAT_CONN_REC_PAGES_WITH_PREPARE 1266
+#define WT_STAT_CONN_REC_PAGES_WITH_PREPARE 1265
/*!
* reconciliation: page reconciliation calls that resulted in values with
* timestamps
*/
-#define WT_STAT_CONN_REC_PAGES_WITH_TS 1267
+#define WT_STAT_CONN_REC_PAGES_WITH_TS 1266
/*!
* reconciliation: page reconciliation calls that resulted in values with
* transaction ids
*/
-#define WT_STAT_CONN_REC_PAGES_WITH_TXN 1268
+#define WT_STAT_CONN_REC_PAGES_WITH_TXN 1267
/*! reconciliation: pages written including at least one prepare state */
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_PREPARED 1269
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_PREPARED 1268
/*! reconciliation: pages written including at least one start timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TS 1270
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TS 1269
/*! reconciliation: records written including a prepare state */
-#define WT_STAT_CONN_REC_TIME_WINDOW_PREPARED 1271
+#define WT_STAT_CONN_REC_TIME_WINDOW_PREPARED 1270
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1272
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1271
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1273
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1272
/*! session: flush_tier operation calls */
-#define WT_STAT_CONN_FLUSH_TIER 1274
+#define WT_STAT_CONN_FLUSH_TIER 1273
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1275
+#define WT_STAT_CONN_SESSION_OPEN 1274
/*! session: session query timestamp calls */
-#define WT_STAT_CONN_SESSION_QUERY_TS 1276
+#define WT_STAT_CONN_SESSION_QUERY_TS 1275
/*! session: table alter failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1277
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1276
/*! session: table alter successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1278
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1277
/*! session: table alter unchanged and skipped */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1279
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1278
/*! session: table compact failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1280
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1279
/*! session: table compact successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1281
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1280
/*! session: table create failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1282
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1281
/*! session: table create successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1283
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1282
/*! session: table drop failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1284
+#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1283
/*! session: table drop successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1285
+#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1284
/*! session: table rename failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1286
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1285
/*! session: table rename successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1287
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1286
/*! session: table salvage failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1288
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1287
/*! session: table salvage successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1289
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1288
/*! session: table truncate failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1290
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1289
/*! session: table truncate successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1291
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1290
/*! session: table verify failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1292
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1291
/*! session: table verify successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1293
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1292
/*! thread-state: active filesystem fsync calls */
-#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1294
+#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1293
/*! thread-state: active filesystem read calls */
-#define WT_STAT_CONN_THREAD_READ_ACTIVE 1295
+#define WT_STAT_CONN_THREAD_READ_ACTIVE 1294
/*! thread-state: active filesystem write calls */
-#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1296
+#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1295
/*! thread-yield: application thread time evicting (usecs) */
-#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1297
+#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1296
/*! thread-yield: application thread time waiting for cache (usecs) */
-#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1298
+#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1297
/*!
* thread-yield: connection close blocked waiting for transaction state
* stabilization
*/
-#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1299
+#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1298
/*! thread-yield: connection close yielded for lsm manager shutdown */
-#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1300
+#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1299
/*! thread-yield: data handle lock yielded */
-#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1301
+#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1300
/*!
* thread-yield: get reference for page index and slot time sleeping
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1302
+#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1301
/*! thread-yield: log server sync yielded for log write */
-#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1303
+#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1302
/*! thread-yield: page access yielded due to prepare state change */
-#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1304
+#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1303
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1305
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1304
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1306
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1305
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1307
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1306
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1308
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1307
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1309
+#define WT_STAT_CONN_PAGE_SLEEP 1308
/*!
* thread-yield: page delete rollback time sleeping for state change
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1310
+#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1309
/*! thread-yield: page reconciliation yielded due to child modification */
-#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1311
+#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1310
/*! transaction: Number of prepared updates */
-#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COUNT 1312
+#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COUNT 1311
/*! transaction: prepared transactions */
-#define WT_STAT_CONN_TXN_PREPARE 1313
+#define WT_STAT_CONN_TXN_PREPARE 1312
/*! transaction: prepared transactions committed */
-#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1314
+#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1313
/*! transaction: prepared transactions currently active */
-#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1315
+#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1314
/*! transaction: prepared transactions rolled back */
-#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1316
+#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1315
/*! transaction: query timestamp calls */
-#define WT_STAT_CONN_TXN_QUERY_TS 1317
+#define WT_STAT_CONN_TXN_QUERY_TS 1316
/*! transaction: rollback to stable calls */
-#define WT_STAT_CONN_TXN_RTS 1318
+#define WT_STAT_CONN_TXN_RTS 1317
/*! transaction: rollback to stable pages visited */
-#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1319
+#define WT_STAT_CONN_TXN_RTS_PAGES_VISITED 1318
/*! transaction: rollback to stable tree walk skipping pages */
-#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1320
+#define WT_STAT_CONN_TXN_RTS_TREE_WALK_SKIP_PAGES 1319
/*! transaction: rollback to stable updates aborted */
-#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1321
+#define WT_STAT_CONN_TXN_RTS_UPD_ABORTED 1320
+/*! transaction: sessions scanned in each walk of concurrent sessions */
+#define WT_STAT_CONN_TXN_SESSIONS_WALKED 1321
/*! transaction: set timestamp calls */
#define WT_STAT_CONN_TXN_SET_TS 1322
/*! transaction: set timestamp durable calls */
@@ -5777,396 +5658,406 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_TXN_BEGIN 1329
/*! transaction: transaction checkpoint currently running */
#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1330
+/*!
+ * transaction: transaction checkpoint currently running for history
+ * store file
+ */
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING_HS 1331
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1331
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1332
/*!
* transaction: transaction checkpoint history store file duration
* (usecs)
*/
-#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1332
+#define WT_STAT_CONN_TXN_HS_CKPT_DURATION 1333
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1333
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1334
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1334
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1335
/*!
* transaction: transaction checkpoint most recent duration for gathering
* all handles (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION 1335
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION 1336
/*!
* transaction: transaction checkpoint most recent duration for gathering
* applied handles (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_APPLY 1336
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_APPLY 1337
/*!
* transaction: transaction checkpoint most recent duration for gathering
* skipped handles (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_SKIP 1337
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_DURATION_SKIP 1338
/*! transaction: transaction checkpoint most recent handles applied */
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_APPLIED 1338
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_APPLIED 1339
/*! transaction: transaction checkpoint most recent handles skipped */
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_SKIPPED 1339
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_SKIPPED 1340
/*! transaction: transaction checkpoint most recent handles walked */
-#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_WALKED 1340
+#define WT_STAT_CONN_TXN_CHECKPOINT_HANDLE_WALKED 1341
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1341
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1342
/*! transaction: transaction checkpoint prepare currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1342
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RUNNING 1343
/*! transaction: transaction checkpoint prepare max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1343
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MAX 1344
/*! transaction: transaction checkpoint prepare min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1344
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_MIN 1345
/*! transaction: transaction checkpoint prepare most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1345
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_RECENT 1346
/*! transaction: transaction checkpoint prepare total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1346
+#define WT_STAT_CONN_TXN_CHECKPOINT_PREP_TOTAL 1347
/*! transaction: transaction checkpoint scrub dirty target */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1347
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1348
/*! transaction: transaction checkpoint scrub time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1348
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1349
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1349
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1350
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1350
+#define WT_STAT_CONN_TXN_CHECKPOINT 1351
/*!
* transaction: transaction checkpoints skipped because database was
* clean
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1351
+#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1352
/*! transaction: transaction failures due to history store */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1352
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1353
/*!
* transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1353
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1354
/*!
* transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1354
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1355
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1355
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1356
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1356
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1357
/*! transaction: transaction range of timestamps currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1357
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1358
/*! transaction: transaction range of timestamps pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1358
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1359
/*!
* transaction: transaction range of timestamps pinned by the oldest
* active read timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1359
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1360
/*!
* transaction: transaction range of timestamps pinned by the oldest
* timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1360
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1361
/*! transaction: transaction read timestamp of the oldest active reader */
-#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1361
+#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1362
/*! transaction: transaction rollback to stable currently running */
-#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE_RUNNING 1362
+#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE_RUNNING 1363
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1363
+#define WT_STAT_CONN_TXN_SYNC 1364
/*! transaction: transaction walk of concurrent sessions */
-#define WT_STAT_CONN_TXN_WALK_SESSIONS 1364
+#define WT_STAT_CONN_TXN_WALK_SESSIONS 1365
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1365
+#define WT_STAT_CONN_TXN_COMMIT 1366
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1366
+#define WT_STAT_CONN_TXN_ROLLBACK 1367
/*! LSM: sleep for LSM checkpoint throttle */
-#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1367
+#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1368
/*! LSM: sleep for LSM merge throttle */
-#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1368
+#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1369
/*! cache: bytes currently in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INUSE 1369
+#define WT_STAT_CONN_CACHE_BYTES_INUSE 1370
/*! cache: bytes dirty in the cache cumulative */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY_TOTAL 1370
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY_TOTAL 1371
/*! cache: bytes read into cache */
-#define WT_STAT_CONN_CACHE_BYTES_READ 1371
+#define WT_STAT_CONN_CACHE_BYTES_READ 1372
/*! cache: bytes written from cache */
-#define WT_STAT_CONN_CACHE_BYTES_WRITE 1372
+#define WT_STAT_CONN_CACHE_BYTES_WRITE 1373
/*! cache: checkpoint blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1373
+#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1374
+/*!
+ * cache: checkpoint of history store file blocked non-history store page
+ * eviction
+ */
+#define WT_STAT_CONN_CACHE_EVICTION_BLOCKED_CHECKPOINT_HS 1375
/*! cache: eviction walk target pages histogram - 0-9 */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT10 1374
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT10 1376
/*! cache: eviction walk target pages histogram - 10-31 */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT32 1375
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT32 1377
/*! cache: eviction walk target pages histogram - 128 and higher */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_GE128 1376
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_GE128 1378
/*! cache: eviction walk target pages histogram - 32-63 */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT64 1377
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT64 1379
/*! cache: eviction walk target pages histogram - 64-128 */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT128 1378
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT128 1380
/*!
* cache: eviction walk target pages reduced due to history store cache
* pressure
*/
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_REDUCED 1379
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_REDUCED 1381
/*! cache: eviction walks abandoned */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ABANDONED 1380
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ABANDONED 1382
/*! cache: eviction walks gave up because they restarted their walk twice */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STOPPED 1381
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STOPPED 1383
/*!
* cache: eviction walks gave up because they saw too many pages and
* found no candidates
*/
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 1382
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 1384
/*!
* cache: eviction walks gave up because they saw too many pages and
* found too few candidates
*/
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 1383
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 1385
/*! cache: eviction walks reached end of tree */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ENDED 1384
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ENDED 1386
/*! cache: eviction walks restarted */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK_RESTART 1385
+#define WT_STAT_CONN_CACHE_EVICTION_WALK_RESTART 1387
/*! cache: eviction walks started from root of tree */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK_FROM_ROOT 1386
+#define WT_STAT_CONN_CACHE_EVICTION_WALK_FROM_ROOT 1388
/*! cache: eviction walks started from saved location in tree */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK_SAVED_POS 1387
+#define WT_STAT_CONN_CACHE_EVICTION_WALK_SAVED_POS 1389
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1388
+#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1390
/*! cache: history store table insert calls */
-#define WT_STAT_CONN_CACHE_HS_INSERT 1389
+#define WT_STAT_CONN_CACHE_HS_INSERT 1391
/*! cache: history store table insert calls that returned restart */
-#define WT_STAT_CONN_CACHE_HS_INSERT_RESTART 1390
+#define WT_STAT_CONN_CACHE_HS_INSERT_RESTART 1392
/*!
* cache: history store table out-of-order resolved updates that lose
* their durable timestamp
*/
-#define WT_STAT_CONN_CACHE_HS_ORDER_LOSE_DURABLE_TIMESTAMP 1391
+#define WT_STAT_CONN_CACHE_HS_ORDER_LOSE_DURABLE_TIMESTAMP 1393
/*!
* cache: history store table out-of-order updates that were fixed up by
- * moving existing records
- */
-#define WT_STAT_CONN_CACHE_HS_ORDER_FIXUP_MOVE 1392
-/*!
- * cache: history store table out-of-order updates that were fixed up
- * during insertion
+ * reinserting with the fixed timestamp
*/
-#define WT_STAT_CONN_CACHE_HS_ORDER_FIXUP_INSERT 1393
+#define WT_STAT_CONN_CACHE_HS_ORDER_REINSERT 1394
/*! cache: history store table reads */
-#define WT_STAT_CONN_CACHE_HS_READ 1394
+#define WT_STAT_CONN_CACHE_HS_READ 1395
/*! cache: history store table reads missed */
-#define WT_STAT_CONN_CACHE_HS_READ_MISS 1395
+#define WT_STAT_CONN_CACHE_HS_READ_MISS 1396
/*! cache: history store table reads requiring squashed modifies */
-#define WT_STAT_CONN_CACHE_HS_READ_SQUASH 1396
+#define WT_STAT_CONN_CACHE_HS_READ_SQUASH 1397
/*!
* cache: history store table truncation by rollback to stable to remove
* an unstable update
*/
-#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS_UNSTABLE 1397
+#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS_UNSTABLE 1398
/*!
* cache: history store table truncation by rollback to stable to remove
* an update
*/
-#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS 1398
+#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_RTS 1399
/*! cache: history store table truncation to remove an update */
-#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE 1399
+#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE 1400
/*!
* cache: history store table truncation to remove range of updates due
* to key being removed from the data page during reconciliation
*/
-#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_ONPAGE_REMOVAL 1400
+#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_ONPAGE_REMOVAL 1401
/*!
* cache: history store table truncation to remove range of updates due
- * to non timestamped update on data page
+ * to out-of-order timestamp update on data page
*/
-#define WT_STAT_CONN_CACHE_HS_KEY_TRUNCATE_NON_TS 1401
+#define WT_STAT_CONN_CACHE_HS_ORDER_REMOVE 1402
/*! cache: history store table writes requiring squashed modifies */
-#define WT_STAT_CONN_CACHE_HS_WRITE_SQUASH 1402
+#define WT_STAT_CONN_CACHE_HS_WRITE_SQUASH 1403
/*! cache: in-memory page passed criteria to be split */
-#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1403
+#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1404
/*! cache: in-memory page splits */
-#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1404
+#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1405
/*! cache: internal pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1405
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1406
/*! cache: internal pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1406
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1407
/*! cache: leaf pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1407
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1408
/*! cache: modified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1408
+#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1409
/*! cache: overflow pages read into cache */
-#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1409
+#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1410
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1410
+#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1411
/*! cache: page written requiring history store records */
-#define WT_STAT_CONN_CACHE_WRITE_HS 1411
+#define WT_STAT_CONN_CACHE_WRITE_HS 1412
/*! cache: pages read into cache */
-#define WT_STAT_CONN_CACHE_READ 1412
+#define WT_STAT_CONN_CACHE_READ 1413
/*! cache: pages read into cache after truncate */
-#define WT_STAT_CONN_CACHE_READ_DELETED 1413
+#define WT_STAT_CONN_CACHE_READ_DELETED 1414
/*! cache: pages read into cache after truncate in prepare state */
-#define WT_STAT_CONN_CACHE_READ_DELETED_PREPARED 1414
+#define WT_STAT_CONN_CACHE_READ_DELETED_PREPARED 1415
/*! cache: pages requested from the cache */
-#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1415
+#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1416
/*! cache: pages seen by eviction walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1416
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1417
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1417
+#define WT_STAT_CONN_CACHE_WRITE 1418
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1418
+#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1419
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1419
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1420
/*! cache: unmodified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1420
+#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1421
/*! checkpoint-cleanup: pages added for eviction */
-#define WT_STAT_CONN_CC_PAGES_EVICT 1421
+#define WT_STAT_CONN_CC_PAGES_EVICT 1422
/*! checkpoint-cleanup: pages removed */
-#define WT_STAT_CONN_CC_PAGES_REMOVED 1422
+#define WT_STAT_CONN_CC_PAGES_REMOVED 1423
/*! checkpoint-cleanup: pages skipped during tree walk */
-#define WT_STAT_CONN_CC_PAGES_WALK_SKIPPED 1423
+#define WT_STAT_CONN_CC_PAGES_WALK_SKIPPED 1424
/*! checkpoint-cleanup: pages visited */
-#define WT_STAT_CONN_CC_PAGES_VISITED 1424
+#define WT_STAT_CONN_CC_PAGES_VISITED 1425
/*! cursor: Total number of entries skipped by cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT_SKIP_TOTAL 1425
+#define WT_STAT_CONN_CURSOR_NEXT_SKIP_TOTAL 1426
/*! cursor: Total number of entries skipped by cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV_SKIP_TOTAL 1426
+#define WT_STAT_CONN_CURSOR_PREV_SKIP_TOTAL 1427
/*!
* cursor: Total number of entries skipped to position the history store
* cursor
*/
-#define WT_STAT_CONN_CURSOR_SKIP_HS_CUR_POSITION 1427
+#define WT_STAT_CONN_CURSOR_SKIP_HS_CUR_POSITION 1428
+/*!
+ * cursor: Total number of times a search near has exited due to prefix
+ * config
+ */
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR_PREFIX_FAST_PATHS 1429
/*!
* cursor: cursor next calls that skip due to a globally visible history
* store tombstone
*/
-#define WT_STAT_CONN_CURSOR_NEXT_HS_TOMBSTONE 1428
+#define WT_STAT_CONN_CURSOR_NEXT_HS_TOMBSTONE 1430
/*!
* cursor: cursor next calls that skip greater than or equal to 100
* entries
*/
-#define WT_STAT_CONN_CURSOR_NEXT_SKIP_GE_100 1429
+#define WT_STAT_CONN_CURSOR_NEXT_SKIP_GE_100 1431
/*! cursor: cursor next calls that skip less than 100 entries */
-#define WT_STAT_CONN_CURSOR_NEXT_SKIP_LT_100 1430
+#define WT_STAT_CONN_CURSOR_NEXT_SKIP_LT_100 1432
/*!
* cursor: cursor prev calls that skip due to a globally visible history
* store tombstone
*/
-#define WT_STAT_CONN_CURSOR_PREV_HS_TOMBSTONE 1431
+#define WT_STAT_CONN_CURSOR_PREV_HS_TOMBSTONE 1433
/*!
* cursor: cursor prev calls that skip greater than or equal to 100
* entries
*/
-#define WT_STAT_CONN_CURSOR_PREV_SKIP_GE_100 1432
+#define WT_STAT_CONN_CURSOR_PREV_SKIP_GE_100 1434
/*! cursor: cursor prev calls that skip less than 100 entries */
-#define WT_STAT_CONN_CURSOR_PREV_SKIP_LT_100 1433
+#define WT_STAT_CONN_CURSOR_PREV_SKIP_LT_100 1435
/*! cursor: open cursor count */
-#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1434
+#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1436
/*! reconciliation: approximate byte size of timestamps in pages written */
-#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TS 1435
+#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TS 1437
/*!
* reconciliation: approximate byte size of transaction IDs in pages
* written
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TXN 1436
+#define WT_STAT_CONN_REC_TIME_WINDOW_BYTES_TXN 1438
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1437
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1439
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1438
+#define WT_STAT_CONN_REC_PAGES 1440
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1439
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1441
/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1440
+#define WT_STAT_CONN_REC_PAGE_DELETE 1442
/*!
* reconciliation: pages written including an aggregated newest start
* durable timestamp
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 1441
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 1443
/*!
* reconciliation: pages written including an aggregated newest stop
* durable timestamp
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 1442
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 1444
/*!
* reconciliation: pages written including an aggregated newest stop
* timestamp
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TS 1443
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TS 1445
/*!
* reconciliation: pages written including an aggregated newest stop
* transaction ID
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TXN 1444
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_STOP_TXN 1446
/*!
* reconciliation: pages written including an aggregated newest
* transaction ID
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_TXN 1445
+#define WT_STAT_CONN_REC_TIME_AGGR_NEWEST_TXN 1447
/*!
* reconciliation: pages written including an aggregated oldest start
* timestamp
*/
-#define WT_STAT_CONN_REC_TIME_AGGR_OLDEST_START_TS 1446
+#define WT_STAT_CONN_REC_TIME_AGGR_OLDEST_START_TS 1448
/*! reconciliation: pages written including an aggregated prepare */
-#define WT_STAT_CONN_REC_TIME_AGGR_PREPARED 1447
+#define WT_STAT_CONN_REC_TIME_AGGR_PREPARED 1449
/*!
* reconciliation: pages written including at least one start durable
* timestamp
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 1448
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 1450
/*!
* reconciliation: pages written including at least one start transaction
* ID
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TXN 1449
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_START_TXN 1451
/*!
* reconciliation: pages written including at least one stop durable
* timestamp
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 1450
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 1452
/*! reconciliation: pages written including at least one stop timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TS 1451
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TS 1453
/*!
* reconciliation: pages written including at least one stop transaction
* ID
*/
-#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TXN 1452
+#define WT_STAT_CONN_REC_TIME_WINDOW_PAGES_STOP_TXN 1454
/*! reconciliation: records written including a start durable timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_START_TS 1453
+#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_START_TS 1455
/*! reconciliation: records written including a start timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_START_TS 1454
+#define WT_STAT_CONN_REC_TIME_WINDOW_START_TS 1456
/*! reconciliation: records written including a start transaction ID */
-#define WT_STAT_CONN_REC_TIME_WINDOW_START_TXN 1455
+#define WT_STAT_CONN_REC_TIME_WINDOW_START_TXN 1457
/*! reconciliation: records written including a stop durable timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_STOP_TS 1456
+#define WT_STAT_CONN_REC_TIME_WINDOW_DURABLE_STOP_TS 1458
/*! reconciliation: records written including a stop timestamp */
-#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TS 1457
+#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TS 1459
/*! reconciliation: records written including a stop transaction ID */
-#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TXN 1458
+#define WT_STAT_CONN_REC_TIME_WINDOW_STOP_TXN 1460
/*! session: tiered storage local retention time (secs) */
-#define WT_STAT_CONN_TIERED_RETENTION 1459
+#define WT_STAT_CONN_TIERED_RETENTION 1461
/*! session: tiered storage object size */
-#define WT_STAT_CONN_TIERED_OBJECT_SIZE 1460
+#define WT_STAT_CONN_TIERED_OBJECT_SIZE 1462
/*! transaction: race to read prepared update retry */
-#define WT_STAT_CONN_TXN_READ_RACE_PREPARE_UPDATE 1461
+#define WT_STAT_CONN_TXN_READ_RACE_PREPARE_UPDATE 1463
/*!
* transaction: rollback to stable history store records with stop
* timestamps older than newer records
*/
-#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1462
+#define WT_STAT_CONN_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 1464
/*! transaction: rollback to stable inconsistent checkpoint */
-#define WT_STAT_CONN_TXN_RTS_INCONSISTENT_CKPT 1463
+#define WT_STAT_CONN_TXN_RTS_INCONSISTENT_CKPT 1465
/*! transaction: rollback to stable keys removed */
-#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1464
+#define WT_STAT_CONN_TXN_RTS_KEYS_REMOVED 1466
/*! transaction: rollback to stable keys restored */
-#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1465
+#define WT_STAT_CONN_TXN_RTS_KEYS_RESTORED 1467
/*! transaction: rollback to stable restored tombstones from history store */
-#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1466
+#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_TOMBSTONES 1468
/*! transaction: rollback to stable restored updates from history store */
-#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_UPDATES 1467
+#define WT_STAT_CONN_TXN_RTS_HS_RESTORE_UPDATES 1469
/*! transaction: rollback to stable sweeping history store keys */
-#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1468
+#define WT_STAT_CONN_TXN_RTS_SWEEP_HS_KEYS 1470
/*! transaction: rollback to stable updates removed from history store */
-#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1469
+#define WT_STAT_CONN_TXN_RTS_HS_REMOVED 1471
/*! transaction: transaction checkpoints due to obsolete pages */
-#define WT_STAT_CONN_TXN_CHECKPOINT_OBSOLETE_APPLIED 1470
+#define WT_STAT_CONN_TXN_CHECKPOINT_OBSOLETE_APPLIED 1472
/*! transaction: update conflicts */
-#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1471
+#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1473
/*!
* @}
@@ -6509,64 +6400,64 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2111
/*! cache: checkpoint blocked page eviction */
#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2112
+/*!
+ * cache: checkpoint of history store file blocked non-history store page
+ * eviction
+ */
+#define WT_STAT_DSRC_CACHE_EVICTION_BLOCKED_CHECKPOINT_HS 2113
/*! cache: eviction walk target pages histogram - 0-9 */
-#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT10 2113
+#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT10 2114
/*! cache: eviction walk target pages histogram - 10-31 */
-#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT32 2114
+#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT32 2115
/*! cache: eviction walk target pages histogram - 128 and higher */
-#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_GE128 2115
+#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_GE128 2116
/*! cache: eviction walk target pages histogram - 32-63 */
-#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT64 2116
+#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT64 2117
/*! cache: eviction walk target pages histogram - 64-128 */
-#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT128 2117
+#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_LT128 2118
/*!
* cache: eviction walk target pages reduced due to history store cache
* pressure
*/
-#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_REDUCED 2118
+#define WT_STAT_DSRC_CACHE_EVICTION_TARGET_PAGE_REDUCED 2119
/*! cache: eviction walks abandoned */
-#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_ABANDONED 2119
+#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_ABANDONED 2120
/*! cache: eviction walks gave up because they restarted their walk twice */
-#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_STOPPED 2120
+#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_STOPPED 2121
/*!
* cache: eviction walks gave up because they saw too many pages and
* found no candidates
*/
-#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 2121
+#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 2122
/*!
* cache: eviction walks gave up because they saw too many pages and
* found too few candidates
*/
-#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 2122
+#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 2123
/*! cache: eviction walks reached end of tree */
-#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_ENDED 2123
+#define WT_STAT_DSRC_CACHE_EVICTION_WALKS_ENDED 2124
/*! cache: eviction walks restarted */
-#define WT_STAT_DSRC_CACHE_EVICTION_WALK_RESTART 2124
+#define WT_STAT_DSRC_CACHE_EVICTION_WALK_RESTART 2125
/*! cache: eviction walks started from root of tree */
-#define WT_STAT_DSRC_CACHE_EVICTION_WALK_FROM_ROOT 2125
+#define WT_STAT_DSRC_CACHE_EVICTION_WALK_FROM_ROOT 2126
/*! cache: eviction walks started from saved location in tree */
-#define WT_STAT_DSRC_CACHE_EVICTION_WALK_SAVED_POS 2126
+#define WT_STAT_DSRC_CACHE_EVICTION_WALK_SAVED_POS 2127
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2127
+#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2128
/*! cache: history store table insert calls */
-#define WT_STAT_DSRC_CACHE_HS_INSERT 2128
+#define WT_STAT_DSRC_CACHE_HS_INSERT 2129
/*! cache: history store table insert calls that returned restart */
-#define WT_STAT_DSRC_CACHE_HS_INSERT_RESTART 2129
+#define WT_STAT_DSRC_CACHE_HS_INSERT_RESTART 2130
/*!
* cache: history store table out-of-order resolved updates that lose
* their durable timestamp
*/
-#define WT_STAT_DSRC_CACHE_HS_ORDER_LOSE_DURABLE_TIMESTAMP 2130
+#define WT_STAT_DSRC_CACHE_HS_ORDER_LOSE_DURABLE_TIMESTAMP 2131
/*!
* cache: history store table out-of-order updates that were fixed up by
- * moving existing records
- */
-#define WT_STAT_DSRC_CACHE_HS_ORDER_FIXUP_MOVE 2131
-/*!
- * cache: history store table out-of-order updates that were fixed up
- * during insertion
+ * reinserting with the fixed timestamp
*/
-#define WT_STAT_DSRC_CACHE_HS_ORDER_FIXUP_INSERT 2132
+#define WT_STAT_DSRC_CACHE_HS_ORDER_REINSERT 2132
/*! cache: history store table reads */
#define WT_STAT_DSRC_CACHE_HS_READ 2133
/*! cache: history store table reads missed */
@@ -6592,9 +6483,9 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_DSRC_CACHE_HS_KEY_TRUNCATE_ONPAGE_REMOVAL 2139
/*!
* cache: history store table truncation to remove range of updates due
- * to non timestamped update on data page
+ * to out-of-order timestamp update on data page
*/
-#define WT_STAT_DSRC_CACHE_HS_KEY_TRUNCATE_NON_TS 2140
+#define WT_STAT_DSRC_CACHE_HS_ORDER_REMOVE 2140
/*! cache: history store table writes requiring squashed modifies */
#define WT_STAT_DSRC_CACHE_HS_WRITE_SQUASH 2141
/*! cache: in-memory page passed criteria to be split */
@@ -6651,141 +6542,146 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
*/
#define WT_STAT_DSRC_CURSOR_SKIP_HS_CUR_POSITION 2166
/*!
+ * cursor: Total number of times a search near has exited due to prefix
+ * config
+ */
+#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR_PREFIX_FAST_PATHS 2167
+/*!
* cursor: cursor next calls that skip due to a globally visible history
* store tombstone
*/
-#define WT_STAT_DSRC_CURSOR_NEXT_HS_TOMBSTONE 2167
+#define WT_STAT_DSRC_CURSOR_NEXT_HS_TOMBSTONE 2168
/*!
* cursor: cursor next calls that skip greater than or equal to 100
* entries
*/
-#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_GE_100 2168
+#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_GE_100 2169
/*! cursor: cursor next calls that skip less than 100 entries */
-#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_LT_100 2169
+#define WT_STAT_DSRC_CURSOR_NEXT_SKIP_LT_100 2170
/*!
* cursor: cursor prev calls that skip due to a globally visible history
* store tombstone
*/
-#define WT_STAT_DSRC_CURSOR_PREV_HS_TOMBSTONE 2170
+#define WT_STAT_DSRC_CURSOR_PREV_HS_TOMBSTONE 2171
/*!
* cursor: cursor prev calls that skip greater than or equal to 100
* entries
*/
-#define WT_STAT_DSRC_CURSOR_PREV_SKIP_GE_100 2171
+#define WT_STAT_DSRC_CURSOR_PREV_SKIP_GE_100 2172
/*! cursor: cursor prev calls that skip less than 100 entries */
-#define WT_STAT_DSRC_CURSOR_PREV_SKIP_LT_100 2172
+#define WT_STAT_DSRC_CURSOR_PREV_SKIP_LT_100 2173
/*! cursor: open cursor count */
-#define WT_STAT_DSRC_CURSOR_OPEN_COUNT 2173
+#define WT_STAT_DSRC_CURSOR_OPEN_COUNT 2174
/*! reconciliation: approximate byte size of timestamps in pages written */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_BYTES_TS 2174
+#define WT_STAT_DSRC_REC_TIME_WINDOW_BYTES_TS 2175
/*!
* reconciliation: approximate byte size of transaction IDs in pages
* written
*/
-#define WT_STAT_DSRC_REC_TIME_WINDOW_BYTES_TXN 2175
+#define WT_STAT_DSRC_REC_TIME_WINDOW_BYTES_TXN 2176
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2176
+#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2177
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_DSRC_REC_PAGES 2177
+#define WT_STAT_DSRC_REC_PAGES 2178
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_DSRC_REC_PAGES_EVICTION 2178
+#define WT_STAT_DSRC_REC_PAGES_EVICTION 2179
/*! reconciliation: pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE 2179
+#define WT_STAT_DSRC_REC_PAGE_DELETE 2180
/*!
* reconciliation: pages written including an aggregated newest start
* durable timestamp
*/
-#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 2180
+#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_START_DURABLE_TS 2181
/*!
* reconciliation: pages written including an aggregated newest stop
* durable timestamp
*/
-#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 2181
+#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_DURABLE_TS 2182
/*!
* reconciliation: pages written including an aggregated newest stop
* timestamp
*/
-#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_TS 2182
+#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_TS 2183
/*!
* reconciliation: pages written including an aggregated newest stop
* transaction ID
*/
-#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_TXN 2183
+#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_STOP_TXN 2184
/*!
* reconciliation: pages written including an aggregated newest
* transaction ID
*/
-#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_TXN 2184
+#define WT_STAT_DSRC_REC_TIME_AGGR_NEWEST_TXN 2185
/*!
* reconciliation: pages written including an aggregated oldest start
* timestamp
*/
-#define WT_STAT_DSRC_REC_TIME_AGGR_OLDEST_START_TS 2185
+#define WT_STAT_DSRC_REC_TIME_AGGR_OLDEST_START_TS 2186
/*! reconciliation: pages written including an aggregated prepare */
-#define WT_STAT_DSRC_REC_TIME_AGGR_PREPARED 2186
+#define WT_STAT_DSRC_REC_TIME_AGGR_PREPARED 2187
/*!
* reconciliation: pages written including at least one start durable
* timestamp
*/
-#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 2187
+#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_DURABLE_START_TS 2188
/*!
* reconciliation: pages written including at least one start transaction
* ID
*/
-#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_START_TXN 2188
+#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_START_TXN 2189
/*!
* reconciliation: pages written including at least one stop durable
* timestamp
*/
-#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 2189
+#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_DURABLE_STOP_TS 2190
/*! reconciliation: pages written including at least one stop timestamp */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_STOP_TS 2190
+#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_STOP_TS 2191
/*!
* reconciliation: pages written including at least one stop transaction
* ID
*/
-#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_STOP_TXN 2191
+#define WT_STAT_DSRC_REC_TIME_WINDOW_PAGES_STOP_TXN 2192
/*! reconciliation: records written including a start durable timestamp */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_DURABLE_START_TS 2192
+#define WT_STAT_DSRC_REC_TIME_WINDOW_DURABLE_START_TS 2193
/*! reconciliation: records written including a start timestamp */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_START_TS 2193
+#define WT_STAT_DSRC_REC_TIME_WINDOW_START_TS 2194
/*! reconciliation: records written including a start transaction ID */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_START_TXN 2194
+#define WT_STAT_DSRC_REC_TIME_WINDOW_START_TXN 2195
/*! reconciliation: records written including a stop durable timestamp */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_DURABLE_STOP_TS 2195
+#define WT_STAT_DSRC_REC_TIME_WINDOW_DURABLE_STOP_TS 2196
/*! reconciliation: records written including a stop timestamp */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_STOP_TS 2196
+#define WT_STAT_DSRC_REC_TIME_WINDOW_STOP_TS 2197
/*! reconciliation: records written including a stop transaction ID */
-#define WT_STAT_DSRC_REC_TIME_WINDOW_STOP_TXN 2197
+#define WT_STAT_DSRC_REC_TIME_WINDOW_STOP_TXN 2198
/*! session: tiered storage local retention time (secs) */
-#define WT_STAT_DSRC_TIERED_RETENTION 2198
+#define WT_STAT_DSRC_TIERED_RETENTION 2199
/*! session: tiered storage object size */
-#define WT_STAT_DSRC_TIERED_OBJECT_SIZE 2199
+#define WT_STAT_DSRC_TIERED_OBJECT_SIZE 2200
/*! transaction: race to read prepared update retry */
-#define WT_STAT_DSRC_TXN_READ_RACE_PREPARE_UPDATE 2200
+#define WT_STAT_DSRC_TXN_READ_RACE_PREPARE_UPDATE 2201
/*!
* transaction: rollback to stable history store records with stop
* timestamps older than newer records
*/
-#define WT_STAT_DSRC_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 2201
+#define WT_STAT_DSRC_TXN_RTS_HS_STOP_OLDER_THAN_NEWER_START 2202
/*! transaction: rollback to stable inconsistent checkpoint */
-#define WT_STAT_DSRC_TXN_RTS_INCONSISTENT_CKPT 2202
+#define WT_STAT_DSRC_TXN_RTS_INCONSISTENT_CKPT 2203
/*! transaction: rollback to stable keys removed */
-#define WT_STAT_DSRC_TXN_RTS_KEYS_REMOVED 2203
+#define WT_STAT_DSRC_TXN_RTS_KEYS_REMOVED 2204
/*! transaction: rollback to stable keys restored */
-#define WT_STAT_DSRC_TXN_RTS_KEYS_RESTORED 2204
+#define WT_STAT_DSRC_TXN_RTS_KEYS_RESTORED 2205
/*! transaction: rollback to stable restored tombstones from history store */
-#define WT_STAT_DSRC_TXN_RTS_HS_RESTORE_TOMBSTONES 2205
+#define WT_STAT_DSRC_TXN_RTS_HS_RESTORE_TOMBSTONES 2206
/*! transaction: rollback to stable restored updates from history store */
-#define WT_STAT_DSRC_TXN_RTS_HS_RESTORE_UPDATES 2206
+#define WT_STAT_DSRC_TXN_RTS_HS_RESTORE_UPDATES 2207
/*! transaction: rollback to stable sweeping history store keys */
-#define WT_STAT_DSRC_TXN_RTS_SWEEP_HS_KEYS 2207
+#define WT_STAT_DSRC_TXN_RTS_SWEEP_HS_KEYS 2208
/*! transaction: rollback to stable updates removed from history store */
-#define WT_STAT_DSRC_TXN_RTS_HS_REMOVED 2208
+#define WT_STAT_DSRC_TXN_RTS_HS_REMOVED 2209
/*! transaction: transaction checkpoints due to obsolete pages */
-#define WT_STAT_DSRC_TXN_CHECKPOINT_OBSOLETE_APPLIED 2209
+#define WT_STAT_DSRC_TXN_CHECKPOINT_OBSOLETE_APPLIED 2210
/*! transaction: update conflicts */
-#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2210
+#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2211
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger_ext.h b/src/third_party/wiredtiger/src/include/wiredtiger_ext.h
index 7251ea7fcc0..0efdc3bfefc 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger_ext.h
+++ b/src/third_party/wiredtiger/src/include/wiredtiger_ext.h
@@ -239,6 +239,21 @@ struct __wt_extension_api {
WT_CONFIG_ARG *config, WT_CONFIG_PARSER **config_parserp);
/*!
+ * Get the file system abstraction used by WiredTiger.
+ *
+ * @param wt_api the extension handle
+ * @param session the session handle (or NULL if none available)
+ * @param file_system the returned file system handle.
+ * @errors
+ * If called from an extension's initialization routine, this may
+ * return WT_NOTFOUND if the file system has not yet been established.
+ *
+ * @snippet ex_data_source.c WT_EXTENSION metadata insert
+ */
+ int (*file_system_get)(
+ WT_EXTENSION_API *wt_api, WT_SESSION *session, WT_FILE_SYSTEM **file_system);
+
+ /*!
* Insert a row into the metadata if it does not already exist.
*
* @param wt_api the extension handle
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index 79b10aeda22..b29575e0628 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -239,8 +239,6 @@ struct __wt_lsm_worker_args;
typedef struct __wt_lsm_worker_args WT_LSM_WORKER_ARGS;
struct __wt_lsm_worker_cookie;
typedef struct __wt_lsm_worker_cookie WT_LSM_WORKER_COOKIE;
-struct __wt_modify_vector;
-typedef struct __wt_modify_vector WT_MODIFY_VECTOR;
struct __wt_multi;
typedef struct __wt_multi WT_MULTI;
struct __wt_myslot;
@@ -323,6 +321,12 @@ struct __wt_tiered;
typedef struct __wt_tiered WT_TIERED;
struct __wt_tiered_manager;
typedef struct __wt_tiered_manager WT_TIERED_MANAGER;
+struct __wt_tiered_object;
+typedef struct __wt_tiered_object WT_TIERED_OBJECT;
+struct __wt_tiered_tiers;
+typedef struct __wt_tiered_tiers WT_TIERED_TIERS;
+struct __wt_tiered_tree;
+typedef struct __wt_tiered_tree WT_TIERED_TREE;
struct __wt_time_aggregate;
typedef struct __wt_time_aggregate WT_TIME_AGGREGATE;
struct __wt_time_window;
@@ -341,6 +345,8 @@ struct __wt_update;
typedef struct __wt_update WT_UPDATE;
struct __wt_update_value;
typedef struct __wt_update_value WT_UPDATE_VALUE;
+struct __wt_update_vector;
+typedef struct __wt_update_vector WT_UPDATE_VECTOR;
union __wt_lsn;
typedef union __wt_lsn WT_LSN;
union __wt_rand_state;
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index 9a2632da7c9..e95dde65807 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -157,7 +157,7 @@ __log_wait_for_earlier_slot(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
* If we're on a locked path and the write LSN is not advancing, unlock in case an earlier
* thread is trying to switch its slot and complete its operation.
*/
- if (F_ISSET(session, WT_SESSION_LOCKED_SLOT))
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SLOT))
__wt_spin_unlock(session, &log->log_slot_lock);
/*
* This may not be initialized if we are starting at an older log file version. So only
@@ -169,7 +169,7 @@ __log_wait_for_earlier_slot(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
__wt_yield();
else
__wt_cond_wait(session, log->log_write_cond, 200, NULL);
- if (F_ISSET(session, WT_SESSION_LOCKED_SLOT))
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SLOT))
__wt_spin_lock(session, &log->log_slot_lock);
}
}
@@ -1129,7 +1129,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created)
* write to the log. If the log file size is small we could fill a log file before the previous
* one is closed. Wait for that to close.
*/
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SLOT));
for (yield_cnt = 0; log->log_close_fh != NULL;) {
WT_STAT_CONN_INCR(session, log_close_yields);
/*
@@ -1334,7 +1334,7 @@ __wt_log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot)
* the release LSN. That way when log files switch, we're waiting for the correct LSN from
* outstanding writes.
*/
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SLOT));
/*
* We need to set the release LSN earlier, before a log file change.
*/
diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c
index 30c3cd41016..a4583462300 100644
--- a/src/third_party/wiredtiger/src/log/log_slot.c
+++ b/src/third_party/wiredtiger/src/log/log_slot.c
@@ -103,7 +103,7 @@ __log_slot_close(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *releasep, boo
*releasep = false;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SLOT));
conn = S2C(session);
log = conn->log;
if (slot == NULL)
@@ -235,7 +235,7 @@ __log_slot_new(WT_SESSION_IMPL *session)
int count;
#endif
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SLOT));
conn = S2C(session);
log = conn->log;
#ifdef HAVE_DIAGNOSTIC
@@ -318,7 +318,7 @@ __log_slot_switch_internal(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool for
release = false;
slot = myslot->slot;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SLOT));
/*
* If someone else raced us to closing this specific slot, we're done here.
@@ -526,7 +526,7 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT
log = conn->log;
time_start = time_stop = 0;
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SLOT));
+ WT_ASSERT(session, !FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SLOT));
WT_ASSERT(session, mysize != 0);
/*
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
index 0d5f1eae331..4dfcaa65902 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
@@ -208,7 +208,7 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session)
* files. Use read-uncommitted isolation to avoid keeping updates in cache unnecessarily.
*/
for (i = 0; i < WT_LSM_MAX_WORKERS; i++) {
- WT_ERR(__wt_open_internal_session(conn, "lsm-worker", false, 0, &worker_session));
+ WT_ERR(__wt_open_internal_session(conn, "lsm-worker", false, 0, 0, &worker_session));
worker_session->isolation = WT_ISO_READ_UNCOMMITTED;
manager->lsm_worker_cookies[i].session = worker_session;
}
@@ -356,7 +356,7 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
if (TAILQ_EMPTY(&conn->lsmqh))
continue;
__wt_readlock(session, &conn->dhandle_lock);
- F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ);
+ FLD_SET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_READ);
dhandle_locked = true;
TAILQ_FOREACH (lsm_tree, &conn->lsmqh, q) {
if (!lsm_tree->active)
@@ -403,14 +403,14 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
}
}
__wt_readunlock(session, &conn->dhandle_lock);
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ);
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_READ);
dhandle_locked = false;
}
err:
if (dhandle_locked) {
__wt_readunlock(session, &conn->dhandle_lock);
- F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ);
+ FLD_CLR(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_READ);
}
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
index 39264213ecc..be9e9dae345 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
@@ -73,7 +73,8 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
/* We may be destroying an lsm_tree before it was added. */
if (F_ISSET(lsm_tree, WT_LSM_TREE_OPEN)) {
- WT_ASSERT(session, final || F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
+ WT_ASSERT(
+ session, final || FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
TAILQ_REMOVE(&S2C(session)->lsmqh, lsm_tree, q);
}
@@ -282,7 +283,7 @@ __lsm_tree_cleanup_old(WT_SESSION_IMPL *session, const char *uri)
int
__wt_lsm_tree_setup_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk)
{
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA));
__wt_epoch(session, &chunk->create_time);
WT_RET(__wt_spin_init(session, &chunk->timestamp_spinlock, "LSM chunk timestamp"));
@@ -376,7 +377,7 @@ __lsm_tree_find(WT_SESSION_IMPL *session, const char *uri, bool exclusive, WT_LS
WT_LSM_TREE *lsm_tree;
*treep = NULL;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST));
/* See if the tree is already open. */
TAILQ_FOREACH (lsm_tree, &S2C(session)->lsmqh, q)
@@ -461,7 +462,7 @@ __lsm_tree_open(WT_SESSION_IMPL *session, const char *uri, bool exclusive, WT_LS
conn = S2C(session);
lsm_tree = NULL;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
/* Start the LSM manager thread if it isn't running. */
WT_RET(__wt_lsm_manager_start(session));
@@ -1006,7 +1007,8 @@ __wt_lsm_tree_readlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
* Diagnostic: avoid deadlocks with the schema lock: if we need it for an operation, we should
* already have it.
*/
- F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
+ F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
+ FLD_SET(session->lock_flags, WT_SESSION_NO_SCHEMA_LOCK);
}
/*
@@ -1016,7 +1018,8 @@ __wt_lsm_tree_readlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
void
__wt_lsm_tree_readunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
+ F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE);
+ FLD_CLR(session->lock_flags, WT_SESSION_NO_SCHEMA_LOCK);
__wt_readunlock(session, &lsm_tree->rwlock);
}
@@ -1034,7 +1037,8 @@ __wt_lsm_tree_writelock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
* Diagnostic: avoid deadlocks with the schema lock: if we need it for an operation, we should
* already have it.
*/
- F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
+ F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
+ FLD_SET(session->lock_flags, WT_SESSION_NO_SCHEMA_LOCK);
}
/*
@@ -1044,7 +1048,8 @@ __wt_lsm_tree_writelock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
void
__wt_lsm_tree_writeunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
+ F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE);
+ FLD_CLR(session->lock_flags, WT_SESSION_NO_SCHEMA_LOCK);
__wt_writeunlock(session, &lsm_tree->rwlock);
}
diff --git a/src/third_party/wiredtiger/src/meta/meta_apply.c b/src/third_party/wiredtiger/src/meta/meta_apply.c
index 66663f9b4f0..a729ce1d69b 100644
--- a/src/third_party/wiredtiger/src/meta/meta_apply.c
+++ b/src/third_party/wiredtiger/src/meta/meta_apply.c
@@ -73,7 +73,7 @@ __wt_meta_apply_all(WT_SESSION_IMPL *session, int (*file_func)(WT_SESSION_IMPL *
WT_CURSOR *cursor;
WT_DECL_RET;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA));
WT_RET(__wt_metadata_cursor(session, &cursor));
WT_SAVE_DHANDLE(session, ret = __meta_btree_apply(session, cursor, file_func, name_func, cfg));
WT_TRET(__wt_metadata_cursor_release(session, &cursor));
diff --git a/src/third_party/wiredtiger/src/meta/meta_track.c b/src/third_party/wiredtiger/src/meta/meta_track.c
index 95125961485..0739175c3a1 100644
--- a/src/third_party/wiredtiger/src/meta/meta_track.c
+++ b/src/third_party/wiredtiger/src/meta/meta_track.c
@@ -276,14 +276,14 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
WT_WITH_DHANDLE(session, WT_SESSION_META_DHANDLE(session),
ret = __wt_txn_checkpoint_log(session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
else {
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA));
ckpt_session = S2C(session)->meta_ckpt_session;
/*
* If this operation is part of a running transaction, that should be included in the
* checkpoint.
*/
ckpt_session->txn->id = session->txn->id;
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_METADATA));
+ WT_ASSERT(session, !FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_METADATA));
WT_WITH_DHANDLE(ckpt_session, WT_SESSION_META_DHANDLE(session),
WT_WITH_METADATA_LOCK(ckpt_session, ret = __wt_checkpoint(ckpt_session, NULL)));
ckpt_session->txn->id = WT_TXN_NONE;
@@ -515,7 +515,7 @@ __wt_meta_track_init(WT_SESSION_IMPL *session)
conn = S2C(session);
if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) {
WT_RET(__wt_open_internal_session(
- conn, "metadata-ckpt", false, WT_SESSION_NO_DATA_HANDLES, &conn->meta_ckpt_session));
+ conn, "metadata-ckpt", false, WT_SESSION_NO_DATA_HANDLES, 0, &conn->meta_ckpt_session));
/*
* Set session transaction isolation to read-committed isolation, we rely on that for the
diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c
index a3eeae7c295..6c0b432a067 100644
--- a/src/third_party/wiredtiger/src/meta/meta_turtle.c
+++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c
@@ -129,7 +129,9 @@ __metadata_load_bulk(WT_SESSION_IMPL *session)
WT_ERR(cursor->get_value(cursor, &value));
filecfg[1] = value;
WT_ERR(__wt_direct_io_size_check(session, filecfg, "allocation_size", &allocsize));
- WT_ERR(__wt_block_manager_create(session, key, allocsize));
+ WT_WITH_BUCKET_STORAGE(
+ NULL, session, ret = __wt_block_manager_create(session, key, allocsize));
+ WT_ERR(ret);
}
WT_ERR_NOTFOUND_OK(ret, false);
@@ -339,7 +341,7 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep)
*valuep = NULL;
/* Require single-threading. */
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TURTLE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TURTLE));
/*
* Open the turtle file; there's one case where we won't find the turtle file, yet still
@@ -404,7 +406,7 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value)
conn = S2C(session);
/* Require single-threading. */
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TURTLE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TURTLE));
/*
* Create the turtle setup file: we currently re-write it from scratch every time.
diff --git a/src/third_party/wiredtiger/src/os_common/os_fhandle.c b/src/third_party/wiredtiger/src/os_common/os_fhandle.c
index 18024f50ee3..f39fbd599e7 100644
--- a/src/third_party/wiredtiger/src/os_common/os_fhandle.c
+++ b/src/third_party/wiredtiger/src/os_common/os_fhandle.c
@@ -215,7 +215,7 @@ __wt_open(WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE_TYPE file_
*fhp = NULL;
conn = S2C(session);
- file_system = conn->file_system;
+ file_system = __wt_fs_file_system(session);
fh = NULL;
open_called = false;
path = NULL;
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_dictionary.c b/src/third_party/wiredtiger/src/reconcile/rec_dictionary.c
index 93c748e54bc..2e6c6f832f6 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_dictionary.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_dictionary.c
@@ -156,7 +156,7 @@ __wt_rec_dictionary_lookup(
*dpp = NULL;
/* Search the dictionary, and return any match we find. */
- hash = __wt_hash_fnv64(val->buf.data, val->buf.size);
+ hash = __wt_hash_city64(val->buf.data, val->buf.size);
for (dp = __rec_dictionary_skip_search(r->dictionary_head, hash);
dp != NULL && dp->hash == hash; dp = dp->next[0]) {
WT_RET(
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
index 0848660c455..d1e4d909b50 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
@@ -216,8 +216,10 @@ __rec_need_save_upd(
if (F_ISSET(r, WT_REC_CHECKPOINT) && upd_select->upd == NULL)
return (false);
- return (!__wt_txn_tw_stop_visible_all(session, &upd_select->tw) &&
- !__wt_txn_tw_start_visible_all(session, &upd_select->tw));
+ if (WT_TIME_WINDOW_HAS_STOP(&upd_select->tw))
+ return (!__wt_txn_tw_stop_visible_all(session, &upd_select->tw));
+ else
+ return (!__wt_txn_tw_start_visible_all(session, &upd_select->tw));
}
/*
diff --git a/src/third_party/wiredtiger/src/schema/schema_create.c b/src/third_party/wiredtiger/src/schema/schema_create.c
index de019758a1b..bc76391db43 100644
--- a/src/third_party/wiredtiger/src/schema/schema_create.c
+++ b/src/third_party/wiredtiger/src/schema/schema_create.c
@@ -97,6 +97,29 @@ err:
}
/*
+ * __create_file_block_manager --
+ * Create a new file in the block manager, and track it.
+ */
+static int
+__create_file_block_manager(
+ WT_SESSION_IMPL *session, const char *uri, const char *filename, uint32_t allocsize)
+{
+ WT_RET(__wt_block_manager_create(session, filename, allocsize));
+
+ /*
+ * Track the creation of this file.
+ *
+ * If something down the line fails, we're going to need to roll this back. Specifically do NOT
+ * track the op in the import case since we do not want to wipe a data file just because we fail
+ * to import it.
+ */
+ if (WT_META_TRACKING(session))
+ WT_RET(__wt_meta_track_fileop(session, NULL, uri));
+
+ return (0);
+}
+
+/*
* __create_file --
* Create a new 'file:' object.
*/
@@ -189,29 +212,19 @@ __create_file(
uri);
}
}
- } else {
+ } else
/* Create the file. */
- WT_ERR(__wt_block_manager_create(session, filename, allocsize));
-
- /*
- * Track the creation of this file.
- *
- * If something down the line fails, we're going to need to roll this back. Specifically do
- * NOT track the op in the import case since we do not want to wipe a data file just because
- * we fail to import it.
- */
- if (WT_META_TRACKING(session))
- WT_ERR(__wt_meta_track_fileop(session, NULL, uri));
- }
+ WT_ERR(__create_file_block_manager(session, uri, filename, allocsize));
/*
- * If creating an ordinary file, append the file ID and current version numbers to the passed-in
- * configuration and insert the resulting configuration into the metadata.
+ * If creating an ordinary file, update the file ID and current version numbers and strip the
+ * incremental backup information and checkpoint LSN from the extracted metadata.
*/
if (!is_metadata) {
if (!import_repair) {
WT_ERR(__wt_scr_alloc(session, 0, &val));
- WT_ERR(__wt_buf_fmt(session, val, "id=%" PRIu32 ",version=(major=%d,minor=%d)",
+ WT_ERR(__wt_buf_fmt(session, val,
+ "id=%" PRIu32 ",version=(major=%d,minor=%d),checkpoint_backup_info=,checkpoint_lsn=",
++S2C(session)->next_file_id, WT_BTREE_MAJOR_VERSION_MAX,
WT_BTREE_MINOR_VERSION_MAX));
for (p = filecfg; *p != NULL; ++p)
@@ -272,10 +285,20 @@ __wt_schema_colgroup_source(
prefix = cval.str;
len = cval.len;
suffix = "";
- } else {
+ } else if ((S2C(session)->bstorage == NULL) ||
+ ((ret = __wt_config_getones(session, config, "tiered_storage.name", &cval)) == 0 &&
+ cval.len != 0 && WT_STRING_MATCH("none", cval.str, cval.len))) {
+ /*
+ * Default to the file prefix when the connection has no bucket storage, or when the user
+ * explicitly sets tiered_storage.name to "none" for this create. Otherwise use tiered.
+ */
prefix = "file";
len = strlen(prefix);
suffix = ".wt";
+ } else {
+ prefix = "tiered";
+ len = strlen(prefix);
+ suffix = "";
}
WT_RET_NOTFOUND_OK(ret);
@@ -676,7 +699,7 @@ __create_table(
cgname = filename = NULL;
table = NULL;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE));
tablename = uri;
WT_PREFIX_SKIP_REQUIRED(session, tablename, "table:");
@@ -768,24 +791,51 @@ err:
}
/*
+ * __create_object --
+ * Create a tiered object for the given name.
+ */
+static int
+__create_object(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const char *config)
+{
+ WT_UNUSED(exclusive);
+ WT_RET(__wt_metadata_insert(session, uri, config));
+ return (0);
+}
+
+/*
+ * __wt_tiered_tree_create --
+ * Create a tiered tree structure for the given name.
+ */
+int
+__wt_tiered_tree_create(
+ WT_SESSION_IMPL *session, const char *uri, bool exclusive, bool import, const char *config)
+{
+ WT_UNUSED(exclusive);
+ WT_UNUSED(import);
+ WT_RET(__wt_metadata_insert(session, uri, config));
+ return (0);
+}
+
+/*
* __create_tiered --
* Create a tiered tree structure for the given name.
*/
static int
__create_tiered(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const char *config)
{
- WT_CONFIG cparser;
- WT_CONFIG_ITEM ckey, cval, tierconf;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(tmp);
WT_DECL_RET;
- int ntiers;
+ WT_TIERED *tiered;
char *meta_value;
- const char *cfg[] = {WT_CONFIG_BASE(session, tiered_meta), config, NULL};
+ const char *cfg[5] = {WT_CONFIG_BASE(session, tiered_meta), NULL, NULL, NULL, NULL};
const char *metadata;
+ conn = S2C(session);
metadata = NULL;
- ntiers = 0;
+ tiered = NULL;
- /* If it can be opened, it already exists. */
+ /* Check if the tiered table already exists. */
if ((ret = __wt_metadata_search(session, uri, &meta_value)) != WT_NOTFOUND) {
if (exclusive)
WT_TRET(EEXIST);
@@ -793,23 +843,34 @@ __create_tiered(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const
}
WT_RET_NOTFOUND_OK(ret);
- /* A tiered cursor must specify at least one underlying table */
- WT_RET(__wt_config_gets(session, cfg, "tiered.tiers", &tierconf));
- __wt_config_subinit(session, &cparser, &tierconf);
-
- while ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0)
- ++ntiers;
- WT_RET_NOTFOUND_OK(ret);
-
- if (ntiers == 0)
- WT_RET_MSG(session, EINVAL, "tiered table must specify at least one tier");
-
- if (!F_ISSET(S2C(session), WT_CONN_READONLY)) {
+ /*
+ * We're creating a tiered table. Set the initial tiers list to empty. Opening the table will
+ * cause us to create our first file or tiered object.
+ */
+ if (!F_ISSET(conn, WT_CONN_READONLY)) {
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ /*
+ * By default use the connection level bucket and prefix. Then we add in any user
+ * configuration that may override the system one.
+ */
+ WT_ERR(__wt_buf_fmt(session, tmp, ",tiered_storage=(bucket=%s,bucket_prefix=%s)",
+ conn->bstorage->bucket, conn->bstorage->bucket_prefix));
+ cfg[1] = tmp->data;
+ cfg[2] = config;
+ cfg[3] = "tiers=()";
WT_ERR(__wt_config_merge(session, cfg, NULL, &metadata));
WT_ERR(__wt_metadata_insert(session, uri, metadata));
}
+ WT_ERR(__wt_schema_get_tiered_uri(session, uri, WT_DHANDLE_EXCLUSIVE, &tiered));
+ if (WT_META_TRACKING(session)) {
+ WT_WITH_DHANDLE(session, &tiered->iface, ret = __wt_meta_track_handle_lock(session, true));
+ WT_ERR(ret);
+ tiered = NULL;
+ }
err:
+ WT_TRET(__wt_schema_release_tiered(session, &tiered));
+ __wt_scr_free(session, &tmp);
__wt_free(session, meta_value);
__wt_free(session, metadata);
return (ret);
@@ -880,8 +941,12 @@ __schema_create(WT_SESSION_IMPL *session, const char *uri, const char *config)
ret = __wt_lsm_tree_create(session, uri, exclusive, config);
else if (WT_PREFIX_MATCH(uri, "index:"))
ret = __create_index(session, uri, exclusive, config);
+ else if (WT_PREFIX_MATCH(uri, "object:"))
+ ret = __create_object(session, uri, exclusive, config);
else if (WT_PREFIX_MATCH(uri, "table:"))
ret = __create_table(session, uri, exclusive, import, config);
+ else if (WT_PREFIX_MATCH(uri, "tier:"))
+ ret = __wt_tiered_tree_create(session, uri, exclusive, import, config);
else if (WT_PREFIX_MATCH(uri, "tiered:"))
ret = __create_tiered(session, uri, exclusive, config);
else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL)
diff --git a/src/third_party/wiredtiger/src/schema/schema_drop.c b/src/third_party/wiredtiger/src/schema/schema_drop.c
index d5f4264ebc8..8c6226b2dec 100644
--- a/src/third_party/wiredtiger/src/schema/schema_drop.c
+++ b/src/third_party/wiredtiger/src/schema/schema_drop.c
@@ -56,7 +56,7 @@ __drop_colgroup(WT_SESSION_IMPL *session, const char *uri, bool force, const cha
WT_DECL_RET;
WT_TABLE *table;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE));
/* If we can get the colgroup, detach it from the table. */
if ((ret = __wt_schema_get_colgroup(session, uri, force, &table, &colgroup)) == 0) {
@@ -102,7 +102,7 @@ __drop_table(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
const char *name;
bool tracked;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE));
name = uri;
WT_PREFIX_SKIP_REQUIRED(session, name, "table:");
@@ -186,9 +186,10 @@ __drop_tiered(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
tiered = (WT_TIERED *)session->dhandle;
/* Drop the tiers. */
- for (i = 0; i < tiered->ntiers; i++) {
- tier = tiered->tiers[i];
- WT_ERR(__wt_schema_drop(session, tier->name, cfg));
+ for (i = 0; i < WT_TIERED_MAX_TIERS; i++) {
+ tier = tiered->tiers[i].tier;
+ if (tier != NULL)
+ WT_ERR(__wt_schema_drop(session, tier->name, cfg));
}
ret = __wt_metadata_remove(session, uri);
diff --git a/src/third_party/wiredtiger/src/schema/schema_list.c b/src/third_party/wiredtiger/src/schema/schema_list.c
index 341b5e2a65d..3d901b99425 100644
--- a/src/third_party/wiredtiger/src/schema/schema_list.c
+++ b/src/third_party/wiredtiger/src/schema/schema_list.c
@@ -9,6 +9,58 @@
#include "wt_internal.h"
/*
+ * __schema_get_tiered_uri --
+ * Get the tiered handle for the named table. This function overwrites the dhandle.
+ */
+static int
+__schema_get_tiered_uri(
+ WT_SESSION_IMPL *session, const char *uri, uint32_t flags, WT_TIERED **tieredp)
+{
+ WT_DECL_RET;
+ WT_TIERED *tiered;
+
+ *tieredp = NULL;
+
+ WT_ERR(__wt_session_get_dhandle(session, uri, NULL, NULL, flags));
+ tiered = (WT_TIERED *)session->dhandle;
+ *tieredp = tiered;
+err:
+ return (ret);
+}
+/*
+ * __wt_schema_get_tiered_uri --
+ * Get the tiered handle for the named table.
+ */
+int
+__wt_schema_get_tiered_uri(
+ WT_SESSION_IMPL *session, const char *uri, uint32_t flags, WT_TIERED **tieredp)
+{
+ WT_DECL_RET;
+
+ WT_SAVE_DHANDLE(session, ret = __schema_get_tiered_uri(session, uri, flags, tieredp));
+ return (ret);
+}
+
+/*
+ * __wt_schema_release_tiered --
+ * Release a tiered handle.
+ */
+int
+__wt_schema_release_tiered(WT_SESSION_IMPL *session, WT_TIERED **tieredp)
+{
+ WT_DECL_RET;
+ WT_TIERED *tiered;
+
+ if ((tiered = *tieredp) == NULL)
+ return (0);
+ *tieredp = NULL;
+
+ WT_WITH_DHANDLE(session, &tiered->iface, ret = __wt_session_release_dhandle(session));
+
+ return (ret);
+}
+
+/*
* __wt_schema_get_table_uri --
* Get the table handle for the named table.
*/
@@ -165,7 +217,8 @@ __wt_schema_close_table(WT_SESSION_IMPL *session, WT_TABLE *table)
table->idx_alloc = 0;
WT_ASSERT(session,
- F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE) || F_ISSET(S2C(session), WT_CONN_CLOSING));
+ FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE) ||
+ F_ISSET(S2C(session), WT_CONN_CLOSING));
table->cg_complete = table->idx_complete = false;
return (ret);
diff --git a/src/third_party/wiredtiger/src/schema/schema_open.c b/src/third_party/wiredtiger/src/schema/schema_open.c
index 8202607fb74..d75d1b2f121 100644
--- a/src/third_party/wiredtiger/src/schema/schema_open.c
+++ b/src/third_party/wiredtiger/src/schema/schema_open.c
@@ -42,7 +42,7 @@ __wt_schema_open_colgroups(WT_SESSION_IMPL *session, WT_TABLE *table)
u_int i;
char *cgconfig;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE));
if (table->cg_complete)
return (0);
@@ -406,7 +406,7 @@ __schema_open_table(WT_SESSION_IMPL *session)
table_cfg = table->iface.cfg;
tablename = table->iface.name;
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE));
WT_RET(__wt_config_gets(session, table_cfg, "columns", &cval));
WT_RET(__wt_config_gets(session, table_cfg, "key_format", &cval));
diff --git a/src/third_party/wiredtiger/src/schema/schema_truncate.c b/src/third_party/wiredtiger/src/schema/schema_truncate.c
index 35e13389373..6dbeb264d80 100644
--- a/src/third_party/wiredtiger/src/schema/schema_truncate.c
+++ b/src/third_party/wiredtiger/src/schema/schema_truncate.c
@@ -52,9 +52,12 @@ __truncate_tiered(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
WT_STAT_DATA_INCR(session, cursor_truncate);
- /* Truncate the column groups. */
- for (i = 0; i < tiered->ntiers; i++)
- WT_ERR(__wt_schema_truncate(session, tiered->tiers[i]->name, cfg));
+ /* Truncate the tiered entries. */
+ for (i = 0; i < WT_TIERED_MAX_TIERS; i++) {
+ if (tiered->tiers[i].tier == NULL)
+ continue;
+ WT_ERR(__wt_schema_truncate(session, tiered->tiers[i].name, cfg));
+ }
err:
WT_TRET(__wt_session_release_dhandle(session));
diff --git a/src/third_party/wiredtiger/src/schema/schema_util.c b/src/third_party/wiredtiger/src/schema/schema_util.c
index 1687d51eac4..eb342f32a5a 100644
--- a/src/third_party/wiredtiger/src/schema/schema_util.c
+++ b/src/third_party/wiredtiger/src/schema/schema_util.c
@@ -87,8 +87,8 @@ __wt_schema_internal_session(WT_SESSION_IMPL *session, WT_SESSION_IMPL **int_ses
if (F_ISSET(session->txn, WT_TXN_RUNNING)) {
/* We should not have a schema txn running now. */
WT_ASSERT(session, !F_ISSET(session, WT_SESSION_SCHEMA_TXN));
- WT_RET(
- __wt_open_internal_session(S2C(session), "schema", true, session->flags, int_sessionp));
+ WT_RET(__wt_open_internal_session(
+ S2C(session), "schema", true, session->flags, session->lock_flags, int_sessionp));
}
return (0);
}
diff --git a/src/third_party/wiredtiger/src/schema/schema_worker.c b/src/third_party/wiredtiger/src/schema/schema_worker.c
index 12c1d453742..c8c8ecd7558 100644
--- a/src/third_party/wiredtiger/src/schema/schema_worker.c
+++ b/src/third_party/wiredtiger/src/schema/schema_worker.c
@@ -59,8 +59,10 @@ __wt_schema_tiered_worker(WT_SESSION_IMPL *session, const char *uri,
WT_RET(__wt_session_get_dhandle(session, uri, NULL, NULL, open_flags));
tiered = (WT_TIERED *)session->dhandle;
- for (i = 0; i < tiered->ntiers; i++) {
- dhandle = tiered->tiers[i];
+ for (i = 0; i < WT_TIERED_MAX_TIERS; i++) {
+ dhandle = tiered->tiers[i].tier;
+ if (dhandle == NULL)
+ continue;
WT_SAVE_DHANDLE(session,
ret = __wt_schema_worker(session, dhandle->name, file_func, name_func, cfg, open_flags));
WT_ERR(ret);
@@ -142,7 +144,7 @@ __wt_schema_worker(WT_SESSION_IMPL *session, const char *uri,
* checkpoints, do not. Opening indexes requires the handle write lock, so check whether
* that lock is held when deciding what to do.
*/
- if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE))
+ if (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_TABLE_WRITE))
WT_ERR(__wt_schema_open_indices(session, table));
for (i = 0; i < table->nindices; i++) {
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index cb4e12df887..338df444cd2 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -429,7 +429,7 @@ err:
*/
static int
__session_open_cursor_int(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
- WT_CURSOR *other, const char *cfg[], WT_CURSOR **cursorp)
+ WT_CURSOR *other, const char *cfg[], uint64_t hash_value, WT_CURSOR **cursorp)
{
WT_COLGROUP *colgroup;
WT_DATA_SOURCE *dsrc;
@@ -528,6 +528,9 @@ __session_open_cursor_int(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *
*cursorp = NULL;
}
+ if (*cursorp != NULL)
+ (*cursorp)->uri_hash = hash_value;
+
return (ret);
}
@@ -540,18 +543,22 @@ __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, co
WT_CURSOR **cursorp)
{
WT_DECL_RET;
+ uint64_t hash_value;
+
+ hash_value = 0;
/* We should not open other cursors when there are open history store cursors in the session. */
WT_ASSERT(session, strcmp(uri, WT_HS_URI) == 0 || session->hs_cursor_counter == 0);
/* We do not cache any subordinate tables/files cursors. */
if (owner == NULL) {
- if ((ret = __wt_cursor_cache_get(session, uri, NULL, cfg, cursorp)) == 0)
+ __wt_cursor_get_hash(session, uri, NULL, &hash_value);
+ if ((ret = __wt_cursor_cache_get(session, uri, hash_value, NULL, cfg, cursorp)) == 0)
return (0);
WT_RET_NOTFOUND_OK(ret);
}
- return (__session_open_cursor_int(session, uri, owner, NULL, cfg, cursorp));
+ return (__session_open_cursor_int(session, uri, owner, NULL, cfg, hash_value, cursorp));
}
/*
@@ -565,10 +572,11 @@ __session_open_cursor(WT_SESSION *wt_session, const char *uri, WT_CURSOR *to_dup
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ uint64_t hash_value;
bool dup_backup, statjoin;
cursor = *cursorp = NULL;
-
+ hash_value = 0;
dup_backup = false;
session = (WT_SESSION_IMPL *)wt_session;
SESSION_API_CALL(session, open_cursor, config, cfg);
@@ -579,7 +587,8 @@ __session_open_cursor(WT_SESSION *wt_session, const char *uri, WT_CURSOR *to_dup
WT_ERR_MSG(session, EINVAL,
"should be passed either a URI or a cursor to duplicate, but not both");
- if ((ret = __wt_cursor_cache_get(session, uri, to_dup, cfg, &cursor)) == 0)
+ __wt_cursor_get_hash(session, uri, to_dup, &hash_value);
+ if ((ret = __wt_cursor_cache_get(session, uri, hash_value, to_dup, cfg, &cursor)) == 0)
goto done;
/*
@@ -600,8 +609,11 @@ __session_open_cursor(WT_SESSION *wt_session, const char *uri, WT_CURSOR *to_dup
}
}
+ if (config != NULL && (WT_PREFIX_MATCH(uri, "backup:") || to_dup != NULL))
+ __wt_verbose(session, WT_VERB_BACKUP, "Backup cursor config \"%s\"", config);
+
WT_ERR(__session_open_cursor_int(
- session, uri, NULL, statjoin || dup_backup ? to_dup : NULL, cfg, &cursor));
+ session, uri, NULL, statjoin || dup_backup ? to_dup : NULL, cfg, hash_value, &cursor));
done:
if (to_dup != NULL && !statjoin && !dup_backup)
@@ -2142,7 +2154,7 @@ __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, con
* caller decline this work.
*/
if (open_metadata) {
- WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ WT_ASSERT(session, !FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA));
if ((ret = __wt_metadata_cursor(session, NULL)) != 0) {
WT_TRET(__wt_session_close_internal(session));
return (ret);
@@ -2159,7 +2171,7 @@ __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, con
*/
int
__wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool open_metadata,
- uint32_t session_flags, WT_SESSION_IMPL **sessionp)
+ uint32_t session_flags, uint32_t session_lock_flags, WT_SESSION_IMPL **sessionp)
{
WT_SESSION_IMPL *session;
@@ -2175,6 +2187,7 @@ __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool open
* during close. Set a flag to avoid this: internal sessions are not closed automatically.
*/
F_SET(session, session_flags | WT_SESSION_INTERNAL);
+ FLD_SET(session->lock_flags, session_lock_flags);
*sessionp = session;
return (0);
diff --git a/src/third_party/wiredtiger/src/session/session_dhandle.c b/src/third_party/wiredtiger/src/session/session_dhandle.c
index 502610d434b..98e58f56aea 100644
--- a/src/third_party/wiredtiger/src/session/session_dhandle.c
+++ b/src/third_party/wiredtiger/src/session/session_dhandle.c
@@ -494,7 +494,7 @@ __wt_session_get_dhandle(WT_SESSION_IMPL *session, const char *uri, const char *
* handles in the meantime. A combination of the schema and handle list locks are used to
* enforce this.
*/
- if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) {
+ if (!FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA)) {
dhandle->excl_session = NULL;
dhandle->excl_ref = 0;
F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE);
diff --git a/src/third_party/wiredtiger/src/support/modify.c b/src/third_party/wiredtiger/src/support/modify.c
index 8e5d955f94d..390e7d2acd7 100644
--- a/src/third_party/wiredtiger/src/support/modify.c
+++ b/src/third_party/wiredtiger/src/support/modify.c
@@ -437,108 +437,6 @@ err:
}
/*
- * __wt_modify_vector_init --
- * Initialize a modify vector.
- */
-void
-__wt_modify_vector_init(WT_SESSION_IMPL *session, WT_MODIFY_VECTOR *modifies)
-{
- WT_CLEAR(*modifies);
- modifies->session = session;
- modifies->listp = modifies->list;
-}
-
-/*
- * __wt_modify_vector_push --
- * Push a modify update pointer to a modify vector. If we exceed the allowed stack space in the
- * vector, we'll be doing malloc here.
- */
-int
-__wt_modify_vector_push(WT_MODIFY_VECTOR *modifies, WT_UPDATE *upd)
-{
- WT_DECL_RET;
- bool migrate_from_stack;
-
- migrate_from_stack = false;
-
- if (modifies->size >= WT_MODIFY_VECTOR_STACK_SIZE) {
- if (modifies->allocated_bytes == 0 && modifies->size == WT_MODIFY_VECTOR_STACK_SIZE) {
- migrate_from_stack = true;
- modifies->listp = NULL;
- }
- WT_ERR(__wt_realloc_def(
- modifies->session, &modifies->allocated_bytes, modifies->size + 1, &modifies->listp));
- if (migrate_from_stack)
- memcpy(modifies->listp, modifies->list, sizeof(modifies->list));
- }
- modifies->listp[modifies->size++] = upd;
- return (0);
-
-err:
- /*
- * This only happens when we're migrating from the stack to the heap but failed to allocate. In
- * that case, point back to the stack allocated memory and set the allocation to zero to
- * indicate that we don't have heap memory to free.
- *
- * If we're already on the heap, we have nothing to do. The realloc call above won't touch the
- * list pointer unless allocation is successful and we won't have incremented the size yet.
- */
- if (modifies->listp == NULL) {
- WT_ASSERT(modifies->session, modifies->size == WT_MODIFY_VECTOR_STACK_SIZE);
- modifies->listp = modifies->list;
- modifies->allocated_bytes = 0;
- }
- return (ret);
-}
-
-/*
- * __wt_modify_vector_pop --
- * Pop an update pointer off a modify vector.
- */
-void
-__wt_modify_vector_pop(WT_MODIFY_VECTOR *modifies, WT_UPDATE **updp)
-{
- WT_ASSERT(modifies->session, modifies->size > 0);
-
- *updp = modifies->listp[--modifies->size];
-}
-
-/*
- * __wt_modify_vector_peek --
- * Peek an update pointer off a modify vector.
- */
-void
-__wt_modify_vector_peek(WT_MODIFY_VECTOR *modifies, WT_UPDATE **updp)
-{
- WT_ASSERT(modifies->session, modifies->size > 0);
-
- *updp = modifies->listp[modifies->size - 1];
-}
-
-/*
- * __wt_modify_vector_clear --
- * Clear a modify vector.
- */
-void
-__wt_modify_vector_clear(WT_MODIFY_VECTOR *modifies)
-{
- modifies->size = 0;
-}
-
-/*
- * __wt_modify_vector_free --
- * Free any resources associated with a modify vector. If we exceeded the allowed stack space on
- * the vector and had to fallback to dynamic allocations, we'll be doing a free here.
- */
-void
-__wt_modify_vector_free(WT_MODIFY_VECTOR *modifies)
-{
- if (modifies->allocated_bytes != 0)
- __wt_free(modifies->session, modifies->listp);
- __wt_modify_vector_init(modifies->session, modifies);
-}
-
-/*
* __wt_modify_reconstruct_from_upd_list --
* Takes an in-memory modify and populates an update value with the reconstructed full value.
*/
@@ -548,8 +446,8 @@ __wt_modify_reconstruct_from_upd_list(
{
WT_CURSOR *cursor;
WT_DECL_RET;
- WT_MODIFY_VECTOR modifies;
WT_TIME_WINDOW tw;
+ WT_UPDATE_VECTOR modifies;
WT_ASSERT(session, upd->type == WT_UPDATE_MODIFY);
@@ -560,7 +458,7 @@ __wt_modify_reconstruct_from_upd_list(
upd_value->tw.start_txn = upd->txnid;
/* Construct full update */
- __wt_modify_vector_init(session, &modifies);
+ __wt_update_vector_init(session, &modifies);
/* Find a complete update. */
for (; upd != NULL; upd = upd->next) {
if (upd->txnid == WT_TXN_ABORTED)
@@ -570,7 +468,7 @@ __wt_modify_reconstruct_from_upd_list(
break;
if (upd->type == WT_UPDATE_MODIFY)
- WT_ERR(__wt_modify_vector_push(&modifies, upd));
+ WT_ERR(__wt_update_vector_push(&modifies, upd));
}
/*
* If there's no full update, the base item is the on-page item. If the update is a tombstone,
@@ -599,11 +497,11 @@ __wt_modify_reconstruct_from_upd_list(
}
/* Once we have a base item, roll forward through any visible modify updates. */
while (modifies.size > 0) {
- __wt_modify_vector_pop(&modifies, &upd);
+ __wt_update_vector_pop(&modifies, &upd);
WT_ERR(__wt_modify_apply_item(session, cursor->value_format, &upd_value->buf, upd->data));
}
upd_value->type = WT_UPDATE_STANDARD;
err:
- __wt_modify_vector_free(&modifies);
+ __wt_update_vector_free(&modifies);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 499781e181e..d622d44589e 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -118,6 +118,7 @@ static const char *const __stats_dsrc_desc[] = {
"cache: bytes read into cache",
"cache: bytes written from cache",
"cache: checkpoint blocked page eviction",
+ "cache: checkpoint of history store file blocked non-history store page eviction",
"cache: eviction walk target pages histogram - 0-9",
"cache: eviction walk target pages histogram - 10-31",
"cache: eviction walk target pages histogram - 128 and higher",
@@ -136,8 +137,8 @@ static const char *const __stats_dsrc_desc[] = {
"cache: history store table insert calls",
"cache: history store table insert calls that returned restart",
"cache: history store table out-of-order resolved updates that lose their durable timestamp",
- "cache: history store table out-of-order updates that were fixed up by moving existing records",
- "cache: history store table out-of-order updates that were fixed up during insertion",
+ "cache: history store table out-of-order updates that were fixed up by reinserting with the "
+ "fixed timestamp",
"cache: history store table reads",
"cache: history store table reads missed",
"cache: history store table reads requiring squashed modifies",
@@ -146,8 +147,8 @@ static const char *const __stats_dsrc_desc[] = {
"cache: history store table truncation to remove an update",
"cache: history store table truncation to remove range of updates due to key being removed from "
"the data page during reconciliation",
- "cache: history store table truncation to remove range of updates due to non timestamped update "
- "on data page",
+ "cache: history store table truncation to remove range of updates due to out-of-order timestamp "
+ "update on data page",
"cache: history store table writes requiring squashed modifies",
"cache: in-memory page passed criteria to be split",
"cache: in-memory page splits",
@@ -174,6 +175,7 @@ static const char *const __stats_dsrc_desc[] = {
"cursor: Total number of entries skipped by cursor next calls",
"cursor: Total number of entries skipped by cursor prev calls",
"cursor: Total number of entries skipped to position the history store cursor",
+ "cursor: Total number of times a search near has exited due to prefix config",
"cursor: cursor next calls that skip due to a globally visible history store tombstone",
"cursor: cursor next calls that skip greater than or equal to 100 entries",
"cursor: cursor next calls that skip less than 100 entries",
@@ -372,6 +374,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->cache_bytes_read = 0;
stats->cache_bytes_write = 0;
stats->cache_eviction_checkpoint = 0;
+ stats->cache_eviction_blocked_checkpoint_hs = 0;
stats->cache_eviction_target_page_lt10 = 0;
stats->cache_eviction_target_page_lt32 = 0;
stats->cache_eviction_target_page_ge128 = 0;
@@ -390,8 +393,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->cache_hs_insert = 0;
stats->cache_hs_insert_restart = 0;
stats->cache_hs_order_lose_durable_timestamp = 0;
- stats->cache_hs_order_fixup_move = 0;
- stats->cache_hs_order_fixup_insert = 0;
+ stats->cache_hs_order_reinsert = 0;
stats->cache_hs_read = 0;
stats->cache_hs_read_miss = 0;
stats->cache_hs_read_squash = 0;
@@ -399,7 +401,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->cache_hs_key_truncate_rts = 0;
stats->cache_hs_key_truncate = 0;
stats->cache_hs_key_truncate_onpage_removal = 0;
- stats->cache_hs_key_truncate_non_ts = 0;
+ stats->cache_hs_order_remove = 0;
stats->cache_hs_write_squash = 0;
stats->cache_inmem_splittable = 0;
stats->cache_inmem_split = 0;
@@ -426,6 +428,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->cursor_next_skip_total = 0;
stats->cursor_prev_skip_total = 0;
stats->cursor_skip_hs_cur_position = 0;
+ stats->cursor_search_near_prefix_fast_paths = 0;
stats->cursor_next_hs_tombstone = 0;
stats->cursor_next_skip_ge_100 = 0;
stats->cursor_next_skip_lt_100 = 0;
@@ -610,6 +613,7 @@ __wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to)
to->cache_bytes_read += from->cache_bytes_read;
to->cache_bytes_write += from->cache_bytes_write;
to->cache_eviction_checkpoint += from->cache_eviction_checkpoint;
+ to->cache_eviction_blocked_checkpoint_hs += from->cache_eviction_blocked_checkpoint_hs;
to->cache_eviction_target_page_lt10 += from->cache_eviction_target_page_lt10;
to->cache_eviction_target_page_lt32 += from->cache_eviction_target_page_lt32;
to->cache_eviction_target_page_ge128 += from->cache_eviction_target_page_ge128;
@@ -628,8 +632,7 @@ __wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to)
to->cache_hs_insert += from->cache_hs_insert;
to->cache_hs_insert_restart += from->cache_hs_insert_restart;
to->cache_hs_order_lose_durable_timestamp += from->cache_hs_order_lose_durable_timestamp;
- to->cache_hs_order_fixup_move += from->cache_hs_order_fixup_move;
- to->cache_hs_order_fixup_insert += from->cache_hs_order_fixup_insert;
+ to->cache_hs_order_reinsert += from->cache_hs_order_reinsert;
to->cache_hs_read += from->cache_hs_read;
to->cache_hs_read_miss += from->cache_hs_read_miss;
to->cache_hs_read_squash += from->cache_hs_read_squash;
@@ -637,7 +640,7 @@ __wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to)
to->cache_hs_key_truncate_rts += from->cache_hs_key_truncate_rts;
to->cache_hs_key_truncate += from->cache_hs_key_truncate;
to->cache_hs_key_truncate_onpage_removal += from->cache_hs_key_truncate_onpage_removal;
- to->cache_hs_key_truncate_non_ts += from->cache_hs_key_truncate_non_ts;
+ to->cache_hs_order_remove += from->cache_hs_order_remove;
to->cache_hs_write_squash += from->cache_hs_write_squash;
to->cache_inmem_splittable += from->cache_inmem_splittable;
to->cache_inmem_split += from->cache_inmem_split;
@@ -664,6 +667,7 @@ __wt_stat_dsrc_aggregate_single(WT_DSRC_STATS *from, WT_DSRC_STATS *to)
to->cursor_next_skip_total += from->cursor_next_skip_total;
to->cursor_prev_skip_total += from->cursor_prev_skip_total;
to->cursor_skip_hs_cur_position += from->cursor_skip_hs_cur_position;
+ to->cursor_search_near_prefix_fast_paths += from->cursor_search_near_prefix_fast_paths;
to->cursor_next_hs_tombstone += from->cursor_next_hs_tombstone;
to->cursor_next_skip_ge_100 += from->cursor_next_skip_ge_100;
to->cursor_next_skip_lt_100 += from->cursor_next_skip_lt_100;
@@ -843,6 +847,8 @@ __wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to)
to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read);
to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write);
to->cache_eviction_checkpoint += WT_STAT_READ(from, cache_eviction_checkpoint);
+ to->cache_eviction_blocked_checkpoint_hs +=
+ WT_STAT_READ(from, cache_eviction_blocked_checkpoint_hs);
to->cache_eviction_target_page_lt10 += WT_STAT_READ(from, cache_eviction_target_page_lt10);
to->cache_eviction_target_page_lt32 += WT_STAT_READ(from, cache_eviction_target_page_lt32);
to->cache_eviction_target_page_ge128 += WT_STAT_READ(from, cache_eviction_target_page_ge128);
@@ -865,8 +871,7 @@ __wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to)
to->cache_hs_insert_restart += WT_STAT_READ(from, cache_hs_insert_restart);
to->cache_hs_order_lose_durable_timestamp +=
WT_STAT_READ(from, cache_hs_order_lose_durable_timestamp);
- to->cache_hs_order_fixup_move += WT_STAT_READ(from, cache_hs_order_fixup_move);
- to->cache_hs_order_fixup_insert += WT_STAT_READ(from, cache_hs_order_fixup_insert);
+ to->cache_hs_order_reinsert += WT_STAT_READ(from, cache_hs_order_reinsert);
to->cache_hs_read += WT_STAT_READ(from, cache_hs_read);
to->cache_hs_read_miss += WT_STAT_READ(from, cache_hs_read_miss);
to->cache_hs_read_squash += WT_STAT_READ(from, cache_hs_read_squash);
@@ -876,7 +881,7 @@ __wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to)
to->cache_hs_key_truncate += WT_STAT_READ(from, cache_hs_key_truncate);
to->cache_hs_key_truncate_onpage_removal +=
WT_STAT_READ(from, cache_hs_key_truncate_onpage_removal);
- to->cache_hs_key_truncate_non_ts += WT_STAT_READ(from, cache_hs_key_truncate_non_ts);
+ to->cache_hs_order_remove += WT_STAT_READ(from, cache_hs_order_remove);
to->cache_hs_write_squash += WT_STAT_READ(from, cache_hs_write_squash);
to->cache_inmem_splittable += WT_STAT_READ(from, cache_inmem_splittable);
to->cache_inmem_split += WT_STAT_READ(from, cache_inmem_split);
@@ -903,6 +908,8 @@ __wt_stat_dsrc_aggregate(WT_DSRC_STATS **from, WT_DSRC_STATS *to)
to->cursor_next_skip_total += WT_STAT_READ(from, cursor_next_skip_total);
to->cursor_prev_skip_total += WT_STAT_READ(from, cursor_prev_skip_total);
to->cursor_skip_hs_cur_position += WT_STAT_READ(from, cursor_skip_hs_cur_position);
+ to->cursor_search_near_prefix_fast_paths +=
+ WT_STAT_READ(from, cursor_search_near_prefix_fast_paths);
to->cursor_next_hs_tombstone += WT_STAT_READ(from, cursor_next_hs_tombstone);
to->cursor_next_skip_ge_100 += WT_STAT_READ(from, cursor_next_skip_ge_100);
to->cursor_next_skip_lt_100 += WT_STAT_READ(from, cursor_next_skip_lt_100);
@@ -1025,8 +1032,6 @@ static const char *const __stats_connection_desc[] = {
"cache: forced eviction - pages selected count",
"cache: forced eviction - pages selected unable to be evicted count",
"cache: forced eviction - pages selected unable to be evicted time",
- "cache: forced eviction - session returned rollback error while force evicting due to being "
- "oldest",
"cache: hazard pointer check calls",
"cache: hazard pointer check entries walked",
"cache: hazard pointer maximum array length",
@@ -1284,6 +1289,7 @@ static const char *const __stats_connection_desc[] = {
"transaction: rollback to stable pages visited",
"transaction: rollback to stable tree walk skipping pages",
"transaction: rollback to stable updates aborted",
+ "transaction: sessions scanned in each walk of concurrent sessions",
"transaction: set timestamp calls",
"transaction: set timestamp durable calls",
"transaction: set timestamp durable updates",
@@ -1293,6 +1299,7 @@ static const char *const __stats_connection_desc[] = {
"transaction: set timestamp stable updates",
"transaction: transaction begins",
"transaction: transaction checkpoint currently running",
+ "transaction: transaction checkpoint currently running for history store file",
"transaction: transaction checkpoint generation",
"transaction: transaction checkpoint history store file duration (usecs)",
"transaction: transaction checkpoint max time (msecs)",
@@ -1337,6 +1344,7 @@ static const char *const __stats_connection_desc[] = {
"cache: bytes read into cache",
"cache: bytes written from cache",
"cache: checkpoint blocked page eviction",
+ "cache: checkpoint of history store file blocked non-history store page eviction",
"cache: eviction walk target pages histogram - 0-9",
"cache: eviction walk target pages histogram - 10-31",
"cache: eviction walk target pages histogram - 128 and higher",
@@ -1355,8 +1363,8 @@ static const char *const __stats_connection_desc[] = {
"cache: history store table insert calls",
"cache: history store table insert calls that returned restart",
"cache: history store table out-of-order resolved updates that lose their durable timestamp",
- "cache: history store table out-of-order updates that were fixed up by moving existing records",
- "cache: history store table out-of-order updates that were fixed up during insertion",
+ "cache: history store table out-of-order updates that were fixed up by reinserting with the "
+ "fixed timestamp",
"cache: history store table reads",
"cache: history store table reads missed",
"cache: history store table reads requiring squashed modifies",
@@ -1365,8 +1373,8 @@ static const char *const __stats_connection_desc[] = {
"cache: history store table truncation to remove an update",
"cache: history store table truncation to remove range of updates due to key being removed from "
"the data page during reconciliation",
- "cache: history store table truncation to remove range of updates due to non timestamped update "
- "on data page",
+ "cache: history store table truncation to remove range of updates due to out-of-order timestamp "
+ "update on data page",
"cache: history store table writes requiring squashed modifies",
"cache: in-memory page passed criteria to be split",
"cache: in-memory page splits",
@@ -1393,6 +1401,7 @@ static const char *const __stats_connection_desc[] = {
"cursor: Total number of entries skipped by cursor next calls",
"cursor: Total number of entries skipped by cursor prev calls",
"cursor: Total number of entries skipped to position the history store cursor",
+ "cursor: Total number of times a search near has exited due to prefix config",
"cursor: cursor next calls that skip due to a globally visible history store tombstone",
"cursor: cursor next calls that skip greater than or equal to 100 entries",
"cursor: cursor next calls that skip less than 100 entries",
@@ -1544,7 +1553,6 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_eviction_force = 0;
stats->cache_eviction_force_fail = 0;
stats->cache_eviction_force_fail_time = 0;
- stats->cache_eviction_force_rollback = 0;
stats->cache_hazard_checks = 0;
stats->cache_hazard_walks = 0;
stats->cache_hazard_max = 0;
@@ -1800,6 +1808,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->txn_rts_pages_visited = 0;
stats->txn_rts_tree_walk_skip_pages = 0;
stats->txn_rts_upd_aborted = 0;
+ stats->txn_sessions_walked = 0;
stats->txn_set_ts = 0;
stats->txn_set_ts_durable = 0;
stats->txn_set_ts_durable_upd = 0;
@@ -1809,6 +1818,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->txn_set_ts_stable_upd = 0;
stats->txn_begin = 0;
/* not clearing txn_checkpoint_running */
+ /* not clearing txn_checkpoint_running_hs */
/* not clearing txn_checkpoint_generation */
stats->txn_hs_ckpt_duration = 0;
/* not clearing txn_checkpoint_time_max */
@@ -1852,6 +1862,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_bytes_read = 0;
stats->cache_bytes_write = 0;
stats->cache_eviction_checkpoint = 0;
+ stats->cache_eviction_blocked_checkpoint_hs = 0;
stats->cache_eviction_target_page_lt10 = 0;
stats->cache_eviction_target_page_lt32 = 0;
stats->cache_eviction_target_page_ge128 = 0;
@@ -1870,8 +1881,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_hs_insert = 0;
stats->cache_hs_insert_restart = 0;
stats->cache_hs_order_lose_durable_timestamp = 0;
- stats->cache_hs_order_fixup_move = 0;
- stats->cache_hs_order_fixup_insert = 0;
+ stats->cache_hs_order_reinsert = 0;
stats->cache_hs_read = 0;
stats->cache_hs_read_miss = 0;
stats->cache_hs_read_squash = 0;
@@ -1879,7 +1889,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_hs_key_truncate_rts = 0;
stats->cache_hs_key_truncate = 0;
stats->cache_hs_key_truncate_onpage_removal = 0;
- stats->cache_hs_key_truncate_non_ts = 0;
+ stats->cache_hs_order_remove = 0;
stats->cache_hs_write_squash = 0;
stats->cache_inmem_splittable = 0;
stats->cache_inmem_split = 0;
@@ -1906,6 +1916,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cursor_next_skip_total = 0;
stats->cursor_prev_skip_total = 0;
stats->cursor_skip_hs_cur_position = 0;
+ stats->cursor_search_near_prefix_fast_paths = 0;
stats->cursor_next_hs_tombstone = 0;
stats->cursor_next_skip_ge_100 = 0;
stats->cursor_next_skip_lt_100 = 0;
@@ -2036,7 +2047,6 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->cache_eviction_force += WT_STAT_READ(from, cache_eviction_force);
to->cache_eviction_force_fail += WT_STAT_READ(from, cache_eviction_force_fail);
to->cache_eviction_force_fail_time += WT_STAT_READ(from, cache_eviction_force_fail_time);
- to->cache_eviction_force_rollback += WT_STAT_READ(from, cache_eviction_force_rollback);
to->cache_hazard_checks += WT_STAT_READ(from, cache_hazard_checks);
to->cache_hazard_walks += WT_STAT_READ(from, cache_hazard_walks);
if ((v = WT_STAT_READ(from, cache_hazard_max)) > to->cache_hazard_max)
@@ -2309,6 +2319,7 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->txn_rts_pages_visited += WT_STAT_READ(from, txn_rts_pages_visited);
to->txn_rts_tree_walk_skip_pages += WT_STAT_READ(from, txn_rts_tree_walk_skip_pages);
to->txn_rts_upd_aborted += WT_STAT_READ(from, txn_rts_upd_aborted);
+ to->txn_sessions_walked += WT_STAT_READ(from, txn_sessions_walked);
to->txn_set_ts += WT_STAT_READ(from, txn_set_ts);
to->txn_set_ts_durable += WT_STAT_READ(from, txn_set_ts_durable);
to->txn_set_ts_durable_upd += WT_STAT_READ(from, txn_set_ts_durable_upd);
@@ -2318,6 +2329,7 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->txn_set_ts_stable_upd += WT_STAT_READ(from, txn_set_ts_stable_upd);
to->txn_begin += WT_STAT_READ(from, txn_begin);
to->txn_checkpoint_running += WT_STAT_READ(from, txn_checkpoint_running);
+ to->txn_checkpoint_running_hs += WT_STAT_READ(from, txn_checkpoint_running_hs);
to->txn_checkpoint_generation += WT_STAT_READ(from, txn_checkpoint_generation);
to->txn_hs_ckpt_duration += WT_STAT_READ(from, txn_hs_ckpt_duration);
to->txn_checkpoint_time_max += WT_STAT_READ(from, txn_checkpoint_time_max);
@@ -2364,6 +2376,8 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read);
to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write);
to->cache_eviction_checkpoint += WT_STAT_READ(from, cache_eviction_checkpoint);
+ to->cache_eviction_blocked_checkpoint_hs +=
+ WT_STAT_READ(from, cache_eviction_blocked_checkpoint_hs);
to->cache_eviction_target_page_lt10 += WT_STAT_READ(from, cache_eviction_target_page_lt10);
to->cache_eviction_target_page_lt32 += WT_STAT_READ(from, cache_eviction_target_page_lt32);
to->cache_eviction_target_page_ge128 += WT_STAT_READ(from, cache_eviction_target_page_ge128);
@@ -2386,8 +2400,7 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->cache_hs_insert_restart += WT_STAT_READ(from, cache_hs_insert_restart);
to->cache_hs_order_lose_durable_timestamp +=
WT_STAT_READ(from, cache_hs_order_lose_durable_timestamp);
- to->cache_hs_order_fixup_move += WT_STAT_READ(from, cache_hs_order_fixup_move);
- to->cache_hs_order_fixup_insert += WT_STAT_READ(from, cache_hs_order_fixup_insert);
+ to->cache_hs_order_reinsert += WT_STAT_READ(from, cache_hs_order_reinsert);
to->cache_hs_read += WT_STAT_READ(from, cache_hs_read);
to->cache_hs_read_miss += WT_STAT_READ(from, cache_hs_read_miss);
to->cache_hs_read_squash += WT_STAT_READ(from, cache_hs_read_squash);
@@ -2397,7 +2410,7 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->cache_hs_key_truncate += WT_STAT_READ(from, cache_hs_key_truncate);
to->cache_hs_key_truncate_onpage_removal +=
WT_STAT_READ(from, cache_hs_key_truncate_onpage_removal);
- to->cache_hs_key_truncate_non_ts += WT_STAT_READ(from, cache_hs_key_truncate_non_ts);
+ to->cache_hs_order_remove += WT_STAT_READ(from, cache_hs_order_remove);
to->cache_hs_write_squash += WT_STAT_READ(from, cache_hs_write_squash);
to->cache_inmem_splittable += WT_STAT_READ(from, cache_inmem_splittable);
to->cache_inmem_split += WT_STAT_READ(from, cache_inmem_split);
@@ -2424,6 +2437,8 @@ __wt_stat_connection_aggregate(WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *
to->cursor_next_skip_total += WT_STAT_READ(from, cursor_next_skip_total);
to->cursor_prev_skip_total += WT_STAT_READ(from, cursor_prev_skip_total);
to->cursor_skip_hs_cur_position += WT_STAT_READ(from, cursor_skip_hs_cur_position);
+ to->cursor_search_near_prefix_fast_paths +=
+ WT_STAT_READ(from, cursor_search_near_prefix_fast_paths);
to->cursor_next_hs_tombstone += WT_STAT_READ(from, cursor_next_hs_tombstone);
to->cursor_next_skip_ge_100 += WT_STAT_READ(from, cursor_next_skip_ge_100);
to->cursor_next_skip_lt_100 += WT_STAT_READ(from, cursor_next_skip_lt_100);
diff --git a/src/third_party/wiredtiger/src/support/thread_group.c b/src/third_party/wiredtiger/src/support/thread_group.c
index 7b70ef63906..6bac6d5450d 100644
--- a/src/third_party/wiredtiger/src/support/thread_group.c
+++ b/src/third_party/wiredtiger/src/support/thread_group.c
@@ -181,7 +181,7 @@ __thread_group_resize(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, uint32_t
/* Threads get their own session. */
session_flags = LF_ISSET(WT_THREAD_CAN_WAIT) ? WT_SESSION_CAN_WAIT : 0;
WT_ERR(
- __wt_open_internal_session(conn, group->name, false, session_flags, &thread->session));
+ __wt_open_internal_session(conn, group->name, false, session_flags, 0, &thread->session));
if (LF_ISSET(WT_THREAD_PANIC_FAIL))
F_SET(thread, WT_THREAD_PANIC_FAIL);
thread->id = i;
diff --git a/src/third_party/wiredtiger/src/support/update_vector.c b/src/third_party/wiredtiger/src/support/update_vector.c
new file mode 100644
index 00000000000..ef57b28f7f4
--- /dev/null
+++ b/src/third_party/wiredtiger/src/support/update_vector.c
@@ -0,0 +1,111 @@
+/*-
+ * Copyright (c) 2014-present MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __wt_update_vector_init --
+ *     Put an update vector into its empty starting state: clear the structure, record the session
+ *     it belongs to and make the active list pointer refer to the list embedded in the structure.
+ */
+void
+__wt_update_vector_init(WT_SESSION_IMPL *session, WT_UPDATE_VECTOR *updates)
+{
+    WT_CLEAR(*updates);
+
+    /* Nothing has been allocated yet, elements go into the embedded list. */
+    updates->listp = updates->list;
+    updates->session = session;
+}
+
+/*
+ * __wt_update_vector_push --
+ *     Append an update pointer to an update vector. Once the embedded list is full the contents are
+ *     moved to allocated memory, so this call may allocate.
+ */
+int
+__wt_update_vector_push(WT_UPDATE_VECTOR *updates, WT_UPDATE *upd)
+{
+    WT_DECL_RET;
+    bool migrate_from_stack;
+
+    /* Growing is only a concern once the embedded list's capacity has been reached. */
+    if (updates->size >= WT_UPDATE_VECTOR_STACK_SIZE) {
+        /*
+         * The first time the embedded list overflows there is no allocation yet. Hand the
+         * allocator a NULL pointer rather than the embedded list, then copy the existing entries
+         * across once the allocation has succeeded.
+         */
+        migrate_from_stack =
+          updates->allocated_bytes == 0 && updates->size == WT_UPDATE_VECTOR_STACK_SIZE;
+        if (migrate_from_stack)
+            updates->listp = NULL;
+
+        WT_ERR(__wt_realloc_def(
+          updates->session, &updates->allocated_bytes, updates->size + 1, &updates->listp));
+
+        if (migrate_from_stack)
+            memcpy(updates->listp, updates->list, sizeof(updates->list));
+    }
+
+    updates->listp[updates->size] = upd;
+    ++updates->size;
+    return (0);
+
+err:
+    /*
+     * A NULL list pointer means the failure happened while moving off the embedded list. Go back
+     * to using the embedded list and record that no allocated memory is held, so a later free
+     * doesn't try to release anything.
+     *
+     * Otherwise the vector was already using allocated memory: the list pointer is only changed
+     * by a successful reallocation and the size has not been incremented, so the vector is still
+     * consistent and nothing needs undoing.
+     */
+    if (updates->listp == NULL) {
+        WT_ASSERT(updates->session, updates->size == WT_UPDATE_VECTOR_STACK_SIZE);
+        updates->allocated_bytes = 0;
+        updates->listp = updates->list;
+    }
+    return (ret);
+}
+
+/*
+ * __wt_update_vector_pop --
+ *     Remove the most recently pushed update pointer from an update vector and return it through
+ *     updp. The vector must not be empty.
+ */
+void
+__wt_update_vector_pop(WT_UPDATE_VECTOR *updates, WT_UPDATE **updp)
+{
+    WT_ASSERT(updates->session, updates->size > 0);
+
+    /* Shrink first: the new size is the index of the element being handed back. */
+    updates->size--;
+    *updp = updates->listp[updates->size];
+}
+
+/*
+ * __wt_update_vector_peek --
+ *     Return the most recently pushed update pointer through updp without removing it from the
+ *     update vector. The vector must not be empty.
+ */
+void
+__wt_update_vector_peek(WT_UPDATE_VECTOR *updates, WT_UPDATE **updp)
+{
+    WT_UPDATE **top;
+
+    WT_ASSERT(updates->session, updates->size > 0);
+
+    top = &updates->listp[updates->size - 1];
+    *updp = *top;
+}
+
+/*
+ * __wt_update_vector_clear --
+ * Empty an update vector by resetting its element count to zero. The list pointer and any
+ * allocation it refers to are left untouched.
+ */
+void
+__wt_update_vector_clear(WT_UPDATE_VECTOR *updates)
+{
+ updates->size = 0;
+}
+
+/*
+ * __wt_update_vector_free --
+ *     Release whatever an update vector holds and leave it freshly initialized. Memory is only
+ *     freed if the vector outgrew its embedded list and fell back to an allocation.
+ */
+void
+__wt_update_vector_free(WT_UPDATE_VECTOR *updates)
+{
+    WT_SESSION_IMPL *session;
+
+    session = updates->session;
+
+    /* A zero byte count means the list pointer still refers to the embedded list. */
+    if (updates->allocated_bytes != 0)
+        __wt_free(session, updates->listp);
+
+    __wt_update_vector_init(session, updates);
+}
diff --git a/src/third_party/wiredtiger/src/tiered/tiered_config.c b/src/third_party/wiredtiger/src/tiered/tiered_config.c
new file mode 100644
index 00000000000..23eb24131cc
--- /dev/null
+++ b/src/third_party/wiredtiger/src/tiered/tiered_config.c
@@ -0,0 +1,188 @@
+/*-
+ * Copyright (c) 2014-present MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __tiered_confchk --
+ *     Look up a storage source by its configured name. An empty name or "none" is accepted and
+ *     leaves *nstoragep set to NULL; a name that matches no entry on the connection's storage
+ *     source list is an EINVAL error.
+ */
+static int
+__tiered_confchk(
+  WT_SESSION_IMPL *session, WT_CONFIG_ITEM *name, WT_NAMED_STORAGE_SOURCE **nstoragep)
+{
+    WT_NAMED_STORAGE_SOURCE *candidate;
+
+    *nstoragep = NULL;
+
+    /* No storage source was asked for. */
+    if (name->len == 0)
+        return (0);
+    if (WT_STRING_MATCH("none", name->str, name->len))
+        return (0);
+
+    TAILQ_FOREACH (candidate, &S2C(session)->storagesrcqh, q) {
+        if (!WT_STRING_MATCH(candidate->name, name->str, name->len))
+            continue;
+        *nstoragep = candidate;
+        return (0);
+    }
+
+    WT_RET_MSG(session, EINVAL, "unknown storage source '%.*s'", (int)name->len, name->str);
+}
+
+/*
+ * __tiered_common_config --
+ *     Read the tiered storage settings shared by connections and btrees (local retention and
+ *     object target size) from the configuration and store them in the bucket storage.
+ */
+static int
+__tiered_common_config(WT_SESSION_IMPL *session, const char **cfg, WT_BUCKET_STORAGE *bstorage)
+{
+    WT_CONFIG_ITEM retention, target_size;
+
+    WT_RET(__wt_config_gets(session, cfg, "tiered_storage.local_retention", &retention));
+    bstorage->retain_secs = (uint64_t)retention.val;
+
+    WT_RET(__wt_config_gets(session, cfg, "tiered_storage.object_target_size", &target_size));
+    bstorage->object_size = (uint64_t)target_size.val;
+
+    return (0);
+}
+
+/*
+ * __wt_tiered_bucket_config --
+ *     Given a configuration, (re)configure the bucket storage and return that structure.
+ *
+ *     On success *bstoragep is NULL if the configuration names no storage source, otherwise it
+ *     refers to the matching bucket storage: an existing one with the same bucket and bucket
+ *     prefix if there is one, or a newly created one. On failure *bstoragep is NULL and nothing
+ *     new is left linked into the storage source's lists.
+ */
+int
+__wt_tiered_bucket_config(
+  WT_SESSION_IMPL *session, const char *cfg[], WT_BUCKET_STORAGE **bstoragep)
+{
+    WT_BUCKET_STORAGE *bstorage, *new;
+    WT_CONFIG_ITEM auth, bucket, name, prefix;
+    WT_CONNECTION_IMPL *conn;
+    WT_DECL_RET;
+    WT_NAMED_STORAGE_SOURCE *nstorage;
+    WT_STORAGE_SOURCE *storage;
+    uint64_t hash_bucket, hash;
+
+    *bstoragep = NULL;
+
+    WT_RET(__wt_config_gets(session, cfg, "tiered_storage.name", &name));
+    bstorage = new = NULL;
+    conn = S2C(session);
+
+    __wt_spin_lock(session, &conn->storage_lock);
+
+    WT_ERR(__tiered_confchk(session, &name, &nstorage));
+    if (nstorage == NULL) {
+        /* Without a storage source, naming a bucket makes no sense. */
+        WT_ERR(__wt_config_gets(session, cfg, "tiered_storage.bucket", &bucket));
+        if (bucket.len != 0)
+            WT_ERR_MSG(
+              session, EINVAL, "tiered_storage.bucket requires tiered_storage.name to be set");
+        goto done;
+    }
+    /*
+     * Check if tiered storage is set on the connection. If someone wants tiered storage on a
+     * table, it needs to be configured on the database as well.
+     */
+    if (conn->bstorage == NULL && bstoragep != &conn->bstorage)
+        WT_ERR_MSG(
+          session, EINVAL, "table tiered storage requires connection tiered storage to be set");
+    /* A bucket and bucket_prefix are required, auth_token is not. */
+    WT_ERR(__wt_config_gets(session, cfg, "tiered_storage.bucket", &bucket));
+    if (bucket.len == 0)
+        WT_ERR_MSG(session, EINVAL, "table tiered storage requires bucket to be set");
+    WT_ERR(__wt_config_gets(session, cfg, "tiered_storage.bucket_prefix", &prefix));
+    if (prefix.len == 0)
+        WT_ERR_MSG(session, EINVAL, "table tiered storage requires bucket_prefix to be set");
+    WT_ERR(__wt_config_gets(session, cfg, "tiered_storage.auth_token", &auth));
+
+    /*
+     * Reuse an existing bucket storage with the same bucket and prefix. Entries are added to the
+     * per-hash-bucket lists through their hashq links, so walk the list with that same field.
+     */
+    hash = __wt_hash_city64(bucket.str, bucket.len);
+    hash_bucket = hash & (conn->hash_size - 1);
+    TAILQ_FOREACH (bstorage, &nstorage->buckethashqh[hash_bucket], hashq) {
+        if (WT_STRING_MATCH(bstorage->bucket, bucket.str, bucket.len) &&
+          (WT_STRING_MATCH(bstorage->bucket_prefix, prefix.str, prefix.len))) {
+            *bstoragep = bstorage;
+            goto done;
+        }
+    }
+
+    WT_ERR(__wt_calloc_one(session, &new));
+    WT_ERR(__wt_strndup(session, auth.str, auth.len, &new->auth_token));
+    WT_ERR(__wt_strndup(session, bucket.str, bucket.len, &new->bucket));
+    WT_ERR(__wt_strndup(session, prefix.str, prefix.len, &new->bucket_prefix));
+
+    /*
+     * If we're creating a new bucket storage, parse the other settings into it. Do this before
+     * the file system is created and before the structure is linked into any list: every step
+     * that can fail then happens while the error path can still simply free the structure.
+     */
+    WT_ERR(__tiered_common_config(session, cfg, new));
+
+    storage = nstorage->storage_source;
+    WT_ERR(storage->ss_customize_file_system(storage, &session->iface, new->bucket,
+      new->bucket_prefix, new->auth_token, NULL, &new->file_system));
+    new->storage_source = storage;
+    F_SET(new, WT_BUCKET_FREE);
+
+    /* Fully constructed: make it visible on the storage source's lists. */
+    TAILQ_INSERT_HEAD(&nstorage->bucketqh, new, q);
+    TAILQ_INSERT_HEAD(&nstorage->buckethashqh[hash_bucket], new, hashq);
+    *bstoragep = new;
+
+done:
+    if (0) {
+err:
+        /* The new structure was never linked anywhere, release it and every string it owns. */
+        if (new != NULL) {
+            __wt_free(session, new->auth_token);
+            __wt_free(session, new->bucket);
+            __wt_free(session, new->bucket_prefix);
+        }
+        __wt_free(session, new);
+    }
+    __wt_spin_unlock(session, &conn->storage_lock);
+    return (ret);
+}
+
+/*
+ * __wt_tiered_conn_config --
+ *     Parse and setup the storage server options for the connection.
+ *
+ *     On the initial call (reconfig false) the connection's bucket storage is looked up or
+ *     created. On a reconfigure only the common settings of the existing bucket storage are
+ *     re-read. Returns 0 on success or the error from configuration parsing.
+ */
+int
+__wt_tiered_conn_config(WT_SESSION_IMPL *session, const char **cfg, bool reconfig)
+{
+    WT_CONNECTION_IMPL *conn;
+
+    conn = S2C(session);
+
+    if (!reconfig)
+        WT_RET(__wt_tiered_bucket_config(session, cfg, &conn->bstorage));
+
+    /* If the connection is not set up for tiered storage there is nothing more to do. */
+    if (conn->bstorage == NULL)
+        return (0);
+    __wt_verbose(session, WT_VERB_TIERED, "TIERED_CONFIG: bucket %s", conn->bstorage->bucket);
+    __wt_verbose(
+      session, WT_VERB_TIERED, "TIERED_CONFIG: prefix %s", conn->bstorage->bucket_prefix);
+
+    /*
+     * If reconfiguring, see if the other settings have changed on the system bucket storage.
+     *
+     * On failure just return the error. The connection's bucket storage was linked into its
+     * storage source's lists when it was created, so freeing it here would leave those lists
+     * pointing at freed memory, and a failed reconfigure shouldn't tear down the configuration
+     * the connection is already running with.
+     */
+    if (reconfig)
+        WT_RET(__tiered_common_config(session, cfg, conn->bstorage));
+
+    WT_STAT_CONN_SET(session, tiered_object_size, conn->bstorage->object_size);
+    WT_STAT_CONN_SET(session, tiered_retention, conn->bstorage->retain_secs);
+
+    /*
+     * Set up the designated file system for the "none" bucket.
+     */
+    WT_ASSERT(session, conn->file_system != NULL);
+    conn->bstorage_none.file_system = conn->file_system;
+
+    return (0);
+}
diff --git a/src/third_party/wiredtiger/src/tiered/tiered_cursor.c b/src/third_party/wiredtiger/src/tiered/tiered_cursor.c
index f0aa30d2023..db45db54f9e 100644
--- a/src/third_party/wiredtiger/src/tiered/tiered_cursor.c
+++ b/src/third_party/wiredtiger/src/tiered/tiered_cursor.c
@@ -8,9 +8,9 @@
#include "wt_internal.h"
-#define WT_FORALL_CURSORS(curtiered, c, i) \
- for ((i) = (curtiered)->tiered->ntiers; (i) > 0;) \
- if (((c) = (curtiered)->cursors[--(i)]) != NULL)
+/*
+ * Visit every non-NULL tier cursor in increasing slot order. The index is advanced inside the "if"
+ * expression, so on entry to the loop body (i) is one past the slot of (c).
+ */
+#define WT_FORALL_CURSORS(curtiered, c, i)     \
+    for ((i) = 0; (i) < WT_TIERED_MAX_TIERS;) \
+        if (((c) = (curtiered)->cursors[(i)++]) != NULL)
#define WT_TIERED_CURCMP(s, tiered, c1, c2, cmp) \
__wt_compare(s, (tiered)->collator, &(c1)->key, &(c2)->key, &(cmp))
@@ -34,8 +34,6 @@ __curtiered_open_cursors(WT_CURSOR_TIERED *curtiered)
dhandle = NULL;
tiered = curtiered->tiered;
- WT_ASSERT(session, tiered->ntiers > 0);
-
/*
* If the key is pointing to memory that is pinned by a tier cursor, take a copy before closing
* cursors.
@@ -46,14 +44,16 @@ __curtiered_open_cursors(WT_CURSOR_TIERED *curtiered)
F_CLR(curtiered, WT_CURTIERED_ITERATE_NEXT | WT_CURTIERED_ITERATE_PREV);
WT_ASSERT(session, curtiered->cursors == NULL);
- WT_ERR(__wt_calloc_def(session, tiered->ntiers, &curtiered->cursors));
+ WT_ERR(__wt_calloc_def(session, WT_TIERED_MAX_TIERS, &curtiered->cursors));
/* Open the cursors for tiers that have changed. */
__wt_verbose(session, WT_VERB_TIERED,
- "tiered opening cursor session(%p):tiered cursor(%p), tiers: %u", (void *)session,
- (void *)curtiered, tiered->ntiers);
- for (i = 0; i != tiered->ntiers; i++) {
- dhandle = tiered->tiers[i];
+ "tiered opening cursor session(%p):tiered cursor(%p), tiers: %d", (void *)session,
+ (void *)curtiered, (int)WT_TIERED_MAX_TIERS);
+ for (i = 0; i < WT_TIERED_MAX_TIERS; i++) {
+ dhandle = tiered->tiers[i].tier;
+ if (dhandle == NULL)
+ continue;
/*
* Read from the checkpoint if the file has been written. Once all cursors switch, the
@@ -87,7 +87,7 @@ __curtiered_close_cursors(WT_SESSION_IMPL *session, WT_CURSOR_TIERED *curtiered)
return (0);
/* Walk the cursors, closing them. */
- for (i = 0; i < curtiered->tiered->ntiers; i++) {
+ for (i = 0; i < WT_TIERED_MAX_TIERS; i++) {
if ((c = (curtiered)->cursors[i]) != NULL) {
curtiered->cursors[i] = NULL;
WT_RET(c->close(c));
@@ -795,15 +795,13 @@ __curtiered_put(WT_CURSOR_TIERED *curtiered, const WT_ITEM *key, const WT_ITEM *
bool position, bool reserve)
{
WT_CURSOR *primary;
- WT_TIERED *tiered;
-
- tiered = curtiered->tiered;
+ int (*func)(WT_CURSOR *);
/*
* Clear the existing cursor position. Don't clear the primary cursor: we're about to use it
* anyway.
*/
- primary = curtiered->cursors[tiered->ntiers - 1];
+ primary = curtiered->cursors[WT_TIERED_INDEX_LOCAL];
WT_RET(__curtiered_reset_cursors(curtiered, primary));
/* If necessary, set the position for future scans. */
@@ -811,14 +809,15 @@ __curtiered_put(WT_CURSOR_TIERED *curtiered, const WT_ITEM *key, const WT_ITEM *
curtiered->current = primary;
primary->set_key(primary, key);
- if (reserve) {
- WT_RET(primary->reserve(primary));
- } else {
- primary->set_value(primary, value);
- WT_RET(primary->insert(primary));
- }
- return (0);
+ /* Our API always leaves the cursor positioned after a reserve call. */
+ WT_ASSERT(CUR2S(curtiered), !reserve || position);
+ func = primary->insert;
+ if (position)
+ func = reserve ? primary->reserve : primary->update;
+ if (!reserve)
+ primary->set_value(primary, value);
+ return (func(primary));
}
/*
@@ -1010,21 +1009,6 @@ err:
}
/*
- * __curtiered_random_tier --
- * Pick a tier at random, weighted by the size of all tiers. Weighting proportional to documents
- * avoids biasing towards small tiers. Then return the cursor on the tier we have picked.
- */
-static void
-__curtiered_random_tier(WT_SESSION_IMPL *session, WT_CURSOR_TIERED *curtiered, WT_CURSOR **cursor)
-{
- u_int i;
-
- /* TODO: make randomness respect tree size. */
- i = __wt_random(&session->rnd) % curtiered->tiered->ntiers;
- *cursor = curtiered->cursors[i];
-}
-
-/*
* __curtiered_next_random --
* WT_CURSOR->next method for the tiered cursor type when configured with next_random.
*/
@@ -1035,6 +1019,7 @@ __curtiered_next_random(WT_CURSOR *cursor)
WT_CURSOR_TIERED *curtiered;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ u_int i, tier;
int exact;
c = NULL;
@@ -1044,29 +1029,33 @@ __curtiered_next_random(WT_CURSOR *cursor)
__cursor_novalue(cursor);
WT_ERR(__curtiered_enter(curtiered, false));
- for (;;) {
- __curtiered_random_tier(session, curtiered, &c);
- /*
- * This call to next_random on the tier can potentially end in WT_NOTFOUND if the tier we
- * picked is empty. We want to retry in that case.
- */
+ /*
+ * Select a random tier. If it is empty, try the next tier and so on, wrapping around until we
+ * find something or run out of tiers.
+ */
+ tier = __wt_random(&session->rnd) % WT_TIERED_MAX_TIERS;
+ for (i = 0; i < WT_TIERED_MAX_TIERS; i++) {
+ c = curtiered->cursors[tier];
WT_ERR_NOTFOUND_OK(__wt_curfile_next_random(c), true);
- if (ret == WT_NOTFOUND)
+ if (ret == WT_NOTFOUND) {
+ if (++tier == WT_TIERED_MAX_TIERS)
+ tier = 0;
continue;
+ }
F_SET(cursor, WT_CURSTD_KEY_INT);
WT_ERR(c->get_key(c, &cursor->key));
/*
- * Search near the current key to resolve any tombstones and position to a valid document.
- * If we see a WT_NOTFOUND here that is valid, as the tree has no documents visible to us.
+ * Search near the current key to resolve any tombstones and position to a valid record. If
+ * we see a WT_NOTFOUND here that is valid, as the tree has no documents visible to us.
*/
WT_ERR(__curtiered_search_near(cursor, &exact));
break;
}
- /* We have found a valid doc. Set that we are now positioned */
- if (0) {
err:
+ if (ret != 0) {
+ /* We didn't find a valid record. Don't leave cursor positioned */
F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
}
__curtiered_leave(curtiered);
@@ -1074,6 +1063,70 @@ err:
}
/*
+ * __curtiered_insert_bulk --
+ * WT_CURSOR->insert method for tiered bulk cursors.
+ */
+static int
+__curtiered_insert_bulk(WT_CURSOR *cursor)
+{
+ WT_CURSOR *bulk_cursor;
+ WT_CURSOR_TIERED *curtiered;
+ WT_SESSION_IMPL *session;
+
+ curtiered = (WT_CURSOR_TIERED *)cursor;
+ session = CUR2S(curtiered);
+ bulk_cursor = curtiered->cursors[WT_TIERED_INDEX_LOCAL];
+
+ WT_ASSERT(session, bulk_cursor != NULL);
+ bulk_cursor->set_key(bulk_cursor, &cursor->key);
+ bulk_cursor->set_value(bulk_cursor, &cursor->value);
+ WT_RET(bulk_cursor->insert(bulk_cursor));
+
+ return (0);
+}
+
+/*
+ * __curtiered_open_bulk --
+ * WT_SESSION->open_cursor method for tiered bulk cursors. Restricts the tiered cursor to insert
+ * and close, allocates the per-tier cursor array and opens a single child cursor on the local
+ * tier using the caller's configuration.
+ */
+static int
+__curtiered_open_bulk(WT_CURSOR_TIERED *curtiered, const char *cfg[])
+{
+ WT_CURSOR *cursor;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ WT_TIERED *tiered;
+
+ cursor = &curtiered->iface;
+ session = CUR2S(curtiered);
+ tiered = curtiered->tiered;
+
+ /* Bulk cursors only support insert and close. */
+ __wt_cursor_set_notsup(cursor);
+ cursor->insert = __curtiered_insert_bulk;
+ cursor->close = __wt_curtiered_close;
+
+ /* The array has one slot per possible tier; only the local slot is filled in below. */
+ WT_ASSERT(session, curtiered->cursors == NULL);
+ WT_ERR(__wt_calloc_def(session, WT_TIERED_MAX_TIERS, &curtiered->cursors));
+
+ /* Open a bulk cursor on the local tier. */
+ dhandle = tiered->tiers[WT_TIERED_INDEX_LOCAL].tier;
+ WT_ASSERT(session, dhandle != NULL);
+ WT_ERR(__wt_open_cursor(
+ session, dhandle->name, cursor, cfg, &curtiered->cursors[WT_TIERED_INDEX_LOCAL]));
+
+ /* Child cursors always use overwrite and raw mode. */
+ F_SET(curtiered->cursors[WT_TIERED_INDEX_LOCAL], WT_CURSTD_OVERWRITE | WT_CURSTD_RAW);
+
+ if (0) {
+err:
+ /* On failure release the cursor array allocated above. */
+ __wt_free(session, curtiered->cursors);
+ }
+ return (ret);
+}
+
+/*
* __wt_curtiered_open --
* WT_SESSION->open_cursor method for tiered cursors.
*/
@@ -1129,7 +1182,7 @@ __wt_curtiered_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
/* Check whether the exclusive open for a bulk load succeeded. */
if (bulk && ret == EBUSY)
- WT_ERR_MSG(session, EINVAL, "bulk-load is only supported on newly created trees");
+ ret = EINVAL;
/* Flag any errors from the tree get. */
WT_ERR(ret);
@@ -1142,7 +1195,7 @@ __wt_curtiered_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
cursor = (WT_CURSOR *)curtiered;
*cursor = iface;
cursor->session = (WT_SESSION *)session;
- WT_ERR(__wt_strdup(session, tiered->name, &cursor->uri));
+ WT_ERR(__wt_strdup(session, tiered->iface.name, &cursor->uri));
cursor->key_format = tiered->key_format;
cursor->value_format = tiered->value_format;
@@ -1159,7 +1212,7 @@ __wt_curtiered_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
WT_ERR(__wt_cursor_init(cursor, cursor->uri, owner, cfg, cursorp));
if (bulk)
- WT_ERR(ENOTSUP); /* TODO */
+ WT_ERR(__curtiered_open_bulk(curtiered, cfg));
if (0) {
err:
diff --git a/src/third_party/wiredtiger/src/tiered/tiered_handle.c b/src/third_party/wiredtiger/src/tiered/tiered_handle.c
index e326e247717..a1bb6bc37a6 100644
--- a/src/third_party/wiredtiger/src/tiered/tiered_handle.c
+++ b/src/third_party/wiredtiger/src/tiered/tiered_handle.c
@@ -9,62 +9,529 @@
#include "wt_internal.h"
/*
+ * __tiered_dhandle_setup --
+ * Given a tiered index and name, set up the dhandle information.
+ */
+static int
+__tiered_dhandle_setup(WT_SESSION_IMPL *session, WT_TIERED *tiered, uint32_t i, const char *name)
+{
+ WT_DECL_RET;
+ WT_TIERED_TIERS *tier;
+ uint32_t id, type;
+
+ WT_RET(__wt_session_get_dhandle(session, name, NULL, NULL, 0));
+ if (i == WT_TIERED_INDEX_INVALID) {
+ type = session->dhandle->type;
+ if (type == WT_DHANDLE_TYPE_BTREE)
+ id = WT_TIERED_INDEX_LOCAL;
+ else if (type == WT_DHANDLE_TYPE_TIERED)
+ id = WT_TIERED_INDEX_LOCAL;
+ else
+ WT_ERR_MSG(
+ session, EINVAL, "Unknown or unsupported tiered dhandle type %" PRIu32, type);
+ } else {
+ WT_ASSERT(session, i < WT_TIERED_MAX_TIERS);
+ id = i;
+ }
+ /* Reference the dhandle and set it in the tier array. */
+ tier = &tiered->tiers[id];
+ (void)__wt_atomic_addi32(&session->dhandle->session_inuse, 1);
+ tier->tier = session->dhandle;
+
+ /* The Btree needs to use the bucket storage to do file system operations. */
+ if (session->dhandle->type == WT_DHANDLE_TYPE_BTREE)
+ ((WT_BTREE *)session->dhandle->handle)->bstorage = tiered->bstorage;
+err:
+ WT_RET(__wt_session_release_dhandle(session));
+ return (ret);
+}
+
+/*
+ * __tiered_init_tiers --
+ * Given a tiered table 'tiers' configuration set up the dhandle array. Each key found in the
+ * configuration is treated as a URI and handed to __tiered_dhandle_setup.
+ */
+static int
+__tiered_init_tiers(WT_SESSION_IMPL *session, WT_TIERED *tiered, WT_CONFIG_ITEM *tierconf)
+{
+ WT_CONFIG cparser;
+ WT_CONFIG_ITEM ckey, cval;
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ __wt_config_subinit(session, &cparser, tierconf);
+ while ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0) {
+ /* Set up the tiers array based on the metadata. */
+ /* The key is a pointer/length pair: format it into the scratch buffer to use it as a name. */
+ WT_ERR(__wt_buf_fmt(session, tmp, "%.*s", (int)ckey.len, ckey.str));
+ __wt_verbose(
+ session, WT_VERB_TIERED, "INIT_TIERS: tiered URI dhandle %s", (char *)tmp->data);
+ /* Pass an invalid index so the slot is picked from the type of the handle. */
+ WT_ERR(__tiered_dhandle_setup(
+ session, tiered, WT_TIERED_INDEX_INVALID, (const char *)tmp->data));
+ }
+ /* Presumably WT_NOTFOUND is the normal end of the list; any other value is an error. */
+ WT_ERR_NOTFOUND_OK(ret, false);
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
+}
+
+/*
+ * __tiered_create_local --
+ * Create a new local name for a tiered table. Must be called single threaded. Advances the
+ * object id, creates the schema entry for the new local file and records its name in the local
+ * tier slot, replacing (and freeing) any previous name. On error the generated name is freed.
+ */
+static int
+__tiered_create_local(WT_SESSION_IMPL *session, WT_TIERED *tiered)
+{
+ WT_DECL_RET;
+ WT_TIERED_TIERS *this_tier;
+ const char *cfg[4] = {NULL, NULL, NULL, NULL};
+ const char *config, *name;
+
+ config = name = NULL;
+
+ /* If this ever can be multi-threaded, this would need to be atomic. */
+ tiered->current_id = tiered->next_id++;
+ /* XXX Remove when we have real flags. */
+ F_SET(tiered, WT_TIERED_FLAG_UNUSED);
+ WT_ERR(
+ __wt_tiered_name(session, &tiered->iface, tiered->current_id, WT_TIERED_NAME_LOCAL, &name));
+ __wt_verbose(session, WT_VERB_TIERED, "TIER_CREATE_LOCAL: LOCAL: %s", name);
+ /* Build the configuration for the new object from the base plus the saved object config. */
+ cfg[0] = WT_CONFIG_BASE(session, object_meta);
+ cfg[1] = tiered->obj_config;
+ __wt_verbose(session, WT_VERB_TIERED, "TIER_CREATE_LOCAL: obj_config: %s : %s", name, cfg[1]);
+ WT_ASSERT(session, tiered->obj_config != NULL);
+ WT_ERR(__wt_config_merge(session, cfg, NULL, (const char **)&config));
+ /*
+ * XXX Need to verify user doesn't create a table of the same name. What does LSM do? It
+ * definitely has the same problem with chunks.
+ */
+ __wt_verbose(
+ session, WT_VERB_TIERED, "TIER_CREATE_LOCAL: schema create LOCAL: %s : %s", name, config);
+ WT_ERR(__wt_schema_create(session, name, config));
+ /* From here on the tier slot owns the name string. */
+ this_tier = &tiered->tiers[WT_TIERED_INDEX_LOCAL];
+ if (this_tier->name != NULL)
+ __wt_free(session, this_tier->name);
+ this_tier->name = name;
+ F_SET(this_tier, WT_TIERS_OP_READ | WT_TIERS_OP_WRITE);
+
+ if (0) {
+err:
+ /* Only free name on error. */
+ __wt_free(session, name);
+ }
+ /* The merged configuration string is temporary and released on every path. */
+ __wt_free(session, config);
+ return (ret);
+}
+
+/*
+ * __tiered_create_object --
+ *     Create the metadata entry for the shared object that corresponds to the current local object
+ *     (tiered->current_id). If a local tier name already exists, that table is first altered to be
+ *     readonly. The generated name and configuration are temporary and freed before returning.
+ */
+static int
+__tiered_create_object(WT_SESSION_IMPL *session, WT_TIERED *tiered)
+{
+    WT_DECL_RET;
+    const char *cfg[4] = {NULL, NULL, NULL, NULL};
+    const char *config, *name, *orig_name;
+
+    config = name = NULL;
+    orig_name = tiered->tiers[WT_TIERED_INDEX_LOCAL].name;
+    /*
+     * If we have an existing local file in the tier, alter the table to indicate this one is now
+     * readonly.
+     */
+    if (orig_name != NULL) {
+        cfg[0] = "readonly=true";
+        WT_WITHOUT_DHANDLE(session, ret = __wt_schema_alter(session, orig_name, cfg));
+        WT_ERR(ret);
+    }
+    /*
+     * Create the name and metadata of the new shared object of the current local object.
+     * The data structure keeps this id so that we don't have to parse and manipulate strings.
+     * I.e. if we have file:example-000000002.wt we want object:example-000000002.wtobj.
+     */
+    WT_ERR(
+      __wt_tiered_name(session, &tiered->iface, tiered->current_id, WT_TIERED_NAME_OBJECT, &name));
+    /* The configuration array is reused here: every slot set for the alter is overwritten. */
+    cfg[0] = WT_CONFIG_BASE(session, object_meta);
+    cfg[1] = tiered->obj_config;
+    cfg[2] = "readonly=true";
+    WT_ASSERT(session, tiered->obj_config != NULL);
+    WT_ERR(__wt_config_merge(session, cfg, NULL, (const char **)&config));
+    __wt_verbose(
+      session, WT_VERB_TIERED, "TIER_CREATE_OBJECT: schema create %s : %s", name, config);
+    /* Create the new shared object. */
+    WT_ERR(__wt_schema_create(session, name, config));
+
+err:
+    __wt_free(session, config);
+    __wt_free(session, name);
+    return (ret);
+}
+
+/*
+ * __tiered_create_tier_tree --
+ *     Create a tier name for a tiered table: generate the shared tier URI, create its schema entry
+ *     as readonly with the bucket and bucket prefix of the tiered handle's bucket storage, and
+ *     record the name in the shared tier slot. On error the generated name is freed.
+ */
+static int
+__tiered_create_tier_tree(WT_SESSION_IMPL *session, WT_TIERED *tiered)
+{
+    WT_DECL_ITEM(tmp);
+    WT_DECL_RET;
+    WT_TIERED_TIERS *this_tier;
+    const char *cfg[4] = {NULL, NULL, NULL, NULL};
+    const char *config, *name;
+
+    config = name = NULL;
+    WT_RET(__wt_scr_alloc(session, 0, &tmp));
+
+    /* Create tier:example for the new tiered tree. */
+    WT_ERR(__wt_tiered_name(session, &tiered->iface, 0, WT_TIERED_NAME_SHARED, &name));
+    cfg[0] = WT_CONFIG_BASE(session, tier_meta);
+    WT_ASSERT(session, tiered->bstorage != NULL);
+    WT_ERR(__wt_buf_fmt(session, tmp, ",readonly=true,tiered_storage=(bucket=%s,bucket_prefix=%s)",
+      tiered->bstorage->bucket, tiered->bstorage->bucket_prefix));
+    /*
+     * The array must be filled contiguously: leaving a NULL slot in front of this string would
+     * end the list before the settings are seen (assumes the usual NULL-terminated configuration
+     * array convention -- TODO confirm).
+     */
+    cfg[1] = tmp->data;
+    WT_ERR(__wt_config_merge(session, cfg, NULL, &config));
+    /* Set up a tier:example metadata for the first time. */
+    __wt_verbose(session, WT_VERB_TIERED, "CREATE_TIER_TREE: schema create: %s : %s", name, config);
+    WT_ERR(__wt_schema_create(session, name, config));
+    /* From here on the tier slot owns the name string. */
+    this_tier = &tiered->tiers[WT_TIERED_INDEX_SHARED];
+    WT_ASSERT(session, this_tier->name == NULL);
+    this_tier->name = name;
+    F_SET(this_tier, WT_TIERS_OP_FLUSH | WT_TIERS_OP_READ);
+
+    if (0) {
+err:
+        /* Only free on error. */
+        __wt_free(session, name);
+    }
+    __wt_free(session, config);
+    __wt_scr_free(session, &tmp);
+    return (ret);
+}
+
+/*
+ * __tiered_update_dhandles --
+ * Update the dhandle list for a tiered structure after object switching. For every tier slot
+ * with a name, make sure the slot's dhandle matches that name, dropping the reference on a stale
+ * handle and acquiring the new one. On error every slot is dereferenced and cleared.
+ */
+static int
+__tiered_update_dhandles(WT_SESSION_IMPL *session, WT_TIERED *tiered)
+{
+ WT_DECL_RET;
+ uint32_t i;
+
+ /* Now get the dhandle and add it to the array. */
+ for (i = 0; i < WT_TIERED_MAX_TIERS; ++i) {
+ /*
+ * If we have a tiered dhandle we can either skip if it is the same name or we decrement the
+ * old one and get a new one for the new name.
+ */
+ if (tiered->tiers[i].tier != NULL) {
+ WT_ASSERT(session, tiered->tiers[i].name != NULL);
+ if (strcmp(tiered->tiers[i].tier->name, tiered->tiers[i].name) == 0)
+ continue;
+ else
+ (void)__wt_atomic_subi32(&tiered->tiers[i].tier->session_inuse, 1);
+ }
+ /* A slot without a name has no tier to open. */
+ if (tiered->tiers[i].name == NULL)
+ continue;
+ __wt_verbose(
+ session, WT_VERB_TIERED, "UPDATE_DH: Get dhandle for %s", tiered->tiers[i].name);
+ WT_ERR(__tiered_dhandle_setup(session, tiered, i, tiered->tiers[i].name));
+ }
+err:
+ __wt_verbose(session, WT_VERB_TIERED, "UPDATE_DH: DONE ret %d", ret);
+ if (ret != 0) {
+ /* Need to undo our dhandles. Close and dereference all. */
+ for (i = 0; i < WT_TIERED_MAX_TIERS; ++i) {
+ if (tiered->tiers[i].tier != NULL)
+ (void)__wt_atomic_subi32(&tiered->tiers[i].tier->session_inuse, 1);
+ /* The names are owned by the slots, so they are released here as well. */
+ __wt_free(session, tiered->tiers[i].name);
+ tiered->tiers[i].tier = NULL;
+ tiered->tiers[i].name = NULL;
+ }
+ }
+ return (ret);
+}
+
+/*
+ * __tiered_update_metadata --
+ *     Update the metadata for a tiered structure after object switching. Builds a
+ *     "last=<id>,tiers=(...)" string from the current object id and the names in the tier slots,
+ *     merges it on top of the original configuration and writes the result as the metadata for the
+ *     tiered handle. Slots without a name are skipped.
+ */
+static int
+__tiered_update_metadata(WT_SESSION_IMPL *session, WT_TIERED *tiered, const char *orig_config)
+{
+    WT_DATA_HANDLE *dhandle;
+    WT_DECL_ITEM(tmp);
+    WT_DECL_RET;
+    uint32_t i;
+    const char *cfg[4] = {NULL, NULL, NULL, NULL};
+    const char *newconfig, *sep;
+
+    dhandle = &tiered->iface;
+    newconfig = NULL;
+    WT_RET(__wt_scr_alloc(session, 0, &tmp));
+
+    /* Once the scratch buffer is held, failures must go through the cleanup code. */
+    WT_ERR(__wt_buf_fmt(session, tmp, "last=%" PRIu64 ",tiers=(", tiered->current_id));
+    sep = "";
+    for (i = 0; i < WT_TIERED_MAX_TIERS; ++i) {
+        if (tiered->tiers[i].name == NULL) {
+            __wt_verbose(session, WT_VERB_TIERED, "TIER_UPDATE_META: names[%" PRIu32 "] NULL", i);
+            continue;
+        }
+        __wt_verbose(session, WT_VERB_TIERED, "TIER_UPDATE_META: names[%" PRIu32 "]: %s", i,
+          tiered->tiers[i].name);
+        /*
+         * Quote each name on both sides. The separator is tracked separately from the index so
+         * the list has no leading comma when the first slot is empty.
+         */
+        WT_ERR(__wt_buf_catfmt(session, tmp, "%s\"%s\"", sep, tiered->tiers[i].name));
+        sep = ",";
+    }
+    WT_ERR(__wt_buf_catfmt(session, tmp, ")"));
+
+    cfg[0] = WT_CONFIG_BASE(session, tiered_meta);
+    cfg[1] = orig_config;
+    cfg[2] = tmp->data;
+    WT_ERR(__wt_config_merge(session, cfg, NULL, &newconfig));
+    __wt_verbose(
+      session, WT_VERB_TIERED, "TIER_UPDATE_META: Update TIERED: %s %s", dhandle->name, newconfig);
+    WT_ERR(__wt_metadata_update(session, dhandle->name, newconfig));
+
+err:
+    __wt_free(session, newconfig);
+    __wt_scr_free(session, &tmp);
+    return (ret);
+}
+
+/*
+ * __tiered_switch --
+ *     Given a tiered table, make all the metadata updates underneath to switch to the next object.
+ *     The switch handles going from nothing to local-only, local-only to both local and shared, and
+ *     having shared-only and creating a local object. Must be single threaded. The tiered handle is
+ *     taken from session->dhandle; config is the original configuration the new metadata is merged
+ *     onto.
+ */
+static int
+__tiered_switch(WT_SESSION_IMPL *session, const char *config)
+{
+    WT_DECL_RET;
+    WT_TIERED *tiered;
+    bool need_object, need_tree, tracking;
+
+    tiered = (WT_TIERED *)session->dhandle;
+    __wt_verbose(
+      session, WT_VERB_TIERED, "TIER_SWITCH: called %s %s", session->dhandle->name, config);
+
+    /*
+     * A shared object is needed whenever a local tier already exists; the shared tier entry is
+     * additionally needed if it has not been created yet.
+     */
+    need_object = tiered->tiers[WT_TIERED_INDEX_LOCAL].tier != NULL;
+    need_tree = need_object && tiered->tiers[WT_TIERED_INDEX_SHARED].tier == NULL;
+    /*
+     * There are four possibilities to our tiers configuration. In all of them we need to create
+     * a new local tier file object dhandle and add it as element index zero of the tiers array.
+     * Then we may or may not do other operations depending on the state otherwise. These are
+     * presented in order of increasing amount of work that needs to be done.
+     *   1. tiers=() - New and empty. We only need to add in the local file object.
+     *   2. tiers=("tier:...") - Existing shared tier only. Here too we only need to add
+     *      in the local file object.
+     *   3. tiers=("file:...", "tier:...") - Both local and shared tiers exist in the metadata.
+     *      We need to create and add the next local file object (N + 1) and create a shared
+     *      object in the metadata for the current local file object (N).
+     *   4. tiers=("file:...") - Existing local tier only. We need to do all of the parts listed
+     *      in the #3 above, and also create the shared tier metadata entry.
+     *
+     * Step 4 must be done after some order of 1-3.
+     *   1. Create the "object:" entry in metadata if needed.
+     *   2. Create the "tier:" entry in metadata if needed.
+     *   3. Create the new "file:" local entry in metadata.
+     *   4. Update the "tiered:" with new tiers and object number.
+     *   5. Meta tracking off to "commit" all the metadata operations.
+     *   6. Revise the dhandles in the tiered structure to reflect new state of the world.
+     */
+
+    /*
+     * To be implemented with flush_tier:
+     *  - Close the current object.
+     *  - Copy the current one to the cloud. It also remains in the local store.
+     */
+
+    WT_RET(__wt_meta_track_on(session));
+    tracking = true;
+    /* Create the object: entry in the metadata. */
+    if (need_object)
+        WT_ERR(__tiered_create_object(session, tiered));
+
+    if (need_tree)
+        WT_ERR(__tiered_create_tier_tree(session, tiered));
+
+    /* We always need to create a local object. */
+    WT_ERR(__tiered_create_local(session, tiered));
+
+    /*
+     * Note that removal of overlapping local objects is not in the purview of this function. Some
+     * other mechanism will remove outdated tiers. Here's where it could be done though.
+     */
+
+    /* Update the tiered: metadata to new object number and tiered array. */
+    WT_ERR(__tiered_update_metadata(session, tiered, config));
+    /* Clear the flag first so the cleanup code does not turn tracking off a second time. */
+    tracking = false;
+    WT_ERR(__wt_meta_track_off(session, true, ret != 0));
+    WT_ERR(__tiered_update_dhandles(session, tiered));
+err:
+    __wt_verbose(session, WT_VERB_TIERED, "TIER_SWITCH: DONE ret %d", ret);
+    /*
+     * If a step failed while tracking was still on, turn tracking off here. Fold the result into
+     * ret so the failure that brought us here is the one reported to the caller.
+     */
+    if (tracking)
+        WT_TRET(__wt_meta_track_off(session, true, ret != 0));
+    return (ret);
+}
+
+/*
+ * __wt_tiered_switch --
+ * Switch metadata, external version. Runs the internal switch under WT_SAVE_DHANDLE and returns
+ * its result; the tiered handle operated on is the session's current dhandle.
+ */
+int
+__wt_tiered_switch(WT_SESSION_IMPL *session, const char *config)
+{
+ WT_DECL_RET;
+
+ /*
+ * For now just a wrapper to internal function. Maybe there's more to do externally, like wrap
+ * it in a lock or with a dhandle or walk the dhandle list here rather than higher up.
+ */
+ /* Presumably the macro restores session->dhandle after the call -- confirm against its definition. */
+ WT_SAVE_DHANDLE(session, ret = __tiered_switch(session, config));
+ return (ret);
+}
+
+/*
+ * __wt_tiered_name --
+ * Given a dhandle structure and object number generate the URI name of the given type. XXX
+ * Currently this is only used in this file but I anticipate it may be of use outside. If not,
+ * make this static and tiered_name instead.
+ *
+ * The flags select the kind of name: local ("file:<name>-<id>.wt"), object
+ * ("object:<name>-<id>.wtobj") or, failing both, shared ("tier:<name>"). With the prefix flag set,
+ * local and object names stop after the dash. The result is a newly allocated string returned in
+ * retp.
+ */
+int
+__wt_tiered_name(
+ WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle, uint64_t id, uint32_t flags, const char **retp)
+{
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ const char *name;
+
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ name = dhandle->name;
+ /* Skip the prefix depending on what we're given. */
+ if (dhandle->type == WT_DHANDLE_TYPE_TIERED)
+ WT_PREFIX_SKIP_REQUIRED(session, name, "tiered:");
+ else {
+ /* The only other handle type accepted is a tiered tree, which cannot ask for a shared name. */
+ WT_ASSERT(session, dhandle->type == WT_DHANDLE_TYPE_TIERED_TREE);
+ WT_ASSERT(session, !LF_ISSET(WT_TIERED_NAME_SHARED));
+ WT_PREFIX_SKIP_REQUIRED(session, name, "tier:");
+ }
+
+ /*
+ * Separate object numbers from the base table name with a dash. Separate from the suffix with a
+ * dot. We generate a different name style based on the type.
+ */
+ if (LF_ISSET(WT_TIERED_NAME_LOCAL)) {
+ if (LF_ISSET(WT_TIERED_NAME_PREFIX))
+ WT_ERR(__wt_buf_fmt(session, tmp, "file:%s-", name));
+ else
+ WT_ERR(__wt_buf_fmt(session, tmp, "file:%s-%010" PRIu64 ".wt", name, id));
+ } else if (LF_ISSET(WT_TIERED_NAME_OBJECT)) {
+ if (LF_ISSET(WT_TIERED_NAME_PREFIX))
+ WT_ERR(__wt_buf_fmt(session, tmp, "object:%s-", name));
+ else
+ WT_ERR(__wt_buf_fmt(session, tmp, "object:%s-%010" PRIu64 ".wtobj", name, id));
+ } else {
+ WT_ASSERT(session, !LF_ISSET(WT_TIERED_NAME_PREFIX));
+ WT_ASSERT(session, LF_ISSET(WT_TIERED_NAME_SHARED));
+ WT_ERR(__wt_buf_fmt(session, tmp, "tier:%s", name));
+ }
+ /* Hand back a copy of the formatted name; the scratch buffer is released below. */
+ WT_ERR(__wt_strndup(session, tmp->data, tmp->size, retp));
+ __wt_verbose(session, WT_VERB_TIERED, "Generated tiered name: %s", *retp);
+err:
+ __wt_scr_free(session, &tmp);
+ return (ret);
+}
+
+/*
* __tiered_open --
* Open a tiered data handle (internal version).
*/
static int
__tiered_open(WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_CONFIG cparser;
- WT_CONFIG_ITEM ckey, cval, tierconf;
+ WT_CONFIG_ITEM cval, tierconf;
WT_DATA_HANDLE *dhandle;
- WT_DECL_ITEM(buf);
+ WT_DECL_ITEM(tmp);
WT_DECL_RET;
WT_TIERED *tiered;
- u_int i;
- const char **tiered_cfg;
+ uint32_t unused;
+ char *metaconf;
+ const char *obj_cfg[] = {WT_CONFIG_BASE(session, object_meta), NULL, NULL};
+ const char **tiered_cfg, *config;
dhandle = session->dhandle;
tiered = (WT_TIERED *)dhandle;
tiered_cfg = dhandle->cfg;
+ config = NULL;
+ metaconf = NULL;
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
WT_UNUSED(cfg);
- WT_RET(__wt_config_gets(session, tiered_cfg, "key_format", &cval));
- WT_RET(__wt_strndup(session, cval.str, cval.len, &tiered->key_format));
- WT_RET(__wt_config_gets(session, tiered_cfg, "value_format", &cval));
- WT_RET(__wt_strndup(session, cval.str, cval.len, &tiered->value_format));
-
- /* Point to some items in the copy to save re-parsing. */
- WT_RET(__wt_config_gets(session, tiered_cfg, "tiered.tiers", &tierconf));
-
- /* Count the number of tiers. */
- __wt_config_subinit(session, &cparser, &tierconf);
- while ((ret = __wt_config_next(&cparser, &ckey, &cval)) == 0)
- ++tiered->ntiers;
- WT_RET_NOTFOUND_OK(ret);
-
- WT_ASSERT(session, tiered->ntiers > 0);
-
- WT_RET(__wt_scr_alloc(session, 0, &buf));
- WT_ERR(__wt_calloc_def(session, tiered->ntiers, &tiered->tiers));
-
- __wt_config_subinit(session, &cparser, &tierconf);
- for (i = 0; i < tiered->ntiers; i++) {
- WT_ERR(__wt_config_next(&cparser, &ckey, &cval));
- WT_ERR(__wt_buf_fmt(session, buf, "%.*s", (int)ckey.len, ckey.str));
- WT_ERR(__wt_session_get_dhandle(session, (const char *)buf->data, NULL, cfg, 0));
- (void)__wt_atomic_addi32(&session->dhandle->session_inuse, 1);
- /* Load in reverse order (based on LSM logic). */
- tiered->tiers[(tiered->ntiers - 1) - i] = session->dhandle;
- WT_ERR(__wt_session_release_dhandle(session));
+ /* Set up the bstorage from the configuration first. */
+ /* The scratch buffer is already held, so failures must go through the cleanup code. */
+ WT_ERR(__wt_config_gets(session, tiered_cfg, "tiered_storage.name", &cval));
+ /* Without a storage name of its own, the handle shares the connection's bucket storage. */
+ if (cval.len == 0)
+ tiered->bstorage = S2C(session)->bstorage;
+ else
+ WT_ERR(__wt_tiered_bucket_config(session, tiered_cfg, &tiered->bstorage));
+ WT_ASSERT(session, tiered->bstorage != NULL);
+ /* Collapse into one string for later use in switch. */
+ WT_ERR(__wt_config_merge(session, tiered_cfg, NULL, &config));
+
+ /*
+ * Pull in any configuration of the original table for the object and file components that may
+ * have been sent in on the create.
+ */
+ obj_cfg[1] = config;
+ WT_ERR(__wt_config_collapse(session, obj_cfg, &metaconf));
+ /* Ownership of the collapsed string moves to the tiered handle. */
+ tiered->obj_config = metaconf;
+ metaconf = NULL;
+ __wt_verbose(session, WT_VERB_TIERED, "TIERED_OPEN: obj_config %s", tiered->obj_config);
+
+ WT_ERR(__wt_config_getones(session, config, "key_format", &cval));
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &tiered->key_format));
+ WT_ERR(__wt_config_getones(session, config, "value_format", &cval));
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &tiered->value_format));
+
+ WT_ERR(__wt_config_getones(session, config, "last", &cval));
+ tiered->current_id = (uint64_t)cval.val;
+ tiered->next_id = tiered->current_id + 1;
+ __wt_verbose(session, WT_VERB_TIERED, "TIERED_OPEN: current %d, next %d",
+ (int)tiered->current_id, (int)tiered->next_id);
+
+ ret = __wt_config_getones(session, config, "tiers", &tierconf);
+ WT_ERR_NOTFOUND_OK(ret, true);
+
+ /* Open tiers if we have them, otherwise initialize. */
+ if (tiered->current_id != 0)
+ WT_ERR(__tiered_init_tiers(session, tiered, &tierconf));
+ else {
+ __wt_verbose(
+ session, WT_VERB_TIERED, "TIERED_OPEN: create %s config %s", dhandle->name, config);
+ WT_ERR(__wt_tiered_switch(session, config));
+
+ /* XXX brute force, need to figure out functions to use to do this properly. */
+ /* We need to update the dhandle config entry to reflect the new tiers metadata. */
+ WT_ERR(__wt_metadata_search(session, dhandle->name, &metaconf));
+ __wt_verbose(session, WT_VERB_TIERED, "TIERED_OPEN: after switch meta conf %s %s",
+ dhandle->name, metaconf);
+ __wt_free(session, dhandle->cfg[1]);
+ dhandle->cfg[1] = metaconf;
+ }
+ if (0) {
+ /* Temp code to keep s_all happy. */
+ FLD_SET(unused, WT_TIERED_OBJ_LOCAL | WT_TIERED_TREE_UNUSED);
}
if (0) {
err:
+ __wt_free(session, tiered->obj_config);
__wt_free(session, tiered->tiers);
+ __wt_free(session, metaconf);
}
- __wt_scr_free(session, &buf);
+ __wt_verbose(session, WT_VERB_TIERED, "TIERED_OPEN: Done ret %d", ret);
+ __wt_scr_free(session, &tmp);
+ __wt_free(session, config);
return (ret);
}
@@ -84,22 +551,96 @@ __wt_tiered_open(WT_SESSION_IMPL *session, const char *cfg[])
/*
* __wt_tiered_close --
- * Close a tiered data handle.
+ * Close a tiered data handle. TODO: When this returns an actual meaningful return value, remove
+ * its entry from s_void.
*/
int
__wt_tiered_close(WT_SESSION_IMPL *session, WT_TIERED *tiered)
{
- WT_DECL_RET;
- u_int i;
+#if 0
+ WT_DATA_HANDLE *dhandle;
+#endif
+ uint32_t i;
- ret = 0;
+ /* Release the strings owned by the tiered handle. */
__wt_free(session, tiered->key_format);
__wt_free(session, tiered->value_format);
- if (tiered->tiers != NULL) {
- for (i = 0; i < tiered->ntiers; i++)
- (void)__wt_atomic_subi32(&tiered->tiers[i]->session_inuse, 1);
- __wt_free(session, tiered->tiers);
+ __wt_free(session, tiered->obj_config);
+ __wt_verbose(session, WT_VERB_TIERED, "%s", "TIERED_CLOSE: called");
+ /*
+ * For the moment we don't have anything to return. But all the callers currently expect a real
+ * return value from a close function. And this may become more complex later. During connection
+ * close the other dhandles may be closed and freed before this dhandle. So just free the names.
+ */
+ for (i = 0; i < WT_TIERED_MAX_TIERS; i++) {
+ /* The disabled block below is the per-tier dereference that is not yet safe to perform. */
+#if 0
+ dhandle = tiered->tiers[i].tier;
+ /*
+ * XXX We cannot decrement on connection close but we need to decrement on sweep close or
+ * other individual close.
+ */
+ (void)__wt_atomic_subi32(&dhandle->session_inuse, 1);
+#endif
+ if (tiered->tiers[i].name != NULL)
+ __wt_free(session, tiered->tiers[i].name);
}
+ return (0);
+}
+
+/*
+ * __wt_tiered_tree_open --
+ *     Open a tiered tree data handle. Currently this only walks the metadata looking for entries
+ *     that match the object name prefix generated for the session's current handle and logs them.
+ *     Returns 0 on success or the first error from the metadata cursor or name generation.
+ */
+int
+__wt_tiered_tree_open(WT_SESSION_IMPL *session, const char *cfg[])
+{
+    WT_CURSOR *cursor;
+    WT_DECL_RET;
+    const char *key, *object, *value;
+
+    WT_UNUSED(cfg);
+    object = NULL;
+    /*
+     * Set dhandle->handle with tiered tree structure, initialized.
+     */
+    /* Check the handle before it is dereferenced for the verbose message. */
+    WT_ASSERT(session, session->dhandle != NULL);
+    __wt_verbose(session, WT_VERB_TIERED, "TIERED_TREE_OPEN: Called %s", session->dhandle->name);
+    WT_RET(__wt_metadata_cursor(session, &cursor));
+    WT_ERR(__wt_tiered_name(
+      session, session->dhandle, 0, WT_TIERED_NAME_OBJECT | WT_TIERED_NAME_PREFIX, &object));
+    /*
+     * Walk looking for our objects.
+     */
+    while ((ret = cursor->next(cursor)) == 0) {
+        WT_ERR(cursor->get_key(cursor, &key));
+        WT_ERR(cursor->get_value(cursor, &value));
+        /*
+         * NOTE: Here we do anything we need to do to open or access each shared object.
+         */
+        /*
+         * NOTE(review): confirm WT_STRING_MATCH performs the intended prefix comparison here; if
+         * it requires the key to end at the given length, nothing will match the generated prefix.
+         */
+        if (!WT_STRING_MATCH(key, object, strlen(object)))
+            continue;
+        __wt_verbose(
+          session, WT_VERB_TIERED, "TIERED_TREE_OPEN: metadata for %s: %s", object, value);
+    }
+    /* Running off the end of the metadata is expected; anything else is a real failure. */
+    WT_ERR_NOTFOUND_OK(ret, false);
+err:
+    WT_TRET(__wt_metadata_cursor_release(session, &cursor));
+    __wt_free(session, object);
+    return (ret);
+}
+
+/*
+ * __wt_tiered_tree_close --
+ * Close a tiered tree data handle. Frees the key and value format strings held by the handle
+ * and always returns 0.
+ */
+int
+__wt_tiered_tree_close(WT_SESSION_IMPL *session, WT_TIERED_TREE *tiered_tree)
+{
+ WT_DECL_RET;
+
+ __wt_verbose(session, WT_VERB_TIERED, "TIERED_TREE_CLOSE: called %s", tiered_tree->iface.name);
+ ret = 0;
+ /* Release the format strings owned by the tiered tree. */
+ __wt_free(session, tiered_tree->key_format);
+ __wt_free(session, tiered_tree->value_format);
+
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 10887763194..9aa2f085386 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -171,6 +171,7 @@ __wt_txn_active(WT_SESSION_IMPL *session, uint64_t txnid)
WT_ORDERED_READ(session_cnt, conn->session_cnt);
WT_STAT_CONN_INCR(session, txn_walk_sessions);
for (i = 0, s = txn_global->txn_shared_list; i < session_cnt; i++, s++) {
+ WT_STAT_CONN_INCR(session, txn_sessions_walked);
/* If the transaction is in the list, it is uncommitted. */
if (s->id == txnid)
goto done;
@@ -243,6 +244,7 @@ __txn_get_snapshot_int(WT_SESSION_IMPL *session, bool publish)
WT_ORDERED_READ(session_cnt, conn->session_cnt);
WT_STAT_CONN_INCR(session, txn_walk_sessions);
for (i = 0, s = txn_global->txn_shared_list; i < session_cnt; i++, s++) {
+ WT_STAT_CONN_INCR(session, txn_sessions_walked);
/*
* Build our snapshot of any concurrent transaction IDs.
*
@@ -344,6 +346,7 @@ __txn_oldest_scan(WT_SESSION_IMPL *session, uint64_t *oldest_idp, uint64_t *last
WT_ORDERED_READ(session_cnt, conn->session_cnt);
WT_STAT_CONN_INCR(session, txn_walk_sessions);
for (i = 0, s = txn_global->txn_shared_list; i < session_cnt; i++, s++) {
+ WT_STAT_CONN_INCR(session, txn_sessions_walked);
/* Update the last running transaction ID. */
while ((id = s->id) != WT_TXN_NONE && WT_TXNID_LE(prev_oldest_id, id) &&
WT_TXNID_LT(id, last_running)) {
@@ -1046,6 +1049,9 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit,
WT_DECL_RET;
WT_TXN *txn;
WT_UPDATE *fix_upd, *tombstone, *upd;
+#ifdef HAVE_DIAGNOSTIC
+ WT_UPDATE *head_upd;
+#endif
size_t not_used;
uint32_t hs_btree_id;
bool upd_appended;
@@ -1057,12 +1063,18 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit,
WT_RET(__txn_search_prepared_op(session, op, cursorp, &upd));
+ __wt_verbose(session, WT_VERB_TRANSACTION,
+ "resolving prepared op for txnid: %" PRIu64 " that %s", txn->id,
+ commit ? "committed" : "roll backed");
/*
* Aborted updates can exist in the update chain of our transaction. Generally this will occur
* due to a reserved update. As such we should skip over these updates.
*/
for (; upd != NULL && upd->txnid == WT_TXN_ABORTED; upd = upd->next)
;
+#ifdef HAVE_DIAGNOSTIC
+ head_upd = upd;
+#endif
/*
* The head of the update chain is not a prepared update, which means all the prepared updates
@@ -1171,6 +1183,28 @@ __txn_resolve_prepared_op(WT_SESSION_IMPL *session, WT_TXN_OP *op, bool commit,
if (fix_upd != NULL)
WT_ERR(__txn_fixup_prepared_update(session, hs_cursor, fix_upd, commit));
+#ifdef HAVE_DIAGNOSTIC
+ for (; head_upd != NULL; head_upd = head_upd->next) {
+ /*
+ * Assert if we still have an update from the current transaction that hasn't been aborted.
+ * Only perform this check if aborting the prepared transaction.
+ */
+ WT_ASSERT(
+ session, commit || head_upd->txnid == WT_TXN_ABORTED || head_upd->txnid != txn->id);
+
+ if (head_upd->txnid == WT_TXN_ABORTED)
+ continue;
+
+ /*
+ * If we restored an update from the history store, it should be the last update on the
+ * chain.
+ */
+ if (upd_appended && head_upd->type == WT_UPDATE_STANDARD &&
+ F_ISSET(head_upd, WT_UPDATE_RESTORED_FROM_HS))
+ WT_ASSERT(session, head_upd->next == NULL);
+ }
+#endif
+
err:
if (hs_cursor != NULL)
WT_TRET(hs_cursor->close(hs_cursor));
@@ -1350,7 +1384,7 @@ __txn_mod_compare(const void *a, const void *b)
*/
if (aopt->type == WT_TXN_OP_BASIC_ROW || aopt->type == WT_TXN_OP_INMEM_ROW)
return (aopt->btree->collator == NULL ?
- __wt_lex_compare(&aopt->u.op_row.key, &bopt->u.op_row.key) :
+ __wt_lex_compare(&aopt->u.op_row.key, &bopt->u.op_row.key, false) :
0);
return (aopt->u.op_col.recno < bopt->u.op_col.recno);
}
@@ -2025,7 +2059,6 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
txn_global->current = txn_global->last_running = txn_global->metadata_pinned =
txn_global->oldest_id = WT_TXN_FIRST;
- WT_RET(__wt_spin_init(session, &txn_global->id_lock, "transaction id lock"));
WT_RWLOCK_INIT_TRACKED(session, &txn_global->rwlock, txn_global);
WT_RET(__wt_rwlock_init(session, &txn_global->visibility_rwlock));
@@ -2053,7 +2086,6 @@ __wt_txn_global_destroy(WT_SESSION_IMPL *session)
if (txn_global == NULL)
return;
- __wt_spin_destroy(session, &txn_global->id_lock);
__wt_rwlock_destroy(session, &txn_global->rwlock);
__wt_rwlock_destroy(session, &txn_global->visibility_rwlock);
__wt_free(session, txn_global->txn_shared_list);
@@ -2127,7 +2159,7 @@ __wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char **cfg)
}
s = NULL;
- WT_TRET(__wt_open_internal_session(conn, "close_ckpt", true, 0, &s));
+ WT_TRET(__wt_open_internal_session(conn, "close_ckpt", true, 0, 0, &s));
if (s != NULL) {
const char *checkpoint_cfg[] = {
WT_CONFIG_BASE(session, WT_SESSION_checkpoint), ckpt_cfg, NULL};
@@ -2152,7 +2184,7 @@ __wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char **cfg)
* eviction.
*/
int
-__wt_txn_is_blocking(WT_SESSION_IMPL *session, bool conservative)
+__wt_txn_is_blocking(WT_SESSION_IMPL *session)
{
WT_TXN *txn;
WT_TXN_SHARED *txn_shared;
@@ -2166,10 +2198,6 @@ __wt_txn_is_blocking(WT_SESSION_IMPL *session, bool conservative)
if (F_ISSET(txn, WT_TXN_PREPARE))
return (0);
- /* The checkpoint transaction shouldn't be blocking but if it is don't roll it back. */
- if (WT_SESSION_IS_CHECKPOINT(session))
- return (0);
-
/*
* MongoDB can't (yet) handle rolling back read only transactions. For this reason, don't check
* unless there's at least one update or we're configured to time out thread operations (a way
@@ -2179,22 +2207,6 @@ __wt_txn_is_blocking(WT_SESSION_IMPL *session, bool conservative)
return (0);
/*
- * Be less aggressive about aborting the oldest transaction in the case of trying to make
- * forced eviction successful. Specifically excuse it if:
- * * Hasn't done many updates
- * * Is in the middle of a commit or abort
- *
- * This threshold that we're comparing the number of updates to is related and must be greater
- * than the threshold we use in reconciliation's "need split" helper. If we're going to rollback
- * a transaction, we need to have considered splitting the page in the case that its updates are
- * on a single page.
- */
- if (conservative &&
- (txn->mod_count < (10 + WT_REC_SPLIT_MIN_ITEMS_USE_MEM) ||
- F_ISSET(session, WT_SESSION_RESOLVING_TXN)))
- return (0);
-
- /*
* Check if either the transaction's ID or its pinned ID is equal to the oldest transaction ID.
*/
return (txn_shared->id == global_oldest || txn_shared->pinned_id == global_oldest ?
@@ -2337,6 +2349,7 @@ __wt_verbose_dump_txn(WT_SESSION_IMPL *session)
*/
WT_STAT_CONN_INCR(session, txn_walk_sessions);
for (i = 0, s = txn_global->txn_shared_list; i < session_cnt; i++, s++) {
+ WT_STAT_CONN_INCR(session, txn_sessions_walked);
/* Skip sessions with no active transaction */
if ((id = s->id) == WT_TXN_NONE && s->pinned_id == WT_TXN_NONE)
continue;
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 5c6026cd176..dba739792a2 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -897,7 +897,13 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
*/
if (F_ISSET(hs_dhandle, WT_DHANDLE_OPEN)) {
time_start_hs = __wt_clock(session);
+ conn->txn_global.checkpoint_running_hs = true;
+ WT_STAT_CONN_SET(session, txn_checkpoint_running_hs, 1);
+
WT_WITH_DHANDLE(session, hs_dhandle, ret = __wt_checkpoint(session, cfg));
+
+ WT_STAT_CONN_SET(session, txn_checkpoint_running_hs, 0);
+ conn->txn_global.checkpoint_running_hs = false;
WT_ERR(ret);
/*
@@ -1870,8 +1876,9 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ASSERT(session, session->dhandle->checkpoint == NULL);
/* We must hold the metadata lock if checkpointing the metadata. */
- WT_ASSERT(
- session, !WT_IS_METADATA(session->dhandle) || F_ISSET(session, WT_SESSION_LOCKED_METADATA));
+ WT_ASSERT(session,
+ !WT_IS_METADATA(session->dhandle) ||
+ FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_METADATA));
WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval));
force = cval.val != 0;
@@ -1942,6 +1949,16 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final)
return (__wt_set_return(session, EBUSY));
/*
+ * Make sure there isn't a potential race between backup copying the metadata and a checkpoint
+ * changing the metadata. Backup holds both the checkpoint and schema locks. Checkpoint should
+ * hold those also except on the final checkpoint during close. Confirm the caller either is the
+ * final checkpoint or holds at least one of the locks.
+ */
+ WT_ASSERT(session,
+ final ||
+ (FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_CHECKPOINT) ||
+ FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA)));
+ /*
* Turn on metadata tracking if:
* - The session is not already doing metadata tracking.
* - The file was not bulk loaded.
diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c
index 2baf6131d97..a1cbbdc564a 100644
--- a/src/third_party/wiredtiger/src/txn/txn_recover.c
+++ b/src/third_party/wiredtiger/src/txn/txn_recover.c
@@ -558,6 +558,7 @@ __recovery_correct_write_gen(WT_SESSION_IMPL *session)
WT_DECL_RET;
char *config, *uri;
+ uri = NULL;
WT_RET(__wt_metadata_cursor(session, &cursor));
while ((ret = cursor->next(cursor)) == 0) {
WT_ERR(cursor->get_key(cursor, &uri));
@@ -573,6 +574,8 @@ __recovery_correct_write_gen(WT_SESSION_IMPL *session)
WT_ERR_NOTFOUND_OK(ret, false);
err:
+ if (ret != 0 && uri != NULL)
+ __wt_err(session, ret, "unable to correct write gen for %s", uri);
WT_TRET(__wt_metadata_cursor_release(session, &cursor));
return (ret);
}
@@ -586,6 +589,7 @@ static int
__recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
{
WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
WT_LSN lsn;
uint32_t fileid, lsnfile, lsnoffset;
@@ -606,7 +610,9 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
"metadata corruption: files %s and %s have the same file ID %u", uri,
r->files[fileid].uri, fileid);
WT_RET(__wt_strdup(r->session, uri, &r->files[fileid].uri));
- WT_RET(__wt_config_getones(r->session, config, "checkpoint_lsn", &cval));
+ if ((ret = __wt_config_getones(r->session, config, "checkpoint_lsn", &cval)) != 0)
+ WT_RET_MSG(
+ r->session, ret, "Failed recovery setup for %s: cannot parse config '%s'", uri, config);
/* If there is no checkpoint logged for the file, apply everything. */
if (cval.type != WT_CONFIG_ITEM_STRUCT)
WT_INIT_LSN(&lsn);
@@ -614,8 +620,9 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
else if (sscanf(cval.str, "(%" SCNu32 ",%" SCNu32 ")", &lsnfile, &lsnoffset) == 2)
WT_SET_LSN(&lsn, lsnfile, lsnoffset);
else
- WT_RET_MSG(
- r->session, EINVAL, "Failed to parse checkpoint LSN '%.*s'", (int)cval.len, cval.str);
+ WT_RET_MSG(r->session, EINVAL,
+ "Failed recovery setup for %s: cannot parse checkpoint LSN '%.*s'", uri, (int)cval.len,
+ cval.str);
WT_ASSIGN_LSN(&r->files[fileid].ckpt_lsn, &lsn);
__wt_verbose(r->session, WT_VERB_RECOVERY,
@@ -627,7 +634,9 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
WT_ASSIGN_LSN(&r->max_ckpt_lsn, &lsn);
/* Update the base write gen based on this file's configuration. */
- return (__wt_metadata_update_base_write_gen(r->session, config));
+ if ((ret = __wt_metadata_update_base_write_gen(r->session, config)) != 0)
+ WT_RET_MSG(r->session, ret, "Failed recovery setup for %s: cannot update write gen", uri);
+ return (0);
}
/*
@@ -786,7 +795,8 @@ __wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[])
was_backup = F_ISSET(conn, WT_CONN_WAS_BACKUP);
/* We need a real session for recovery. */
- WT_RET(__wt_open_internal_session(conn, "txn-recover", false, WT_SESSION_NO_LOGGING, &session));
+ WT_RET(
+ __wt_open_internal_session(conn, "txn-recover", false, WT_SESSION_NO_LOGGING, 0, &session));
r.session = session;
WT_MAX_LSN(&r.max_ckpt_lsn);
WT_MAX_LSN(&r.max_rec_lsn);
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 4f4edaec110..de2ff910072 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -377,10 +377,14 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page
* stop timestamp if the original update's commit timestamp is out of order. We may see
* records newer than or equal to the onpage value if eviction runs concurrently with
* checkpoint. In that case, don't verify the first record.
+ *
+ * If we have fixed the out-of-order timestamps, then the newer update reinserted with an
+ * older timestamp may have a durable timestamp that is smaller than the current stop
+ * durable timestamp.
*/
WT_ASSERT(session,
hs_stop_durable_ts <= newer_hs_durable_ts || hs_start_ts == hs_stop_durable_ts ||
- first_record);
+ hs_start_ts == newer_hs_durable_ts || first_record);
if (hs_stop_durable_ts < newer_hs_durable_ts)
WT_STAT_CONN_DATA_INCR(session, txn_rts_hs_stop_older_than_newer_start);
@@ -419,7 +423,7 @@ __rollback_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE *page
__wt_timestamp_to_string(hs_durable_ts, ts_string[1]),
__wt_timestamp_to_string(hs_stop_durable_ts, ts_string[2]),
__wt_timestamp_to_string(rollback_timestamp, ts_string[3]), hs_tw->start_txn, type);
- WT_ASSERT(session, hs_tw->start_ts < unpack->tw.start_ts);
+ WT_ASSERT(session, hs_tw->start_ts <= unpack->tw.start_ts);
valid_update_found = true;
break;
}
@@ -1408,7 +1412,7 @@ __rollback_to_stable_btree_apply(WT_SESSION_IMPL *session)
__wt_timestamp_to_string(rollback_timestamp, ts_string[0]),
__wt_timestamp_to_string(txn_global->oldest_timestamp, ts_string[1]));
- WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ WT_ASSERT(session, FLD_ISSET(session->lock_flags, WT_SESSION_LOCKED_SCHEMA));
WT_RET(__wt_metadata_cursor(session, &cursor));
if (F_ISSET(S2C(session), WT_CONN_RECOVERING))
@@ -1586,39 +1590,58 @@ err:
* Rollback all modifications with timestamps more recent than the passed in timestamp.
*/
static int
-__rollback_to_stable(WT_SESSION_IMPL *session)
+__rollback_to_stable(WT_SESSION_IMPL *session, bool no_ckpt)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ WT_TXN_GLOBAL *txn_global;
conn = S2C(session);
+ txn_global = &conn->txn_global;
+
+ /*
+ * Rollback to stable should ignore tombstones in the history store since it needs to scan the
+ * entire table sequentially. The flag set here is cleared at the err label below, so it is
+ * dropped on both the success and the error paths.
+ */
+ F_SET(session, WT_SESSION_ROLLBACK_TO_STABLE);
- WT_RET(__rollback_to_stable_check(session));
+ WT_ERR(__rollback_to_stable_check(session));
/*
* Allocate a non-durable btree bitstring. We increment the global value before using it, so the
* current value is already in use, and hence we need to add one here.
*/
conn->stable_rollback_maxfile = conn->next_file_id + 1;
- WT_WITH_SCHEMA_LOCK(session, ret = __rollback_to_stable_btree_apply(session));
+ WT_ERR(__rollback_to_stable_btree_apply(session));
+
+ /* Roll the global durable timestamp back to the stable timestamp. */
+ txn_global->has_durable_timestamp = txn_global->has_stable_timestamp;
+ txn_global->durable_timestamp = txn_global->stable_timestamp;
+
+ /*
+ * If the configuration is not in-memory, force a checkpoint after rollback to stable to ensure
+ * that the in-memory and on-disk versions are the same, unless the caller asked for no
+ * checkpoint by passing no_ckpt.
+ */
+ if (!F_ISSET(conn, WT_CONN_IN_MEMORY) && !no_ckpt)
+ WT_ERR(session->iface.checkpoint(&session->iface, "force=1"));
+err:
+ F_CLR(session, WT_SESSION_ROLLBACK_TO_STABLE);
return (ret);
}
/*
* __wt_rollback_to_stable --
- * Rollback all modifications with timestamps more recent than the passed in timestamp.
+ * Rollback the database to the stable timestamp.
*/
int
__wt_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[], bool no_ckpt)
{
WT_DECL_RET;
- WT_TXN_GLOBAL *txn_global;
WT_UNUSED(cfg);
- txn_global = &S2C(session)->txn_global;
-
/*
* Don't use the connection's default session: we are working on data handles and (a) don't want
* to cache all of them forever, plus (b) can't guarantee that no other method will be called
@@ -1626,30 +1649,13 @@ __wt_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[], bool no_ckp
* rollback to stable doesn't generate log records.
*/
WT_RET(__wt_open_internal_session(S2C(session), "txn rollback_to_stable", true,
- F_MASK(session, WT_SESSION_NO_LOGGING), &session));
+ F_MASK(session, WT_SESSION_NO_LOGGING), 0, &session));
- /*
- * Rollback to stable should ignore tombstones in the history store since it needs to scan the
- * entire table sequentially.
- */
WT_STAT_CONN_SET(session, txn_rollback_to_stable_running, 1);
- F_SET(session, WT_SESSION_ROLLBACK_TO_STABLE);
- ret = __rollback_to_stable(session);
- F_CLR(session, WT_SESSION_ROLLBACK_TO_STABLE);
+ WT_WITH_CHECKPOINT_LOCK(
+ session, WT_WITH_SCHEMA_LOCK(session, ret = __rollback_to_stable(session, no_ckpt)));
WT_STAT_CONN_SET(session, txn_rollback_to_stable_running, 0);
- WT_RET(ret);
- /* Rollback the global durable timestamp to the stable timestamp. */
- txn_global->has_durable_timestamp = txn_global->has_stable_timestamp;
- txn_global->durable_timestamp = txn_global->stable_timestamp;
-
- /*
- * If the configuration is not in-memory, forcibly log a checkpoint after rollback to stable to
- * ensure that both in-memory and on-disk versions are the same unless caller requested for no
- * checkpoint.
- */
- if (!F_ISSET(S2C(session), WT_CONN_IN_MEMORY) && !no_ckpt)
- WT_TRET(session->iface.checkpoint(&session->iface, "force=1"));
WT_TRET(__wt_session_close_internal(session));
return (ret);
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index 6b046373187..6acd265fd2d 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -113,6 +113,7 @@ __wt_txn_get_pinned_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *tsp, uin
WT_ORDERED_READ(session_cnt, conn->session_cnt);
WT_STAT_CONN_INCR(session, txn_walk_sessions);
for (i = 0, s = txn_global->txn_shared_list; i < session_cnt; i++, s++) {
+ WT_STAT_CONN_INCR(session, txn_sessions_walked);
__txn_get_read_timestamp(s, &tmp_read_ts);
/*
* A zero timestamp is possible here only when the oldest timestamp is not accounted for.
@@ -172,6 +173,7 @@ __txn_global_query_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *tsp, cons
WT_ORDERED_READ(session_cnt, conn->session_cnt);
WT_STAT_CONN_INCR(session, txn_walk_sessions);
for (i = 0, s = txn_global->txn_shared_list; i < session_cnt; i++, s++) {
+ WT_STAT_CONN_INCR(session, txn_sessions_walked);
__txn_get_durable_timestamp(s, &tmpts);
if (tmpts != WT_TS_NONE && --tmpts < ts)
ts = tmpts;
@@ -504,6 +506,7 @@ __txn_assert_after_reads(WT_SESSION_IMPL *session, const char *op, wt_timestamp_
WT_ORDERED_READ(session_cnt, S2C(session)->session_cnt);
WT_STAT_CONN_INCR(session, txn_walk_sessions);
for (i = 0, s = txn_global->txn_shared_list; i < session_cnt; i++, s++) {
+ WT_STAT_CONN_INCR(session, txn_sessions_walked);
__txn_get_read_timestamp(s, &tmp_timestamp);
if (tmp_timestamp != WT_TS_NONE && tmp_timestamp >= ts) {
__wt_readunlock(session, &txn_global->rwlock);